xref: /petsc/src/vec/is/sf/impls/basic/sfbasic.c (revision 120a1823665460d368c79d7f229e1a8d8d821f48)
1 
2 #include <../src/vec/is/sf/impls/basic/sfbasic.h>
3 
4 /*===================================================================================*/
5 /*              Internal routines for PetscSFPack                              */
6 /*===================================================================================*/
7 
8 /* Return root and leaf MPI requests for communication in the given direction. If the requests have not been
9    initialized (since we use persistent requests), then initialize them.
10 */
static PetscErrorCode PetscSFPackGetReqs_Basic(PetscSF sf,PetscSFPack link,PetscSFDirection direction,MPI_Request **rootreqs,MPI_Request **leafreqs)
{
  PetscErrorCode ierr;
  PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
  PetscInt       i,j,nrootranks,ndrootranks,nleafranks,ndleafranks,disp;
  const PetscInt *rootoffset,*leafoffset;
  PetscMPIInt    n;
  MPI_Comm       comm = PetscObjectComm((PetscObject)sf);
  MPI_Datatype   unit = link->unit;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  /* Without GPU-aware MPI, all messages must go through host buffers, regardless of where root/leafdata lives */
  if (use_gpu_aware_mpi) {
    rootmtype = link->rootmtype;
    leafmtype = link->leafmtype;
  } else {
    rootmtype = PETSC_MEMTYPE_HOST;
    leafmtype = PETSC_MEMTYPE_HOST;
  }

  /* Lazily create the persistent root-side requests for this (direction,memtype) pair.
     Loops start at ndrootranks: distinguished ranks exchange data via the self buffer, not MPI. */
  if (rootreqs && !link->rootreqsinited[direction][rootmtype]) {
    ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr);
    if (direction == PETSCSF_LEAF2ROOT_REDUCE) {
      /* Reducing: roots receive contributions from remote leaf ranks */
      for (i=ndrootranks,j=0; i<nrootranks; i++,j++) {
        disp = (rootoffset[i] - rootoffset[ndrootranks])*link->unitbytes; /* byte offset of rank i's chunk within rootbuf */
        ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr);
        ierr = MPI_Recv_init(link->rootbuf[rootmtype]+disp,n,unit,bas->iranks[i],link->tag,comm,&link->rootreqs[direction][rootmtype][j]);CHKERRQ(ierr);
      }
    } else if (direction == PETSCSF_ROOT2LEAF_BCAST) {
      /* Broadcasting: roots send their values out to remote leaf ranks */
      for (i=ndrootranks,j=0; i<nrootranks; i++,j++) {
        disp = (rootoffset[i] - rootoffset[ndrootranks])*link->unitbytes;
        ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr);
        ierr = MPI_Send_init(link->rootbuf[rootmtype]+disp,n,unit,bas->iranks[i],link->tag,comm,&link->rootreqs[direction][rootmtype][j]);CHKERRQ(ierr);
      }
    } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Out-of-range PetscSFDirection = %d\n",(int)direction);
    link->rootreqsinited[direction][rootmtype] = PETSC_TRUE;
  }

  /* Same lazy creation for the leaf-side requests, mirroring the root side above */
  if (leafreqs && !link->leafreqsinited[direction][leafmtype]) {
    ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr);
    if (direction == PETSCSF_LEAF2ROOT_REDUCE) {
      /* Reducing: leaves send their contributions to remote root ranks */
      for (i=ndleafranks,j=0; i<nleafranks; i++,j++) {
        disp = (leafoffset[i] - leafoffset[ndleafranks])*link->unitbytes; /* byte offset of rank i's chunk within leafbuf */
        ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr);
        ierr = MPI_Send_init(link->leafbuf[leafmtype]+disp,n,unit,sf->ranks[i],link->tag,comm,&link->leafreqs[direction][leafmtype][j]);CHKERRQ(ierr);
      }
    } else if (direction == PETSCSF_ROOT2LEAF_BCAST) {
      /* Broadcasting: leaves receive root values from remote root ranks */
      for (i=ndleafranks,j=0; i<nleafranks; i++,j++) {
        disp = (leafoffset[i] - leafoffset[ndleafranks])*link->unitbytes;
        ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr);
        ierr = MPI_Recv_init(link->leafbuf[leafmtype]+disp,n,unit,sf->ranks[i],link->tag,comm,&link->leafreqs[direction][leafmtype][j]);CHKERRQ(ierr);
      }
    } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Out-of-range PetscSFDirection = %d\n",(int)direction);
    link->leafreqsinited[direction][leafmtype] = PETSC_TRUE;
  }

  /* Hand back the (now initialized) persistent request arrays the caller asked for */
  if (rootreqs) *rootreqs = link->rootreqs[direction][rootmtype];
  if (leafreqs) *leafreqs = link->leafreqs[direction][leafmtype];
  PetscFunctionReturn(0);
}
71 
72 /* Common part shared by SFBasic and SFNeighbor based on the fact they all deal with sparse graphs. */
73 PETSC_INTERN PetscErrorCode PetscSFPackGet_Basic_Common(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscInt nrootreqs,PetscInt nleafreqs,PetscSFPack *mylink)
74 {
75   PetscErrorCode    ierr;
76   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
77   PetscInt          i,j,nreqs,nrootranks,ndrootranks,nleafranks,ndleafranks;
78   const PetscInt    *rootoffset,*leafoffset;
79   PetscSFPack       *p,link;
80   PetscBool         match;
81 
82   PetscFunctionBegin;
83   ierr = PetscSFPackSetErrorOnUnsupportedOverlap(sf,unit,rootdata,leafdata);CHKERRQ(ierr);
84 
85   /* Look for types in cache */
86   for (p=&bas->avail; (link=*p); p=&link->next) {
87     ierr = MPIPetsc_Type_compare(unit,link->unit,&match);CHKERRQ(ierr);
88     if (match) {
89       *p = link->next; /* Remove from available list */
90       goto found;
91     }
92   }
93 
94   ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr);
95   ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr);
96   ierr = PetscNew(&link);CHKERRQ(ierr);
97   ierr = PetscSFPackSetUp_Host(sf,link,unit);CHKERRQ(ierr);
98   ierr = PetscCommGetNewTag(PetscObjectComm((PetscObject)sf),&link->tag);CHKERRQ(ierr); /* One tag per link */
99 
100   /* Allocate root, leaf, self buffers, and MPI requests */
101   link->rootbuflen = rootoffset[nrootranks]-rootoffset[ndrootranks];
102   link->leafbuflen = leafoffset[nleafranks]-leafoffset[ndleafranks];
103   link->selfbuflen = rootoffset[ndrootranks]*link->unitbytes;
104   link->nrootreqs  = nrootreqs;
105   link->nleafreqs  = nleafreqs;
106   nreqs            = (nrootreqs+nleafreqs)*4; /* Quadruple the requests since there are two communication directions and two memory types */
107   ierr             = PetscMalloc1(nreqs,&link->reqs);CHKERRQ(ierr);
108   for (i=0; i<nreqs; i++) link->reqs[i] = MPI_REQUEST_NULL; /* Initialized to NULL so that we know which need to be freed in Destroy */
109 
110   for (i=0; i<2; i++) { /* Two communication directions */
111     for (j=0; j<2; j++) { /* Two memory types */
112       link->rootreqs[i][j] = link->reqs + nrootreqs*(2*i+j);
113       link->leafreqs[i][j] = link->reqs + nrootreqs*4 + nleafreqs*(2*i+j);
114     }
115   }
116 
117 found:
118   link->rootmtype = rootmtype;
119   link->leafmtype = leafmtype;
120 #if defined(PETSC_HAVE_CUDA)
121   ierr = PetscSFPackSetUp_Device(sf,link,unit);CHKERRQ(ierr);
122   if (!use_gpu_aware_mpi) {
123     /* If not using GPU aware MPI, we always need buffers on host. In case root/leafdata is on device, we copy root/leafdata to/from
124        these buffers for MPI. We only need buffers for remote neighbors since self-to-self communication is not done via MPI.
125      */
126     if (!link->rootbuf[PETSC_MEMTYPE_HOST]) {ierr = PetscMallocWithMemType(PETSC_MEMTYPE_HOST,link->rootbuflen*link->unitbytes,(void**)&link->rootbuf[PETSC_MEMTYPE_HOST]);CHKERRQ(ierr);}
127     if (!link->leafbuf[PETSC_MEMTYPE_HOST]) {ierr = PetscMallocWithMemType(PETSC_MEMTYPE_HOST,link->leafbuflen*link->unitbytes,(void**)&link->leafbuf[PETSC_MEMTYPE_HOST]);CHKERRQ(ierr);}
128   }
129 #endif
130   if (!link->rootbuf[rootmtype]) {ierr = PetscMallocWithMemType(rootmtype,link->rootbuflen*link->unitbytes,(void**)&link->rootbuf[rootmtype]);CHKERRQ(ierr);}
131   if (!link->leafbuf[leafmtype]) {ierr = PetscMallocWithMemType(leafmtype,link->leafbuflen*link->unitbytes,(void**)&link->leafbuf[leafmtype]);CHKERRQ(ierr);}
132   if (!link->selfbuf[rootmtype]) {ierr = PetscMallocWithMemType(rootmtype,link->selfbuflen*link->unitbytes,(void**)&link->selfbuf[rootmtype]);CHKERRQ(ierr);}
133   if (rootmtype != leafmtype && !link->selfbuf[leafmtype]) {ierr = PetscMallocWithMemType(leafmtype,link->selfbuflen*link->unitbytes,(void**)&link->selfbuf[leafmtype]);CHKERRQ(ierr);}
134   link->rkey = rootdata;
135   link->lkey = leafdata;
136   link->next = bas->inuse;
137   bas->inuse = link;
138 
139   *mylink    = link;
140   PetscFunctionReturn(0);
141 }
142 
143 static PetscErrorCode PetscSFPackGet_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscSFDirection direction,PetscSFPack *mylink)
144 {
145   PetscErrorCode    ierr;
146   PetscInt          nrootranks,ndrootranks,nleafranks,ndleafranks;
147 
148   PetscFunctionBegin;
149   ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,NULL,NULL);CHKERRQ(ierr);
150   ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,NULL,NULL,NULL);CHKERRQ(ierr);
151   ierr = PetscSFPackGet_Basic_Common(sf,unit,rootmtype,rootdata,leafmtype,leafdata,nrootranks-ndrootranks,nleafranks-ndleafranks,mylink);CHKERRQ(ierr);
152   PetscFunctionReturn(0);
153 }
154 
155 /*===================================================================================*/
156 /*              SF public interface implementations                                  */
157 /*===================================================================================*/
PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
{
  PetscErrorCode ierr;
  PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
  PetscInt       *rlengths,*ilengths,i;
  PetscMPIInt    rank,niranks,*iranks,tag;
  MPI_Comm       comm;
  MPI_Group      group;
  MPI_Request    *rootreqs,*leafreqs;

  PetscFunctionBegin;
  /* Use the self group so the only distinguished rank is the local process itself */
  ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRQ(ierr);
  ierr = PetscSFSetUpRanks(sf,group);CHKERRQ(ierr);
  ierr = MPI_Group_free(&group);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)sf,&tag);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  /*
   * Inform roots about how many leaves and from which ranks
   */
  ierr = PetscMalloc1(sf->nranks,&rlengths);CHKERRQ(ierr);
  /* Determine number, sending ranks, and length of incoming */
  for (i=0; i<sf->nranks; i++) {
    rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */
  }
  /* Exchange lengths with the remote (non-distinguished) ranks; outputs describe incoming traffic */
  ierr = PetscCommBuildTwoSided(comm,1,MPIU_INT,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths);CHKERRQ(ierr);

  /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why.
     We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is
     small and the sorting is cheap.
   */
  ierr = PetscSortMPIIntWithIntArray(niranks,iranks,ilengths);CHKERRQ(ierr);

  /* Partition into distinguished and non-distinguished incoming ranks */
  bas->ndiranks = sf->ndranks;
  bas->niranks = bas->ndiranks + niranks;
  ierr = PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset);CHKERRQ(ierr);
  bas->ioffset[0] = 0;
  /* Distinguished ranks come first; their lengths are known locally since they are self-communication */
  for (i=0; i<bas->ndiranks; i++) {
    bas->iranks[i] = sf->ranks[i];
    bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i];
  }
  if (bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks");
  /* Continue with the non-distinguished ranks discovered by the two-sided exchange (i continues from above) */
  for ( ; i<bas->niranks; i++) {
    bas->iranks[i] = iranks[i-bas->ndiranks];
    bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks];
  }
  bas->itotal = bas->ioffset[i]; /* Total number of incoming root references */
  ierr = PetscFree(rlengths);CHKERRQ(ierr);
  ierr = PetscFree(iranks);CHKERRQ(ierr);
  ierr = PetscFree(ilengths);CHKERRQ(ierr);

  /* Send leaf identities to roots */
  ierr = PetscMalloc1(bas->itotal,&bas->irootloc);CHKERRQ(ierr);
  ierr = PetscMalloc2(bas->niranks-bas->ndiranks,&rootreqs,sf->nranks-sf->ndranks,&leafreqs);CHKERRQ(ierr);
  for (i=bas->ndiranks; i<bas->niranks; i++) {
    ierr = MPI_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],tag,comm,&rootreqs[i-bas->ndiranks]);CHKERRQ(ierr);
  }
  for (i=0; i<sf->nranks; i++) {
    PetscMPIInt npoints;
    ierr = PetscMPIIntCast(sf->roffset[i+1] - sf->roffset[i],&npoints);CHKERRQ(ierr);
    if (i < sf->ndranks) {
      /* Distinguished (self) rank: copy directly instead of messaging, after sanity checks */
      if (sf->ranks[i] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank");
      if (bas->iranks[0] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank");
      if (npoints != bas->ioffset[1]-bas->ioffset[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths");
      ierr = PetscArraycpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints);CHKERRQ(ierr);
      continue;
    }
    ierr = MPI_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],tag,comm,&leafreqs[i-sf->ndranks]);CHKERRQ(ierr);
  }
  ierr = MPI_Waitall(bas->niranks-bas->ndiranks,rootreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
  ierr = MPI_Waitall(sf->nranks-sf->ndranks,leafreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
  ierr = PetscFree2(rootreqs,leafreqs);CHKERRQ(ierr);

  sf->selfleafdups    = PETSC_TRUE; /* The conservative assumption is that there are data races (duplicate indices) */
  sf->remoteleafdups  = PETSC_TRUE;
  bas->selfrootdups   = PETSC_TRUE;
  bas->remoterootdups = PETSC_TRUE;

  /* Setup packing optimization for roots and leaves */
  ierr = PetscSFPackSetupOptimizations_Basic(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
241 
static PetscErrorCode PetscSFSetFromOptions_Basic(PetscOptionItems *PetscOptionsObject,PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* No type-specific options are registered between Head and Tail yet; the pair is kept so the
     options section exists for future SFBasic options */
  ierr = PetscOptionsHead(PetscOptionsObject,"PetscSF Basic options");CHKERRQ(ierr);
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
251 
252 PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf)
253 {
254   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
255   PetscErrorCode    ierr;
256 
257   PetscFunctionBegin;
258   if (bas->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed");
259   ierr = PetscFree2(bas->iranks,bas->ioffset);CHKERRQ(ierr);
260   ierr = PetscFree(bas->irootloc);CHKERRQ(ierr);
261 #if defined(PETSC_HAVE_CUDA)
262   if (bas->irootloc_d) {cudaError_t err = cudaFree(bas->irootloc_d);CHKERRCUDA(err);bas->irootloc_d=NULL;}
263 #endif
264   ierr = PetscSFPackDestroyAvailable(&bas->avail);CHKERRQ(ierr);
265   ierr = PetscSFPackDestroyOptimizations_Basic(sf);CHKERRQ(ierr);
266   PetscFunctionReturn(0);
267 }
268 
PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Reset releases the communication data; then free the PetscSF_Basic struct itself */
  ierr = PetscSFReset(sf);CHKERRQ(ierr);
  ierr = PetscFree(sf->data);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
278 
279 PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer)
280 {
281   PetscErrorCode ierr;
282   PetscBool      iascii;
283 
284   PetscFunctionBegin;
285   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
286   if (iascii) {ierr = PetscViewerASCIIPrintf(viewer,"  sort=%s\n",sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr);}
287   PetscFunctionReturn(0);
288 }
289 
290 static PetscErrorCode PetscSFBcastAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
291 {
292   PetscErrorCode    ierr;
293   PetscSFPack       link;
294   const PetscInt    *rootloc = NULL;
295   MPI_Request       *rootreqs,*leafreqs;
296 
297   PetscFunctionBegin;
298   ierr = PetscSFPackGet_Basic(sf,unit,rootmtype,rootdata,leafmtype,leafdata,PETSCSF_ROOT2LEAF_BCAST,&link);CHKERRQ(ierr);
299   ierr = PetscSFGetRootIndicesWithMemType_Basic(sf,rootmtype,&rootloc);CHKERRQ(ierr);
300 
301   ierr = PetscSFPackGetReqs_Basic(sf,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);CHKERRQ(ierr);
302   /* Post Irecv. Note distinguished ranks receive data via shared memory (i.e., not via MPI) */
303   ierr = MPI_Startall_irecv(link->leafbuflen,unit,link->nleafreqs,leafreqs);CHKERRQ(ierr);
304 
305   /* Do Isend */
306   ierr = PetscSFPackRootData(sf,link,rootloc,rootdata,PETSC_TRUE);CHKERRQ(ierr);
307   ierr = MPI_Startall_isend(link->rootbuflen,unit,link->nrootreqs,rootreqs);CHKERRQ(ierr);
308 
309   /* Do self to self communication via memcpy only when rootdata and leafdata are in different memory */
310   if (rootmtype != leafmtype) {ierr = PetscMemcpyWithMemType(leafmtype,rootmtype,link->selfbuf[leafmtype],link->selfbuf[rootmtype],link->selfbuflen*link->unitbytes);CHKERRQ(ierr);}
311   PetscFunctionReturn(0);
312 }
313 
314 PETSC_INTERN PetscErrorCode PetscSFBcastAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
315 {
316   PetscErrorCode    ierr;
317   PetscSFPack       link;
318   const PetscInt    *leafloc = NULL;
319 
320   PetscFunctionBegin;
321   ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
322   ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr);
323   ierr = PetscSFGetLeafIndicesWithMemType_Basic(sf,leafmtype,&leafloc);CHKERRQ(ierr);
324   ierr = PetscSFUnpackAndOpLeafData(sf,link,leafloc,leafdata,op,PETSC_TRUE);CHKERRQ(ierr);
325   ierr = PetscSFPackReclaim(sf,&link);CHKERRQ(ierr);
326   PetscFunctionReturn(0);
327 }
328 
329 /* leaf -> root with reduction */
330 static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
331 {
332   PetscErrorCode    ierr;
333   PetscSFPack       link;
334   const PetscInt    *leafloc = NULL;
335   MPI_Request       *rootreqs = NULL,*leafreqs = NULL; /* dummy values for compiler warnings about uninitialized value */
336 
337   PetscFunctionBegin;
338   ierr = PetscSFGetLeafIndicesWithMemType_Basic(sf,leafmtype,&leafloc);
339 
340   ierr = PetscSFPackGet_Basic(sf,unit,rootmtype,rootdata,leafmtype,leafdata,PETSCSF_LEAF2ROOT_REDUCE,&link);CHKERRQ(ierr);
341   ierr = PetscSFPackGetReqs_Basic(sf,link,PETSCSF_LEAF2ROOT_REDUCE,&rootreqs,&leafreqs);CHKERRQ(ierr);
342   /* Eagerly post root receives for non-distinguished ranks */
343   ierr = MPI_Startall_irecv(link->rootbuflen,unit,link->nrootreqs,rootreqs);CHKERRQ(ierr);
344 
345   /* Pack and send leaf data */
346   ierr = PetscSFPackLeafData(sf,link,leafloc,leafdata,PETSC_TRUE);CHKERRQ(ierr);
347   ierr = MPI_Startall_isend(link->leafbuflen,unit,link->nleafreqs,leafreqs);CHKERRQ(ierr);
348 
349   if (rootmtype != leafmtype) {ierr = PetscMemcpyWithMemType(rootmtype,leafmtype,link->selfbuf[rootmtype],link->selfbuf[leafmtype],link->selfbuflen*link->unitbytes);CHKERRQ(ierr);}
350   PetscFunctionReturn(0);
351 }
352 
353 PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
354 {
355   PetscErrorCode    ierr;
356   PetscSFPack       link;
357   const PetscInt    *rootloc = NULL;
358 
359   PetscFunctionBegin;
360   ierr = PetscSFGetRootIndicesWithMemType_Basic(sf,rootmtype,&rootloc);CHKERRQ(ierr);
361   ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
362   ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr);
363   ierr = PetscSFUnpackAndOpRootData(sf,link,rootloc,rootdata,op,PETSC_TRUE);CHKERRQ(ierr);
364   ierr = PetscSFPackReclaim(sf,&link);CHKERRQ(ierr);
365   PetscFunctionReturn(0);
366 }
367 
PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* The first phase of fetch-and-op is the same as a reduce: ship leaf data to the roots. The fetched
     values are returned to the leaves in PetscSFFetchAndOpEnd_Basic. NOTE(review): this calls the
     public PetscSFReduceBegin rather than the _Basic variant, so rootmtype/leafmtype/leafupdate are
     unused here — presumably the public routine re-derives the memory types itself; confirm. */
  ierr = PetscSFReduceBegin(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
376 
static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode    ierr;
  PetscSFPack       link;
  const PetscInt    *rootloc = NULL,*leafloc = NULL;
  MPI_Request       *rootreqs,*leafreqs;

  PetscFunctionBegin;
  ierr = PetscSFGetRootIndicesWithMemType_Basic(sf,rootmtype,&rootloc);CHKERRQ(ierr);
  ierr = PetscSFGetLeafIndicesWithMemType_Basic(sf,leafmtype,&leafloc);CHKERRQ(ierr);
  ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
  /* Finish the leaf->root reduce started in FetchAndOpBegin.
     This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
  ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr);
  /* Reuse the link's root->leaf persistent requests to ship the fetched root values back to the leaves */
  ierr = PetscSFPackGetReqs_Basic(sf,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);CHKERRQ(ierr);

  /* Post leaf receives */
  ierr = MPI_Startall_irecv(link->leafbuflen,unit,link->nleafreqs,leafreqs);CHKERRQ(ierr);

  /* Process local fetch-and-op, post root sends */
  ierr = PetscSFFetchAndOpRootData(sf,link,rootloc,rootdata,op,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MPI_Startall_isend(link->rootbuflen,unit,link->nrootreqs,rootreqs);CHKERRQ(ierr);
  /* Self-to-self part: copy between self buffers only when root/leaf data live in different memory */
  if (rootmtype != leafmtype) {ierr = PetscMemcpyWithMemType(leafmtype,rootmtype,link->selfbuf[leafmtype],link->selfbuf[rootmtype],link->selfbuflen*link->unitbytes);CHKERRQ(ierr);}

  /* Unpack and insert fetched data into leaves */
  ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr);
  ierr = PetscSFUnpackAndOpLeafData(sf,link,leafloc,leafupdate,MPIU_REPLACE,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscSFPackReclaim(sf,&link);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
406 
407 PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
408 {
409   PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
410 
411   PetscFunctionBegin;
412   if (niranks)  *niranks  = bas->niranks;
413   if (iranks)   *iranks   = bas->iranks;
414   if (ioffset)  *ioffset  = bas->ioffset;
415   if (irootloc) *irootloc = bas->irootloc;
416   PetscFunctionReturn(0);
417 }
418 
419 /* An optimized PetscSFCreateEmbeddedSF. We aggresively make use of the established communication on sf.
420    We need one bcast on sf, and no communication anymore to build the embedded sf. Note that selected[]
421    was sorted before calling the routine.
422  */
PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
{
  PetscSF           esf;
  PetscInt          esf_nranks,esf_ndranks,*esf_roffset,*esf_rmine,*esf_rremote,count;
  PetscInt          i,j,k,p,q,nroots,*rootdata,*leafdata,connected_leaves,*new_ilocal,nranks,ndranks,niranks,ndiranks,minleaf,maxleaf,maxlocal;
  PetscMPIInt       *esf_ranks;
  const PetscMPIInt *ranks,*iranks;
  const PetscInt    *roffset,*rmine,*rremote,*ioffset,*irootloc,*buffer;
  PetscBool         connected;
  PetscSFPack       link;
  PetscSFNode       *new_iremote;
  PetscSF_Basic     *bas;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);CHKERRQ(ierr);
  ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */

  /* Find out which leaves are still connected to roots in the embedded sf */
  ierr = PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
  /* We abused the term leafdata here, whose size is usually the number of leaf data items. Here its size is # of leaves (always >= # of leaf data items) */
  maxlocal = (minleaf > maxleaf)? 0 : maxleaf-minleaf+1; /* maxleaf=-1 and minleaf=0 when nleaves=0 */
  ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafdata);CHKERRQ(ierr);
  /* Tag selected roots */
  for (i=0; i<nselected; ++i) rootdata[selected[i]] = 1;

  /* Bcast from roots to leaves to tag connected leaves. We reuse the established bcast communication in
     sf but do not do unpacking (from leaf buffer to leafdata). The raw data in leaf buffer is what we are
     interested in since it tells which leaves are connected to which ranks.
   */
  ierr = PetscSFBcastAndOpBegin_Basic(sf,MPIU_INT,PETSC_MEMTYPE_HOST,rootdata,PETSC_MEMTYPE_HOST,leafdata-minleaf,MPIU_REPLACE);CHKERRQ(ierr); /* Need to give leafdata but we won't use it */
  ierr = PetscSFPackGetInUse(sf,MPIU_INT,rootdata,leafdata-minleaf,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
  ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr);
  ierr = PetscSFGetLeafInfo_Basic(sf,&nranks,&ndranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr); /* Get send info */
  /* First scan of the leaf buffers: count connected leaves and connected (distinguished) root ranks */
  esf_nranks = esf_ndranks = connected_leaves = 0;
  for (i=0; i<nranks; i++) {
    connected = PETSC_FALSE; /* Is the current process still connected to this remote root rank? */
    /* Distinguished ranks delivered their data into selfbuf; remote ranks into leafbuf (offset past the distinguished part) */
    buffer    = i < ndranks? (PetscInt*)link->selfbuf[PETSC_MEMTYPE_HOST] : (PetscInt*)link->leafbuf[PETSC_MEMTYPE_HOST] + (roffset[i] - roffset[ndranks]);
    count     = roffset[i+1] - roffset[i];
    for (j=0; j<count; j++) {if (buffer[j]) {connected_leaves++; connected = PETSC_TRUE;}}
    if (connected) {esf_nranks++; if (i < ndranks) esf_ndranks++;}
  }

  /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */
  ierr = PetscMalloc1(connected_leaves,&new_ilocal);CHKERRQ(ierr);
  ierr = PetscMalloc1(connected_leaves,&new_iremote);CHKERRQ(ierr);
  ierr = PetscMalloc4(esf_nranks,&esf_ranks,esf_nranks+1,&esf_roffset,connected_leaves,&esf_rmine,connected_leaves,&esf_rremote);CHKERRQ(ierr);
  p    = 0; /* Counter for connected root ranks */
  q    = 0; /* Counter for connected leaves */
  esf_roffset[0] = 0;
  /* Second scan: fill the graph arrays and the send-side arrays of esf in one pass */
  for (i=0; i<nranks; i++) { /* Scan leaf data again to fill esf arrays */
    buffer    = i < ndranks? (PetscInt*)link->selfbuf[PETSC_MEMTYPE_HOST] : (PetscInt*)link->leafbuf[PETSC_MEMTYPE_HOST] + (roffset[i] - roffset[ndranks]);
    connected = PETSC_FALSE;
    for (j=roffset[i],k=0; j<roffset[i+1]; j++,k++) {
        if (buffer[k]) {
        esf_rmine[q]         = new_ilocal[q] = rmine[j];
        esf_rremote[q]       = rremote[j];
        new_iremote[q].index = rremote[j];
        new_iremote[q].rank  = ranks[i];
        connected            = PETSC_TRUE;
        q++;
      }
    }
    if (connected) {
      esf_ranks[p]     = ranks[i];
      esf_roffset[p+1] = q;
      p++;
    }
  }

  /* Done with the borrowed bcast buffers; return the link */
  ierr = PetscSFPackReclaim(sf,&link);CHKERRQ(ierr);

  /* SetGraph internally resets the SF, so we only set its fields after the call */
  ierr         = PetscSFSetGraph(esf,nroots,connected_leaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  esf->nranks  = esf_nranks;
  esf->ndranks = esf_ndranks;
  esf->ranks   = esf_ranks;
  esf->roffset = esf_roffset;
  esf->rmine   = esf_rmine;
  esf->rremote = esf_rremote;

  /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */
  bas  = (PetscSF_Basic*)esf->data;
  ierr = PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr); /* Get recv info */
  /* Embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here. But we
     expect these arrays are usually short, so we do not care. The benefit is we can fill these arrays by just parsing irootloc once.
   */
  ierr = PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);CHKERRQ(ierr);
  ierr = PetscMalloc1(ioffset[niranks],&bas->irootloc);CHKERRQ(ierr);
  bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
  p = 0; /* Counter for connected leaf ranks */
  q = 0; /* Counter for connected roots */
  for (i=0; i<niranks; i++) {
    connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
    for (j=ioffset[i]; j<ioffset[i+1]; j++) {
      PetscInt loc;
      /* selected[] is sorted, so a binary search decides whether this root survives in the embedded sf */
      ierr = PetscFindInt(irootloc[j],nselected,selected,&loc);CHKERRQ(ierr);
      if (loc >= 0) { /* Found in selected this root is connected */
        bas->irootloc[q++] = irootloc[j];
        connected = PETSC_TRUE;
      }
    }
    if (connected) {
      bas->niranks++;
      if (i<ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */
      bas->iranks[p]    = iranks[i];
      bas->ioffset[p+1] = q;
      p++;
    }
  }
  bas->itotal = q;

  /* Setup packing optimizations */
  ierr = PetscSFPackSetupOptimizations_Basic(esf);CHKERRQ(ierr);
  esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */

  ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr);
  *newsf = esf;
  PetscFunctionReturn(0);
}
544 
PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedLeafSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
{
  PetscSF           esf;
  PetscInt          i,j,k,p,q,nroots,*rootdata,*leafdata,*new_ilocal,niranks,ndiranks,minleaf,maxleaf,maxlocal;
  const PetscInt    *ilocal,*ioffset,*irootloc,*buffer;
  const PetscMPIInt *iranks;
  PetscSFPack       link;
  PetscSFNode       *new_iremote;
  const PetscSFNode *iremote;
  PetscSF_Basic     *bas;
  MPI_Group         group;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);CHKERRQ(ierr);
  ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */

  /* Set the graph of esf, which is easy for CreateEmbeddedLeafSF */
  ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
  ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
  ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
  /* The embedded graph keeps exactly the selected leaves of sf, with their original root targets */
  for (i=0; i<nselected; i++) {
    const PetscInt l     = selected[i];
    new_ilocal[i]        = ilocal ? ilocal[l] : l;
    new_iremote[i].rank  = iremote[l].rank;
    new_iremote[i].index = iremote[l].index;
  }

  /* Tag selected leaves before PetscSFSetGraph since new_ilocal might turn into NULL since we use PETSC_OWN_POINTER below */
  maxlocal = (minleaf > maxleaf)? 0 : maxleaf-minleaf+1; /* maxleaf=-1 and minleaf=0 when nleaves=0 */
  ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafdata);CHKERRQ(ierr);
  for (i=0; i<nselected; i++) leafdata[new_ilocal[i]-minleaf] = 1; /* -minleaf to adjust indices according to minleaf */

  ierr = PetscSFSetGraph(esf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);

  /* Set up the outgoing communication (i.e., send info). We can not reuse rmine etc in sf since there is no way to
     map rmine[i] (ilocal of leaves) back to selected[j]  (leaf indices).
   */
  ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRQ(ierr);
  ierr = PetscSFSetUpRanks(esf,group);CHKERRQ(ierr);
  ierr = MPI_Group_free(&group);CHKERRQ(ierr);

  /* Set up the incoming communication (i.e., recv info) */
  ierr = PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr);
  bas  = (PetscSF_Basic*)esf->data;
  /* Possibly over-sized (the embedded sf can only shrink communication); see the same pattern in CreateEmbeddedSF */
  ierr = PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);CHKERRQ(ierr);
  ierr = PetscMalloc1(ioffset[niranks],&bas->irootloc);CHKERRQ(ierr);

  /* Pass info about selected leaves to root buffer */
  ierr = PetscSFReduceBegin_Basic(sf,MPIU_INT,PETSC_MEMTYPE_HOST,leafdata-minleaf,PETSC_MEMTYPE_HOST,rootdata,MPIU_REPLACE);CHKERRQ(ierr); /* -minleaf to re-adjust start address of leafdata */
  ierr = PetscSFPackGetInUse(sf,MPIU_INT,rootdata,leafdata-minleaf,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
  ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr);

  /* Scan the raw root buffers (not the unpacked rootdata): a nonzero entry means the sending leaf rank
     still references that root in the embedded sf */
  bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
  p = 0; /* Counter for connected leaf ranks */
  q = 0; /* Counter for connected roots */
  for (i=0; i<niranks; i++) {
    PetscBool connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
    /* Distinguished ranks delivered their data into selfbuf; remote ranks into rootbuf (offset past the distinguished part) */
    buffer = i < ndiranks? (PetscInt*)link->selfbuf[PETSC_MEMTYPE_HOST] : (PetscInt*)link->rootbuf[PETSC_MEMTYPE_HOST] + (ioffset[i] - ioffset[ndiranks]);
    for (j=ioffset[i],k=0; j<ioffset[i+1]; j++,k++) {
      if (buffer[k]) {bas->irootloc[q++] = irootloc[j]; connected = PETSC_TRUE;}
    }
    if (connected) {
      bas->niranks++;
      if (i<ndiranks) bas->ndiranks++;
      bas->iranks[p]    = iranks[i];
      bas->ioffset[p+1] = q;
      p++;
    }
  }
  bas->itotal = q;
  ierr = PetscSFPackReclaim(sf,&link);CHKERRQ(ierr);

  /* Setup packing optimizations */
  ierr = PetscSFPackSetupOptimizations_Basic(esf);CHKERRQ(ierr);
  esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */

  ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr);
  *newsf = esf;
  PetscFunctionReturn(0);
}
627 
628 PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
629 {
630   PetscSF_Basic  *dat;
631   PetscErrorCode ierr;
632 
633   PetscFunctionBegin;
634   sf->ops->SetUp                = PetscSFSetUp_Basic;
635   sf->ops->SetFromOptions       = PetscSFSetFromOptions_Basic;
636   sf->ops->Reset                = PetscSFReset_Basic;
637   sf->ops->Destroy              = PetscSFDestroy_Basic;
638   sf->ops->View                 = PetscSFView_Basic;
639   sf->ops->BcastAndOpBegin      = PetscSFBcastAndOpBegin_Basic;
640   sf->ops->BcastAndOpEnd        = PetscSFBcastAndOpEnd_Basic;
641   sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
642   sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
643   sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
644   sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
645   sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
646   sf->ops->CreateEmbeddedSF     = PetscSFCreateEmbeddedSF_Basic;
647   sf->ops->CreateEmbeddedLeafSF = PetscSFCreateEmbeddedLeafSF_Basic;
648 
649   ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr);
650   sf->data = (void*)dat;
651   PetscFunctionReturn(0);
652 }
653