
#include <../src/vec/is/sf/impls/basic/sfbasic.h>

/*===================================================================================*/
/*              Internal routines for PetscSFPack_Basic                              */
/*===================================================================================*/

/* Return the root and leaf MPI requests for communication in the given direction. Since we use
   persistent requests, initialize them on first use.
*/
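/* Layout of link->requests: the array has 2*link->half entries, where half = nrootranks + nleafranks
   (see PetscSFPackGet_Basic below). The first half serves leaf2root (reduce) communication and the
   second half serves root2leaf (bcast). Within each half, entries [0,nrootranks) are root-side
   requests and entries [nrootranks,half) are leaf-side requests, which is why leafreqs is returned
   below as requests + bas->niranks.
*/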
static PetscErrorCode PetscSFPackGetReqs_Basic(PetscSF sf,MPI_Datatype unit,PetscSFPack_Basic link,PetscSFDirection direction,MPI_Request **rootreqs,MPI_Request **leafreqs)
{
  PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
  MPI_Request    *requests = (direction == PETSCSF_LEAF2ROOT_REDUCE)? link->requests : link->requests + link->half;
  PetscInt       i,nrootranks,ndrootranks,nleafranks,ndleafranks;
  const PetscInt *rootoffset,*leafoffset;
  PetscMPIInt    n;
  MPI_Comm       comm = PetscObjectComm((PetscObject)sf);
  PetscErrorCode ierr;

  PetscFunctionBegin;

  if (!link->initialized[direction]) {
    ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr);
    ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr);
    if (direction == PETSCSF_LEAF2ROOT_REDUCE) {
      for (i=0; i<nrootranks; i++) {
        if (i >= ndrootranks) {
          ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr);
          ierr = MPI_Recv_init(link->root[i],n,unit,bas->iranks[i],link->tag,comm,&requests[i]);CHKERRQ(ierr);
        }
      }
      for (i=0; i<nleafranks; i++) {
        if (i >= ndleafranks) {
          ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr);
          ierr = MPI_Send_init(link->leaf[i],n,unit,sf->ranks[i],link->tag,comm,&requests[nrootranks+i]);CHKERRQ(ierr);
        }
      }
    } else if (direction == PETSCSF_ROOT2LEAF_BCAST) {
      for (i=0; i<nrootranks; i++) {
        if (i >= ndrootranks) {
          ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr);
          ierr = MPI_Send_init(link->root[i],n,unit,bas->iranks[i],link->tag,comm,&requests[i]);CHKERRQ(ierr);
        }
      }
      for (i=0; i<nleafranks; i++) {
        if (i >= ndleafranks) {
          ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr);
          ierr = MPI_Recv_init(link->leaf[i],n,unit,sf->ranks[i],link->tag,comm,&requests[nrootranks+i]);CHKERRQ(ierr);
        }
      }
    } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Out-of-range PetscSFDirection = %D\n",direction);

    link->initialized[direction] = PETSC_TRUE;
  }

  if (rootreqs) *rootreqs = requests;
  if (leafreqs) *leafreqs = requests + bas->niranks;
  PetscFunctionReturn(0);
}

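/* Get a pack link for the given unit type. Links are cached on bas->avail and keyed by the MPI datatype;
   on a cache miss we allocate the per-rank root and leaf buffers, the doubled request array, and a fresh
   tag. The returned link is moved onto the bas->inuse list until it is reclaimed by PetscSFPackReclaim().
*/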
static PetscErrorCode PetscSFPackGet_Basic(PetscSF sf,MPI_Datatype unit,const void *rkey,const void *lkey,PetscSFDirection direction,PetscSFPack_Basic *mylink)
{
  PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
  PetscErrorCode    ierr;
  PetscInt          i,nrootranks,ndrootranks,nleafranks,ndleafranks;
  const PetscInt    *rootoffset,*leafoffset;
  PetscSFPack       *p;
  PetscSFPack_Basic link;

  PetscFunctionBegin;
  ierr = PetscSFPackSetErrorOnUnsupportedOverlap(sf,unit,rkey,lkey);CHKERRQ(ierr);

  /* Look for types in cache */
  for (p=&bas->avail; (link=(PetscSFPack_Basic)*p); p=&link->next) {
    PetscBool match;
    ierr = MPIPetsc_Type_compare(unit,link->unit,&match);CHKERRQ(ierr);
    if (match) {
      *p = link->next; /* Remove from available list */
      goto found;
    }
  }

  ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr);
  ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscNew(&link);CHKERRQ(ierr);
  ierr = PetscSFPackSetupType((PetscSFPack)link,unit);CHKERRQ(ierr);
  ierr = PetscMalloc2(nrootranks,&link->root,nleafranks,&link->leaf);CHKERRQ(ierr);
  /* Double the requests. The first half are used for reduce (leaf2root) communication, the second half for bcast (root2leaf) communication */
  link->half = nrootranks + nleafranks;
  ierr       = PetscMalloc1(link->half*2,&link->requests);CHKERRQ(ierr);
  for (i=0; i<link->half*2; i++) link->requests[i] = MPI_REQUEST_NULL; /* Must be initialized since some requests are unused, yet we call MPI_Waitall on the whole array */
  /* One tag per link */
  ierr = PetscCommGetNewTag(PetscObjectComm((PetscObject)sf),&link->tag);CHKERRQ(ierr);

  /* Allocate root and leaf buffers */
  for (i=0; i<nrootranks; i++) {ierr = PetscMalloc((rootoffset[i+1]-rootoffset[i])*link->unitbytes,&link->root[i]);CHKERRQ(ierr);}
  for (i=0; i<nleafranks; i++) {
    if (i < ndleafranks) { /* Leaf buffers for distinguished ranks are pointers directly into root buffers */
      if (ndrootranks != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot match distinguished ranks");
      link->leaf[i] = link->root[0];
      continue;
    }
    ierr = PetscMalloc((leafoffset[i+1]-leafoffset[i])*link->unitbytes,&link->leaf[i]);CHKERRQ(ierr);
  }

found:
  link->rkey = rkey;
  link->lkey = lkey;
  link->next = bas->inuse;
  bas->inuse = (PetscSFPack)link;

  *mylink    = link;
  PetscFunctionReturn(0);
}

/*===================================================================================*/
/*              SF public interface implementations                                  */
/*===================================================================================*/
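/* Set up the basic SF. After PetscSFSetUpRanks() has built the outgoing (leaf-to-root) side, perform a
   two-sided handshake so each process learns which ranks reference its roots and with how many entries,
   sort those incoming ranks, exchange the referenced root indices themselves (bas->irootloc), and
   finally build the packing optimizations for both sides.
*/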
PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
{
  PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
  PetscErrorCode ierr;
  PetscInt       *rlengths,*ilengths,i;
  PetscMPIInt    rank,niranks,*iranks,tag;
  MPI_Comm       comm;
  MPI_Group      group;
  MPI_Request    *rootreqs,*leafreqs;

  PetscFunctionBegin;
  ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRQ(ierr);
  ierr = PetscSFSetUpRanks(sf,group);CHKERRQ(ierr);
  ierr = MPI_Group_free(&group);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)sf,&tag);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  /*
   * Inform roots about how many leaves reference them and from which ranks
   */
  ierr = PetscMalloc1(sf->nranks,&rlengths);CHKERRQ(ierr);
  /* Determine the number, sending ranks, and lengths of incoming messages */
  for (i=0; i<sf->nranks; i++) {
    rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves, for rank sf->ranks[i] */
  }
  ierr = PetscCommBuildTwoSided(comm,1,MPIU_INT,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths);CHKERRQ(ierr);
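  /* After the handshake, iranks[]/ilengths[] list the niranks processes whose leaves reference our roots
     and how many entries each of them will send; the distinguished (self) ranks were excluded above. */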

  /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() for why.
     We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is
     small and the sorting is cheap.
   */
  ierr = PetscSortMPIIntWithIntArray(niranks,iranks,ilengths);CHKERRQ(ierr);

  /* Partition into distinguished and non-distinguished incoming ranks */
  bas->ndiranks = sf->ndranks;
  bas->niranks = bas->ndiranks + niranks;
  ierr = PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset);CHKERRQ(ierr);
  bas->ioffset[0] = 0;
  for (i=0; i<bas->ndiranks; i++) {
    bas->iranks[i] = sf->ranks[i];
    bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i];
  }
  if (bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks");
  for ( ; i<bas->niranks; i++) {
    bas->iranks[i] = iranks[i-bas->ndiranks];
    bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks];
  }
  bas->itotal = bas->ioffset[i];
  ierr = PetscFree(rlengths);CHKERRQ(ierr);
  ierr = PetscFree(iranks);CHKERRQ(ierr);
  ierr = PetscFree(ilengths);CHKERRQ(ierr);

  /* Send leaf identities to roots */
  ierr = PetscMalloc1(bas->itotal,&bas->irootloc);CHKERRQ(ierr);
  ierr = PetscMalloc2(bas->niranks-bas->ndiranks,&rootreqs,sf->nranks-sf->ndranks,&leafreqs);CHKERRQ(ierr);
  for (i=bas->ndiranks; i<bas->niranks; i++) {
    ierr = MPI_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],tag,comm,&rootreqs[i-bas->ndiranks]);CHKERRQ(ierr);
  }
  for (i=0; i<sf->nranks; i++) {
    PetscMPIInt npoints;
    ierr = PetscMPIIntCast(sf->roffset[i+1] - sf->roffset[i],&npoints);CHKERRQ(ierr);
    if (i < sf->ndranks) {
      if (sf->ranks[i] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank");
      if (bas->iranks[0] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank");
      if (npoints != bas->ioffset[1]-bas->ioffset[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths");
      ierr = PetscArraycpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints);CHKERRQ(ierr);
      continue;
    }
    ierr = MPI_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],tag,comm,&leafreqs[i-sf->ndranks]);CHKERRQ(ierr);
  }
  ierr = MPI_Waitall(bas->niranks-bas->ndiranks,rootreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
  ierr = MPI_Waitall(sf->nranks-sf->ndranks,leafreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
  ierr = PetscFree2(rootreqs,leafreqs);CHKERRQ(ierr);

  /* Set up packing optimizations for root and leaf sides */
  ierr = PetscSFPackSetupOptimization(sf->nranks,sf->roffset,sf->rmine,&sf->leafpackopt);CHKERRQ(ierr);
  ierr = PetscSFPackSetupOptimization(bas->niranks,bas->ioffset,bas->irootloc,&bas->rootpackopt);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PetscSFSetFromOptions_Basic(PetscOptionItems *PetscOptionsObject,PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscOptionsHead(PetscOptionsObject,"PetscSF Basic options");CHKERRQ(ierr);
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf)
{
  PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
  PetscErrorCode    ierr;
  PetscSFPack_Basic link,next;

  PetscFunctionBegin;
  if (bas->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed");
  ierr = PetscFree2(bas->iranks,bas->ioffset);CHKERRQ(ierr);
  ierr = PetscFree(bas->irootloc);CHKERRQ(ierr);
  for (link=(PetscSFPack_Basic)bas->avail; link; link=next) {
    PetscInt i;
    next = (PetscSFPack_Basic)link->next;
    if (!link->isbuiltin) {ierr = MPI_Type_free(&link->unit);CHKERRQ(ierr);}
    for (i=0; i<bas->niranks; i++) {ierr = PetscFree(link->root[i]);CHKERRQ(ierr);}
    for (i=sf->ndranks; i<sf->nranks; i++) {ierr = PetscFree(link->leaf[i]);CHKERRQ(ierr);} /* Free only non-distinguished leaf buffers */
    ierr = PetscFree2(link->root,link->leaf);CHKERRQ(ierr);
    /* Free persistent requests using MPI_Request_free */
    for (i=0; i<link->half*2; i++) {
      if (link->requests[i] != MPI_REQUEST_NULL) {ierr = MPI_Request_free(&link->requests[i]);CHKERRQ(ierr);}
    }
    ierr = PetscFree(link->requests);CHKERRQ(ierr);
    ierr = PetscFree(link);CHKERRQ(ierr);
  }
  bas->avail = NULL;
  ierr = PetscSFPackDestoryOptimization(&sf->leafpackopt);CHKERRQ(ierr);
  ierr = PetscSFPackDestoryOptimization(&bas->rootpackopt);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscSFReset(sf);CHKERRQ(ierr);
  ierr = PetscFree(sf->data);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerASCIIPrintf(viewer,"  sort=%s\n",sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode PetscSFBcastAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode    ierr;
  PetscSFPack_Basic link;
  PetscInt          i,nrootranks,ndrootranks,nleafranks,ndleafranks;
  const PetscInt    *rootoffset,*leafoffset,*rootloc,*leafloc;
  const PetscMPIInt *rootranks,*leafranks;
  MPI_Request       *rootreqs,*leafreqs;
  PetscMPIInt       n;
  PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;

  PetscFunctionBegin;
  ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr);
  ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc,NULL);CHKERRQ(ierr);
  ierr = PetscSFPackGet_Basic(sf,unit,rootdata,leafdata,PETSCSF_ROOT2LEAF_BCAST,&link);CHKERRQ(ierr);

  ierr = PetscSFPackGetReqs_Basic(sf,unit,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);CHKERRQ(ierr);
  /* Eagerly post leaf receives, but only from non-distinguished ranks -- distinguished ranks will receive via shared memory */
  ierr = PetscMPIIntCast(leafoffset[nleafranks]-leafoffset[ndleafranks],&n);CHKERRQ(ierr);
  ierr = MPI_Startall_irecv(n,unit,nleafranks-ndleafranks,leafreqs+ndleafranks);CHKERRQ(ierr); /* One can wait on, but not start, a null request */

  /* Pack and send root data */
  for (i=0; i<nrootranks; i++) {
    void *packstart = link->root[i];
    ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr);
    (*link->Pack)(n,link->bs,rootloc+rootoffset[i],i,bas->rootpackopt,rootdata,packstart);
    if (i < ndrootranks) continue; /* shared memory */
    ierr = MPI_Start_isend(n,unit,&rootreqs[i]);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

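/* For orientation, a sketch of the caller side (not code from this file): users typically pair the two
   halves as

     ierr = PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
     ... independent work overlapping the communication ...
     ierr = PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);

   which reaches the BcastAndOp implementations here, a plain bcast being the op = MPIU_REPLACE case.
*/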
PETSC_INTERN PetscErrorCode PetscSFBcastAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode    ierr;
  PetscSFPack_Basic link;
  PetscInt          i,nleafranks,ndleafranks;
  const PetscInt    *leafoffset,*leafloc;
  PetscErrorCode    (*UnpackAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);
  PetscMPIInt       typesize = -1;

  PetscFunctionBegin;
  ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr);
  ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr);
  ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,&leafloc,NULL);CHKERRQ(ierr);
  ierr = PetscSFPackGetUnpackAndOp(sf,(PetscSFPack)link,op,&UnpackAndOp);CHKERRQ(ierr);

  if (UnpackAndOp) { typesize = link->unitbytes; }
  else { ierr = MPI_Type_size(unit,&typesize);CHKERRQ(ierr); }

  for (i=0; i<nleafranks; i++) {
    PetscMPIInt n   = leafoffset[i+1] - leafoffset[i];
    char *packstart = (char *) link->leaf[i];
    if (UnpackAndOp) { (*UnpackAndOp)(n,link->bs,leafloc+leafoffset[i],i,sf->leafpackopt,leafdata,(const void *)packstart); }
#if defined(PETSC_HAVE_MPI_REDUCE_LOCAL)
    else if (n) { /* the op should be defined to operate on the whole datatype, so we ignore link->bs */
      PetscInt j;
      for (j=0; j<n; j++) { ierr = MPI_Reduce_local(packstart+j*typesize,((char *) leafdata)+(leafloc[leafoffset[i]+j])*typesize,1,unit,op);CHKERRQ(ierr); }
    }
#else
    else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No unpacking reduction operation for this MPI_Op");
#endif
  }

  ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* leaf -> root with reduction */
static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscSFPack_Basic link;
  PetscErrorCode    ierr;
  PetscInt          i,nrootranks,ndrootranks,nleafranks,ndleafranks;
  const PetscInt    *rootoffset,*leafoffset,*rootloc,*leafloc;
  const PetscMPIInt *rootranks,*leafranks;
  MPI_Request       *rootreqs,*leafreqs;
  PetscMPIInt       n;

  PetscFunctionBegin;
  ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr);
  ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc,NULL);CHKERRQ(ierr);
  ierr = PetscSFPackGet_Basic(sf,unit,rootdata,leafdata,PETSCSF_LEAF2ROOT_REDUCE,&link);CHKERRQ(ierr);

  ierr = PetscSFPackGetReqs_Basic(sf,unit,link,PETSCSF_LEAF2ROOT_REDUCE,&rootreqs,&leafreqs);CHKERRQ(ierr);
  /* Eagerly post root receives for non-distinguished ranks */
  ierr = PetscMPIIntCast(rootoffset[nrootranks]-rootoffset[ndrootranks],&n);CHKERRQ(ierr);
  ierr = MPI_Startall_irecv(n,unit,nrootranks-ndrootranks,rootreqs+ndrootranks);CHKERRQ(ierr);

  /* Pack and send leaf data */
  for (i=0; i<nleafranks; i++) {
    void *packstart = link->leaf[i];
    ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr);
    (*link->Pack)(n,link->bs,leafloc+leafoffset[i],i,sf->leafpackopt,leafdata,packstart);
    if (i < ndleafranks) continue; /* shared memory */
    ierr = MPI_Start_isend(n,unit,&leafreqs[i]);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

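/* A sketch of the matching caller-side pairing for reductions, assuming MPIU_SUM as the op:

     ierr = PetscSFReduceBegin(sf,MPIU_INT,leafdata,rootdata,MPIU_SUM);CHKERRQ(ierr);
     ierr = PetscSFReduceEnd(sf,MPIU_INT,leafdata,rootdata,MPIU_SUM);CHKERRQ(ierr);

   The End half below unpacks only after all receives complete, which keeps the result deterministic.
*/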
PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode    (*UnpackAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);
  PetscErrorCode    ierr;
  PetscSFPack_Basic link;
  PetscInt          i,nrootranks;
  PetscMPIInt       typesize = -1;
  const PetscInt    *rootoffset,*rootloc;
  PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;

  PetscFunctionBegin;
  ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr);
  /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
  ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr);
  ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,NULL,NULL,&rootoffset,&rootloc);CHKERRQ(ierr);
  ierr = PetscSFPackGetUnpackAndOp(sf,(PetscSFPack)link,op,&UnpackAndOp);CHKERRQ(ierr);
  if (UnpackAndOp) {
    typesize = link->unitbytes;
  } else {
    ierr = MPI_Type_size(unit,&typesize);CHKERRQ(ierr);
  }
  for (i=0; i<nrootranks; i++) {
    PetscMPIInt n   = rootoffset[i+1] - rootoffset[i];
    char *packstart = (char *) link->root[i];

    if (UnpackAndOp) {
      (*UnpackAndOp)(n,link->bs,rootloc+rootoffset[i],i,bas->rootpackopt,rootdata,(const void *)packstart);
    }
#if defined(PETSC_HAVE_MPI_REDUCE_LOCAL)
    else if (n) { /* the op should be defined to operate on the whole datatype, so we ignore link->bs */
      PetscInt j;

      for (j = 0; j < n; j++) {
        ierr = MPI_Reduce_local(packstart+j*typesize,((char *) rootdata)+(rootloc[rootoffset[i]+j])*typesize,1,unit,op);CHKERRQ(ierr);
      }
    }
#else
    else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No unpacking reduction operation for this MPI_Op");
#endif
  }
  ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

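/* Fetch-and-op: the Begin half is just a reduce of leafdata into rootdata; the End half applies op to
   rootdata while capturing the pre-update root values into the pack buffers, sends those values back,
   and inserts them into leafupdate. Each leaf thus sees the root state immediately before its own
   rank's contribution was applied.
*/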
PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscSFReduceBegin(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode    (*FetchAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);
  PetscErrorCode    ierr;
  PetscSFPack_Basic link;
  PetscInt          i,nrootranks,ndrootranks,nleafranks,ndleafranks;
  const PetscInt    *rootoffset,*leafoffset,*rootloc,*leafloc;
  const PetscMPIInt *rootranks,*leafranks;
  MPI_Request       *rootreqs,*leafreqs;
  PetscMPIInt       n;
  PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;

  PetscFunctionBegin;
  ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr);
  /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
  ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr);
  ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr);
  ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc,NULL);CHKERRQ(ierr);

  ierr = PetscSFPackGetReqs_Basic(sf,unit,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);CHKERRQ(ierr);
  /* Post leaf receives */
  ierr = PetscMPIIntCast(leafoffset[nleafranks]-leafoffset[ndleafranks],&n);CHKERRQ(ierr);
  ierr = MPI_Startall_irecv(n,unit,nleafranks-ndleafranks,leafreqs+ndleafranks);CHKERRQ(ierr);

  /* Process local fetch-and-op, post root sends */
  ierr = PetscSFPackGetFetchAndOp(sf,(PetscSFPack)link,op,&FetchAndOp);CHKERRQ(ierr);
  for (i=0; i<nrootranks; i++) {
    void *packstart = link->root[i];
    ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr);
    (*FetchAndOp)(n,link->bs,rootloc+rootoffset[i],i,bas->rootpackopt,rootdata,packstart);
    if (i < ndrootranks) continue; /* shared memory */
    ierr = MPI_Start_isend(n,unit,&rootreqs[i]);CHKERRQ(ierr);
  }
  ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr);
  for (i=0; i<nleafranks; i++) {
    const void  *packstart = link->leaf[i];
    ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr);
    (*link->UnpackAndInsert)(n,link->bs,leafloc+leafoffset[i],i,sf->leafpackopt,leafupdate,packstart);
  }
  ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
{
  PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;

  PetscFunctionBegin;
  if (niranks)  *niranks  = bas->niranks;
  if (iranks)   *iranks   = bas->iranks;
  if (ioffset)  *ioffset  = bas->ioffset;
  if (irootloc) *irootloc = bas->irootloc;
  PetscFunctionReturn(0);
}

/* An optimized PetscSFCreateEmbeddedSF. We aggressively reuse the established communication of sf:
   one bcast on sf suffices, and no further communication is needed to build the embedded sf. Note
   that selected[] was sorted before this routine is called.
 */
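/* A usage sketch with hypothetical indices: to restrict sf to the subgraph reachable from roots 0 and 5,

     const PetscInt selected[2] = {0,5};
     ierr = PetscSFCreateEmbeddedSF(sf,2,selected,&esf);CHKERRQ(ierr);

   which lands here when sf has type basic.
*/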
PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
{
  PetscSF           esf;
  PetscInt          esf_nranks,esf_ndranks,*esf_roffset,*esf_rmine,*esf_rremote;
  PetscInt          i,j,k,p,q,nroots,*rootdata,*leafdata,*leafbuf,connected_leaves,*new_ilocal,nranks,ndranks,niranks,ndiranks,minleaf,maxleaf,maxlocal;
  PetscMPIInt       *esf_ranks;
  const PetscMPIInt *ranks,*iranks;
  const PetscInt    *roffset,*rmine,*rremote,*ioffset,*irootloc;
  PetscBool         connected;
  PetscSFPack_Basic link;
  PetscSFNode       *new_iremote;
  PetscSF_Basic     *bas;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);CHKERRQ(ierr);
  ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */

  /* Find out which leaves are still connected to roots in the embedded sf */
  ierr = PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
  /* We abuse the term leafdata here: its size is usually the number of leaf data items, but here it is the number of leaves (always >= the number of leaf data items) */
  maxlocal = (minleaf > maxleaf)? 0 : maxleaf-minleaf+1; /* maxleaf=-1 and minleaf=0 when nleaves=0 */
  ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafdata);CHKERRQ(ierr);
  /* Tag selected roots */
  for (i=0; i<nselected; ++i) rootdata[selected[i]] = 1;

  /* Bcast from roots to leaves to tag connected leaves. We reuse the established bcast communication in
     sf but do not do unpacking (from leaf buffer to leafdata). The raw data in the leaf buffer is what we
     are interested in, since it tells which leaves are connected to which ranks.
   */
  ierr = PetscSFBcastAndOpBegin_Basic(sf,MPIU_INT,rootdata,leafdata-minleaf,MPIU_REPLACE);CHKERRQ(ierr); /* Need to give leafdata but we won't use it */
  ierr = PetscSFPackGetInUse(sf,MPIU_INT,rootdata,leafdata-minleaf,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr);
  ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr);
  ierr = PetscSFGetLeafInfo_Basic(sf,&nranks,&ndranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr); /* Get send info */
  esf_nranks = esf_ndranks = connected_leaves = 0;
  for (i=0; i<nranks; i++) { /* Scan leaf data to calculate some counts */
    leafbuf   = (PetscInt*)link->leaf[i];
    connected = PETSC_FALSE; /* Is the current process still connected to this remote root rank? */
    for (j=roffset[i],k=0; j<roffset[i+1]; j++,k++) {
      if (leafbuf[k]) {
        connected_leaves++;
        connected   = PETSC_TRUE;
      }
    }
    if (connected) {esf_nranks++; if (i<ndranks) esf_ndranks++;}
  }

  /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */
  ierr = PetscMalloc1(connected_leaves,&new_ilocal);CHKERRQ(ierr);
  ierr = PetscMalloc1(connected_leaves,&new_iremote);CHKERRQ(ierr);
  ierr = PetscMalloc4(esf_nranks,&esf_ranks,esf_nranks+1,&esf_roffset,connected_leaves,&esf_rmine,connected_leaves,&esf_rremote);CHKERRQ(ierr);
  p    = 0; /* Counter for connected root ranks */
  q    = 0; /* Counter for connected leaves */
  esf_roffset[0] = 0;
  for (i=0; i<nranks; i++) { /* Scan leaf data again to fill esf arrays */
    leafbuf   = (PetscInt*)link->leaf[i];
    connected = PETSC_FALSE;
    for (j=roffset[i],k=0; j<roffset[i+1]; j++,k++) {
      if (leafbuf[k]) {
        esf_rmine[q]         = new_ilocal[q] = rmine[j];
        esf_rremote[q]       = rremote[j];
        new_iremote[q].index = rremote[j];
        new_iremote[q].rank  = ranks[i];
        connected            = PETSC_TRUE;
        q++;
      }
    }
    if (connected) {
      esf_ranks[p]     = ranks[i];
      esf_roffset[p+1] = q;
      p++;
    }
  }

  ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr);

  /* SetGraph internally resets the SF, so we only set its fields after the call */
  ierr         = PetscSFSetGraph(esf,nroots,connected_leaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  esf->nranks  = esf_nranks;
  esf->ndranks = esf_ndranks;
  esf->ranks   = esf_ranks;
  esf->roffset = esf_roffset;
  esf->rmine   = esf_rmine;
  esf->rremote = esf_rremote;

  /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */
  bas  = (PetscSF_Basic*)esf->data;
  ierr = PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr); /* Get recv info */
  /* The embedded sf always has simpler communication than the original one, so we might allocate longer
     arrays than needed here. We expect these arrays to be short, so we do not care; the benefit is that
     we can fill them in a single pass over irootloc.
   */
  ierr = PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);CHKERRQ(ierr);
  ierr = PetscMalloc1(ioffset[niranks],&bas->irootloc);CHKERRQ(ierr);
  bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
  p = 0; /* Counter for connected leaf ranks */
  q = 0; /* Counter for connected roots */
  for (i=0; i<niranks; i++) {
    connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
    for (j=ioffset[i]; j<ioffset[i+1]; j++) {
      PetscInt loc;
      ierr = PetscFindInt(irootloc[j],nselected,selected,&loc);CHKERRQ(ierr);
      if (loc >= 0) { /* Found in selected[]; this root is connected */
        bas->irootloc[q++] = irootloc[j];
        connected = PETSC_TRUE;
      }
    }
    if (connected) {
      bas->niranks++;
      if (i<ndiranks) bas->ndiranks++; /* Note that the order of ranks (including distinguished ranks) is kept */
      bas->iranks[p]    = iranks[i];
      bas->ioffset[p+1] = q;
      p++;
    }
  }
  bas->itotal = q;

  /* Setup packing optimizations */
  ierr = PetscSFPackSetupOptimization(esf->nranks,esf->roffset,esf->rmine,&esf->leafpackopt);CHKERRQ(ierr);
  ierr = PetscSFPackSetupOptimization(bas->niranks,bas->ioffset,bas->irootloc,&bas->rootpackopt);CHKERRQ(ierr);
  esf->setupcalled = PETSC_TRUE; /* We have done the setup ourselves! */

  ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr);
  *newsf = esf;
  PetscFunctionReturn(0);
}

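/* Like PetscSFCreateEmbeddedSF_Basic() above, except that selected[] lists leaf indices rather than root
   indices. The embedded graph can be extracted directly from sf's graph; a single reduce on sf then tells
   the root side which of its incoming entries survive.
*/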
PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedLeafSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
{
  PetscSF           esf;
  PetscInt          i,j,k,p,q,nroots,*rootdata,*leafdata,*new_ilocal,niranks,ndiranks,minleaf,maxleaf,maxlocal;
  const PetscInt    *ilocal,*ioffset,*irootloc;
  const PetscMPIInt *iranks;
  PetscSFPack_Basic link;
  PetscSFNode       *new_iremote;
  const PetscSFNode *iremote;
  PetscSF_Basic     *bas;
  MPI_Group         group;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);CHKERRQ(ierr);
  ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */

  /* Set the graph of esf, which is easy for CreateEmbeddedLeafSF */
  ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
  ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
  ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
  for (i=0; i<nselected; i++) {
    const PetscInt l     = selected[i];
    new_ilocal[i]        = ilocal ? ilocal[l] : l;
    new_iremote[i].rank  = iremote[l].rank;
    new_iremote[i].index = iremote[l].index;
  }

  /* Tag selected leaves before PetscSFSetGraph, since new_ilocal might become NULL after the call because we use PETSC_OWN_POINTER below */
  maxlocal = (minleaf > maxleaf)? 0 : maxleaf-minleaf+1; /* maxleaf=-1 and minleaf=0 when nleaves=0 */
  ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafdata);CHKERRQ(ierr);
  for (i=0; i<nselected; i++) leafdata[new_ilocal[i]-minleaf] = 1; /* -minleaf to adjust indices according to minleaf */

  ierr = PetscSFSetGraph(esf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);

  /* Set up the outgoing communication (i.e., send info). We cannot reuse rmine etc. from sf since there is no way to
     map rmine[i] (ilocal of leaves) back to selected[j] (leaf indices).
   */
  ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRQ(ierr);
  ierr = PetscSFSetUpRanks(esf,group);CHKERRQ(ierr);
  ierr = MPI_Group_free(&group);CHKERRQ(ierr);

  /* Set up the incoming communication (i.e., recv info) */
  ierr = PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr);
  bas  = (PetscSF_Basic*)esf->data;
  ierr = PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);CHKERRQ(ierr);
  ierr = PetscMalloc1(ioffset[niranks],&bas->irootloc);CHKERRQ(ierr);

  /* Pass info about selected leaves to root buffer */
  ierr = PetscSFReduceBegin_Basic(sf,MPIU_INT,leafdata-minleaf,rootdata,MPIU_REPLACE);CHKERRQ(ierr); /* -minleaf to re-adjust start address of leafdata */
  ierr = PetscSFPackGetInUse(sf,MPIU_INT,rootdata,leafdata-minleaf,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr);
  ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr);

  bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
  p = 0; /* Counter for connected leaf ranks */
  q = 0; /* Counter for connected roots */
  for (i=0; i<niranks; i++) {
    PetscInt *rootbuf = (PetscInt*)link->root[i];
    PetscBool connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
    for (j=ioffset[i],k=0; j<ioffset[i+1]; j++,k++) {
      if (rootbuf[k]) {bas->irootloc[q++] = irootloc[j]; connected = PETSC_TRUE;}
    }
    if (connected) {
      bas->niranks++;
      if (i<ndiranks) bas->ndiranks++;
      bas->iranks[p]    = iranks[i];
      bas->ioffset[p+1] = q;
      p++;
    }
  }
  bas->itotal = q;
  ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr);

  /* Setup packing optimizations */
  ierr = PetscSFPackSetupOptimization(esf->nranks,esf->roffset,esf->rmine,&esf->leafpackopt);CHKERRQ(ierr);
  ierr = PetscSFPackSetupOptimization(bas->niranks,bas->ioffset,bas->irootloc,&bas->rootpackopt);CHKERRQ(ierr);
  esf->setupcalled = PETSC_TRUE; /* We have done the setup ourselves! */

  ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr);
  *newsf = esf;
  PetscFunctionReturn(0);
}

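/* Constructor for the basic SF type: install the function table and allocate the type-specific data.
   A caller selects this implementation with (a sketch):

     ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);
*/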
PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
{
  PetscSF_Basic  *dat;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  sf->ops->SetUp                = PetscSFSetUp_Basic;
  sf->ops->SetFromOptions       = PetscSFSetFromOptions_Basic;
  sf->ops->Reset                = PetscSFReset_Basic;
  sf->ops->Destroy              = PetscSFDestroy_Basic;
  sf->ops->View                 = PetscSFView_Basic;
  sf->ops->BcastAndOpBegin      = PetscSFBcastAndOpBegin_Basic;
  sf->ops->BcastAndOpEnd        = PetscSFBcastAndOpEnd_Basic;
  sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
  sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
  sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
  sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
  sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
  sf->ops->CreateEmbeddedSF     = PetscSFCreateEmbeddedSF_Basic;
  sf->ops->CreateEmbeddedLeafSF = PetscSFCreateEmbeddedLeafSF_Basic;

  ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr);
  sf->data = (void*)dat;
  PetscFunctionReturn(0);
}