xref: /petsc/src/vec/is/sf/impls/basic/sfbasic.c (revision f659e5c7ab26e3388857cea03772891fbaf11827)
1 
2 #include <../src/vec/is/sf/impls/basic/sfbasic.h>
3 
4 /*===================================================================================*/
5 /*              Internal routines for PetscSFPack_Basic                              */
6 /*===================================================================================*/
7 
/* Return root and leaf MPI requests for communication in the given direction. If the requests have not been
   initialized (since we use persistent requests), then initialize them.

   Layout of link->requests (length 2*link->half, half = nrootranks+nleafranks):
     [0, half)        : leaf2root (reduce) direction
     [half, 2*half)   : root2leaf (bcast) direction
   Within each half, slots [0, nrootranks) hold the root-side requests and
   [nrootranks, nrootranks+nleafranks) the leaf-side requests. Slots for
   distinguished ranks (self, communicated via shared memory) stay MPI_REQUEST_NULL.
*/
static PetscErrorCode PetscSFPackGetReqs_Basic(PetscSF sf,MPI_Datatype unit,PetscSFPack_Basic link,PetscSFDirection direction,MPI_Request **rootreqs,MPI_Request **leafreqs)
{
  PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
  /* Select the half of the request array belonging to this direction */
  MPI_Request    *requests = (direction == PETSCSF_LEAF2ROOT_REDUCE)? link->requests : link->requests + link->half;
  PetscInt       i,nrootranks,ndrootranks,nleafranks,ndleafranks;
  const PetscInt *rootoffset,*leafoffset;
  PetscMPIInt    n;
  MPI_Comm       comm = PetscObjectComm((PetscObject)sf);
  PetscErrorCode ierr;

  PetscFunctionBegin;

  if (!link->initialized[direction]) {
    ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr);
    ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr);
    if (direction == PETSCSF_LEAF2ROOT_REDUCE) {
      /* Reduce: roots receive into root buffers, leaves send from leaf buffers.
         Distinguished ranks (i < nd*ranks) are skipped; they exchange via shared memory. */
      for (i=0; i<nrootranks; i++) {
        if (i >= ndrootranks) {
          ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr);
          ierr = MPI_Recv_init(link->root[i],n,unit,bas->iranks[i],link->tag,comm,&requests[i]);CHKERRQ(ierr);
        }
      }
      for (i=0; i<nleafranks; i++) {
        if (i >= ndleafranks) {
          ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr);
          ierr = MPI_Send_init(link->leaf[i],n,unit,sf->ranks[i],link->tag,comm,&requests[nrootranks+i]);CHKERRQ(ierr);
        }
      }
    } else if (direction == PETSCSF_ROOT2LEAF_BCAST) {
      /* Bcast: mirror image of the reduce case -- roots send, leaves receive */
      for (i=0; i<nrootranks; i++) {
        if (i >= ndrootranks) {
          ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr);
          ierr = MPI_Send_init(link->root[i],n,unit,bas->iranks[i],link->tag,comm,&requests[i]);CHKERRQ(ierr);
        }
      }
      for (i=0; i<nleafranks; i++) {
        if (i >= ndleafranks) {
          ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr);
          ierr = MPI_Recv_init(link->leaf[i],n,unit,sf->ranks[i],link->tag,comm,&requests[nrootranks+i]);CHKERRQ(ierr);
        }
      }
    } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Out-of-range PetscSFDirection = %D\n",direction);

    link->initialized[direction] = PETSC_TRUE;
  }

  /* Both outputs are optional; leaf requests start right after the nrootranks
     (== bas->niranks) root requests in this direction's half */
  if (rootreqs) *rootreqs = requests;
  if (leafreqs) *leafreqs = requests + bas->niranks;
  PetscFunctionReturn(0);
}
61 
/* Obtain a pack link for the given unit datatype: reuse a matching link from the
   available-list cache if possible, otherwise allocate and initialize a new one
   (root/leaf buffers, persistent-request slots, a fresh communication tag).
   On return the link is on the in-use list, keyed by 'key' (the caller's data
   pointer) so PetscSFPackGetInUse can find it in the matching *End routine. */
static PetscErrorCode PetscSFPackGet_Basic(PetscSF sf,MPI_Datatype unit,const void *key,PetscSFDirection direction,PetscSFPack_Basic *mylink)
{
  PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
  PetscErrorCode    ierr;
  PetscInt          i,nrootranks,ndrootranks,nleafranks,ndleafranks;
  const PetscInt    *rootoffset,*leafoffset;
  PetscSFPack       *p;
  PetscSFPack_Basic link;

  PetscFunctionBegin;
  /* Look for types in cache */
  for (p=&bas->avail; (link=(PetscSFPack_Basic)*p); p=&link->next) {
    PetscBool match;
    ierr = MPIPetsc_Type_compare(unit,link->unit,&match);CHKERRQ(ierr);
    if (match) {
      *p = link->next; /* Remove from available list */
      goto found;
    }
  }

  /* No cached link of this datatype; build one from scratch */
  ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr);
  ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscNew(&link);CHKERRQ(ierr);
  ierr = PetscSFPackSetupType((PetscSFPack)link,unit);CHKERRQ(ierr);
  ierr = PetscMalloc2(nrootranks,&link->root,nleafranks,&link->leaf);CHKERRQ(ierr);
  /* Double the requests. First half are used for reduce (leaf2root) communication, second half for bcast (root2leaf) communication */
  link->half = nrootranks + nleafranks;
  ierr       = PetscMalloc1(link->half*2,&link->requests);CHKERRQ(ierr);
  for (i=0; i<link->half*2; i++) link->requests[i] = MPI_REQUEST_NULL; /* Must be init'ed since some are unused but we call MPI_Waitall on them in whole */
  /* One tag per link */
  ierr = PetscCommGetNewTag(PetscObjectComm((PetscObject)sf),&link->tag);CHKERRQ(ierr);

  /* Allocate root and leaf buffers */
  for (i=0; i<nrootranks; i++) {ierr = PetscMalloc((rootoffset[i+1]-rootoffset[i])*link->unitbytes,&link->root[i]);CHKERRQ(ierr);}
  for (i=0; i<nleafranks; i++) {
    if (i < ndleafranks) { /* Leaf buffers for distinguished ranks are pointers directly into root buffers */
      if (ndrootranks != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot match distinguished ranks");
      link->leaf[i] = link->root[0];
      continue;
    }
    ierr = PetscMalloc((leafoffset[i+1]-leafoffset[i])*link->unitbytes,&link->leaf[i]);CHKERRQ(ierr);
  }

found:
  /* Move the link onto the in-use list and remember the caller's data pointer as the lookup key */
  link->key  = key;
  link->next = bas->inuse;
  bas->inuse = (PetscSFPack)link;

  *mylink    = link;
  PetscFunctionReturn(0);
}
113 
114 /*===================================================================================*/
115 /*              SF public interface implementations                                  */
116 /*===================================================================================*/
/* Set up the basic SF: discover which ranks reference this process's roots (two-sided
   rendezvous), partition ranks into distinguished (self, via shared memory) and
   non-distinguished, exchange leaf identities so roots know which of their entries
   each remote rank touches, and precompute packing optimizations. */
PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
{
  PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
  PetscErrorCode ierr;
  PetscInt       *rlengths,*ilengths,i;
  PetscMPIInt    rank,niranks,*iranks,tag;
  MPI_Comm       comm;
  MPI_Group      group;
  MPI_Request    *rootreqs,*leafreqs;

  PetscFunctionBegin;
  /* With the self group, only the local rank is treated as distinguished */
  ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRQ(ierr);
  ierr = PetscSFSetUpRanks(sf,group);CHKERRQ(ierr);
  ierr = MPI_Group_free(&group);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)sf,&tag);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  /*
   * Inform roots about how many leaves and from which ranks
   */
  ierr = PetscMalloc1(sf->nranks,&rlengths);CHKERRQ(ierr);
  /* Determine number, sending ranks, and length of incoming */
  for (i=0; i<sf->nranks; i++) {
    rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */
  }
  /* Rendezvous over the non-distinguished ranks only; yields who sends to me (iranks) and how much (ilengths) */
  ierr = PetscCommBuildTwoSided(comm,1,MPIU_INT,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths);CHKERRQ(ierr);

  /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why.
     We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is
     small and the sorting is cheap.
   */
  ierr = PetscSortMPIIntWithIntArray(niranks,iranks,ilengths);CHKERRQ(ierr);

  /* Partition into distinguished and non-distinguished incoming ranks */
  bas->ndiranks = sf->ndranks;
  bas->niranks = bas->ndiranks + niranks;
  ierr = PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset);CHKERRQ(ierr);
  bas->ioffset[0] = 0;
  for (i=0; i<bas->ndiranks; i++) {
    bas->iranks[i] = sf->ranks[i];
    bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i];
  }
  if (bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks");
  for ( ; i<bas->niranks; i++) {
    bas->iranks[i] = iranks[i-bas->ndiranks];
    bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks];
  }
  bas->itotal = bas->ioffset[i];
  ierr = PetscFree(rlengths);CHKERRQ(ierr);
  ierr = PetscFree(iranks);CHKERRQ(ierr);
  ierr = PetscFree(ilengths);CHKERRQ(ierr);

  /* Send leaf identities to roots */
  ierr = PetscMalloc1(bas->itotal,&bas->irootloc);CHKERRQ(ierr);
  ierr = PetscMalloc2(bas->niranks-bas->ndiranks,&rootreqs,sf->nranks-sf->ndranks,&leafreqs);CHKERRQ(ierr);
  for (i=bas->ndiranks; i<bas->niranks; i++) {
    ierr = MPI_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],tag,comm,&rootreqs[i-bas->ndiranks]);CHKERRQ(ierr);
  }
  for (i=0; i<sf->nranks; i++) {
    PetscMPIInt npoints;
    ierr = PetscMPIIntCast(sf->roffset[i+1] - sf->roffset[i],&npoints);CHKERRQ(ierr);
    if (i < sf->ndranks) {
      /* Distinguished rank is self: sanity-check and copy locally instead of messaging */
      if (sf->ranks[i] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank");
      if (bas->iranks[0] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank");
      if (npoints != bas->ioffset[1]-bas->ioffset[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths");
      ierr = PetscArraycpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints);CHKERRQ(ierr);
      continue;
    }
    ierr = MPI_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],tag,comm,&leafreqs[i-sf->ndranks]);CHKERRQ(ierr);
  }
  ierr = MPI_Waitall(bas->niranks-bas->ndiranks,rootreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
  ierr = MPI_Waitall(sf->nranks-sf->ndranks,leafreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
  ierr = PetscFree2(rootreqs,leafreqs);CHKERRQ(ierr);

  /* Setup packing optimization for root and leaf */
  ierr = PetscSFPackSetupOptimization(sf->nranks,sf->roffset,sf->rmine,&sf->leafpackopt);CHKERRQ(ierr);
  ierr = PetscSFPackSetupOptimization(bas->niranks,bas->ioffset,bas->irootloc,&bas->rootpackopt);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
196 
197 static PetscErrorCode PetscSFSetFromOptions_Basic(PetscOptionItems *PetscOptionsObject,PetscSF sf)
198 {
199   PetscErrorCode ierr;
200 
201   PetscFunctionBegin;
202   ierr = PetscOptionsHead(PetscOptionsObject,"PetscSF Basic options");CHKERRQ(ierr);
203   ierr = PetscOptionsTail();CHKERRQ(ierr);
204   PetscFunctionReturn(0);
205 }
206 
/* Tear down all communication state of the basic SF: rank/offset arrays, root
   locations, every cached pack link (buffers, datatype, persistent requests),
   and the packing optimizations. Errors out if any operation is still in flight. */
PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf)
{
  PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
  PetscErrorCode    ierr;
  PetscSFPack_Basic link,next;

  PetscFunctionBegin;
  if (bas->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed");
  ierr = PetscFree2(bas->iranks,bas->ioffset);CHKERRQ(ierr);
  ierr = PetscFree(bas->irootloc);CHKERRQ(ierr);
  for (link=(PetscSFPack_Basic)bas->avail; link; link=next) {
    PetscInt i;
    next = (PetscSFPack_Basic)link->next;
    if (!link->isbuiltin) {ierr = MPI_Type_free(&link->unit);CHKERRQ(ierr);}
    for (i=0; i<bas->niranks; i++) {ierr = PetscFree(link->root[i]);CHKERRQ(ierr);}
    /* Distinguished leaf buffers alias link->root[0] (see PetscSFPackGet_Basic), so they must not be freed twice */
    for (i=sf->ndranks; i<sf->nranks; i++) {ierr = PetscFree(link->leaf[i]);CHKERRQ(ierr);} /* Free only non-distinguished leaf buffers */
    ierr = PetscFree2(link->root,link->leaf);CHKERRQ(ierr);
    /* Free persistent requests using MPI_Request_free */
    for (i=0; i<link->half*2; i++) {
      if (link->requests[i] != MPI_REQUEST_NULL) {ierr = MPI_Request_free(&link->requests[i]);CHKERRQ(ierr);}
    }
    ierr = PetscFree(link->requests);CHKERRQ(ierr);
    ierr = PetscFree(link);CHKERRQ(ierr);
  }
  bas->avail = NULL;
  /* NOTE(review): "Destory" is the spelling of the upstream helper's name; it cannot be corrected here */
  ierr = PetscSFPackDestoryOptimization(&sf->leafpackopt);CHKERRQ(ierr);
  ierr = PetscSFPackDestoryOptimization(&bas->rootpackopt);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
236 
237 PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
238 {
239   PetscErrorCode ierr;
240 
241   PetscFunctionBegin;
242   ierr = PetscSFReset(sf);CHKERRQ(ierr);
243   ierr = PetscFree(sf->data);CHKERRQ(ierr);
244   PetscFunctionReturn(0);
245 }
246 
247 PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer)
248 {
249   PetscErrorCode ierr;
250   PetscBool      iascii;
251 
252   PetscFunctionBegin;
253   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
254   if (iascii) {
255     ierr = PetscViewerASCIIPrintf(viewer,"  sort=%s\n",sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr);
256   }
257   PetscFunctionReturn(0);
258 }
259 
/* Begin root -> leaf broadcast: acquire a pack link keyed by rootdata, start the
   persistent leaf receives, then pack root data per destination rank and start
   the sends. Distinguished ranks skip MPI entirely (shared-memory aliasing of
   buffers); 'op' is applied later, in the matching *End routine. */
static PetscErrorCode PetscSFBcastAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode    ierr;
  PetscSFPack_Basic link;
  PetscInt          i,nrootranks,ndrootranks,nleafranks,ndleafranks;
  const PetscInt    *rootoffset,*leafoffset,*rootloc,*leafloc;
  const PetscMPIInt *rootranks,*leafranks;
  MPI_Request       *rootreqs,*leafreqs;
  PetscMPIInt       n;
  PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;

  PetscFunctionBegin;
  ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr);
  ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc,NULL);CHKERRQ(ierr);
  ierr = PetscSFPackGet_Basic(sf,unit,rootdata,PETSCSF_ROOT2LEAF_BCAST,&link);CHKERRQ(ierr);

  ierr = PetscSFPackGetReqs_Basic(sf,unit,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);CHKERRQ(ierr);
  /* Eagerly post leaf receives, but only from non-distinguished ranks -- distinguished ranks will receive via shared memory */
  ierr = PetscMPIIntCast(leafoffset[nleafranks]-leafoffset[ndleafranks],&n);CHKERRQ(ierr);
  ierr = MPI_Startall_irecv(n,unit,nleafranks-ndleafranks,leafreqs+ndleafranks);CHKERRQ(ierr); /* One can wait but not start a null request */

  /* Pack and send root data */
  for (i=0; i<nrootranks; i++) {
    void *packstart = link->root[i];
    ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr);
    /* Gather the root entries destined for rank i into the contiguous send buffer */
    (*link->Pack)(n,link->bs,rootloc+rootoffset[i],i,bas->rootpackopt,rootdata,packstart);
    if (i < ndrootranks) continue; /* shared memory */
    ierr = MPI_Start_isend(n,unit,&rootreqs[i]);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
291 
/* End root -> leaf broadcast: wait for all bcast-direction requests, then combine
   each received leaf buffer into leafdata with 'op'. A specialized UnpackAndOp
   routine is used when available; otherwise fall back to MPI_Reduce_local one
   unit at a time. Finally, return the pack link to the available-list cache. */
PETSC_INTERN PetscErrorCode PetscSFBcastAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode    ierr;
  PetscSFPack_Basic link;
  PetscInt          i,nleafranks,ndleafranks;
  const PetscInt    *leafoffset,*leafloc;
  PetscErrorCode    (*UnpackAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);
  PetscMPIInt       typesize = -1;

  PetscFunctionBegin;
  /* Recover the link started in BcastAndOpBegin via its rootdata key */
  ierr = PetscSFPackGetInUse(sf,unit,rootdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr);
  ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr);
  ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,&leafloc,NULL);CHKERRQ(ierr);
  ierr = PetscSFPackGetUnpackAndOp(sf,(PetscSFPack)link,op,&UnpackAndOp);CHKERRQ(ierr);

  if (UnpackAndOp) { typesize = link->unitbytes; }
  else { ierr = MPI_Type_size(unit,&typesize);CHKERRQ(ierr); }

  for (i=0; i<nleafranks; i++) {
    PetscMPIInt n   = leafoffset[i+1] - leafoffset[i];
    char *packstart = (char *) link->leaf[i];
    if (UnpackAndOp) { (*UnpackAndOp)(n,link->bs,leafloc+leafoffset[i],i,sf->leafpackopt,leafdata,(const void *)packstart); }
#if defined(PETSC_HAVE_MPI_REDUCE_LOCAL)
    else if (n) { /* the op should be defined to operate on the whole datatype, so we ignore link->bs */
      PetscInt j;
      for (j=0; j<n; j++) { ierr = MPI_Reduce_local(packstart+j*typesize,((char *) leafdata)+(leafloc[leafoffset[i]+j])*typesize,1,unit,op);CHKERRQ(ierr); }
    }
#else
    else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No unpacking reduction operation for this MPI_Op");
#endif
  }

  ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
327 
/* leaf -> root with reduction. Begin phase: acquire a pack link keyed by
   leafdata, start the persistent root receives, then pack leaf data per
   destination rank and start the sends. The reduction op is applied in
   PetscSFReduceEnd_Basic; distinguished ranks communicate via shared memory. */
static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscSFPack_Basic link;
  PetscErrorCode    ierr;
  PetscInt          i,nrootranks,ndrootranks,nleafranks,ndleafranks;
  const PetscInt    *rootoffset,*leafoffset,*rootloc,*leafloc;
  const PetscMPIInt *rootranks,*leafranks;
  MPI_Request       *rootreqs,*leafreqs;
  PetscMPIInt       n;

  PetscFunctionBegin;
  ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr);
  ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc,NULL);CHKERRQ(ierr);
  ierr = PetscSFPackGet_Basic(sf,unit,leafdata,PETSCSF_LEAF2ROOT_REDUCE,&link);CHKERRQ(ierr);

  ierr = PetscSFPackGetReqs_Basic(sf,unit,link,PETSCSF_LEAF2ROOT_REDUCE,&rootreqs,&leafreqs);CHKERRQ(ierr);
  /* Eagerly post root receives for non-distinguished ranks */
  ierr = PetscMPIIntCast(rootoffset[nrootranks]-rootoffset[ndrootranks],&n);CHKERRQ(ierr);
  ierr = MPI_Startall_irecv(n,unit,nrootranks-ndrootranks,rootreqs+ndrootranks);CHKERRQ(ierr);

  /* Pack and send leaf data */
  for (i=0; i<nleafranks; i++) {
    void *packstart = link->leaf[i];
    ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr);
    /* Gather the leaf entries destined for rank i into the contiguous send buffer */
    (*link->Pack)(n,link->bs,leafloc+leafoffset[i],i,sf->leafpackopt,leafdata,packstart);
    if (i < ndleafranks) continue; /* shared memory */
    ierr = MPI_Start_isend(n,unit,&leafreqs[i]);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
359 
/* End leaf -> root reduction: wait for all reduce-direction requests, then fold
   each received root buffer into rootdata with 'op' (specialized UnpackAndOp when
   available, otherwise per-unit MPI_Reduce_local), and reclaim the pack link. */
PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode    (*UnpackAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);
  PetscErrorCode    ierr;
  PetscSFPack_Basic link;
  PetscInt          i,nrootranks;
  PetscMPIInt       typesize = -1;
  const PetscInt    *rootoffset,*rootloc;
  PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;

  PetscFunctionBegin;
  /* Recover the link started in ReduceBegin via its leafdata key */
  ierr = PetscSFPackGetInUse(sf,unit,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr);
  /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
  ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr);
  ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,NULL,NULL,&rootoffset,&rootloc);CHKERRQ(ierr);
  ierr = PetscSFPackGetUnpackAndOp(sf,(PetscSFPack)link,op,&UnpackAndOp);CHKERRQ(ierr);
  if (UnpackAndOp) {
    typesize = link->unitbytes;
  }
  else {
    ierr = MPI_Type_size(unit,&typesize);CHKERRQ(ierr);
  }
  for (i=0; i<nrootranks; i++) {
    PetscMPIInt n   = rootoffset[i+1] - rootoffset[i];
    char *packstart = (char *) link->root[i];

    if (UnpackAndOp) {
      (*UnpackAndOp)(n,link->bs,rootloc+rootoffset[i],i,bas->rootpackopt,rootdata,(const void *)packstart);
    }
#if defined(PETSC_HAVE_MPI_REDUCE_LOCAL)
    else if (n) { /* the op should be defined to operate on the whole datatype, so we ignore link->bs */
      PetscInt j;

      for (j = 0; j < n; j++) {
        ierr = MPI_Reduce_local(packstart+j*typesize,((char *) rootdata)+(rootloc[rootoffset[i]+j])*typesize,1,unit,op);CHKERRQ(ierr);
      }
    }
#else
    else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No unpacking reduction operation for this MPI_Op");
#endif
  }
  ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
404 
405 PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
406 {
407   PetscErrorCode ierr;
408 
409   PetscFunctionBegin;
410   ierr = PetscSFReduceBegin(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
411   PetscFunctionReturn(0);
412 }
413 
/* End fetch-and-op: finish the leaf->root reduce started in FetchAndOpBegin, then
   for each root apply FetchAndOp (stores the pre-op root value into the root buffer
   while updating rootdata), send the fetched old values back to the leaves reusing
   the bcast-direction requests, and unpack them into leafupdate. */
static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode    (*FetchAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);
  PetscErrorCode    ierr;
  PetscSFPack_Basic link;
  PetscInt          i,nrootranks,ndrootranks,nleafranks,ndleafranks;
  const PetscInt    *rootoffset,*leafoffset,*rootloc,*leafloc;
  const PetscMPIInt *rootranks,*leafranks;
  MPI_Request       *rootreqs,*leafreqs;
  PetscMPIInt       n;
  PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;

  PetscFunctionBegin;
  ierr = PetscSFPackGetInUse(sf,unit,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr);
  /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
  ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr);
  ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr);
  ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc,NULL);CHKERRQ(ierr);

  /* The return trip (old root values back to leaves) uses the bcast-direction requests */
  ierr = PetscSFPackGetReqs_Basic(sf,unit,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);CHKERRQ(ierr);
  /* Post leaf receives */
  ierr = PetscMPIIntCast(leafoffset[nleafranks]-leafoffset[ndleafranks],&n);CHKERRQ(ierr);
  ierr = MPI_Startall_irecv(n,unit,nleafranks-ndleafranks,leafreqs+ndleafranks);CHKERRQ(ierr);

  /* Process local fetch-and-op, post root sends */
  ierr = PetscSFPackGetFetchAndOp(sf,(PetscSFPack)link,op,&FetchAndOp);CHKERRQ(ierr);
  for (i=0; i<nrootranks; i++) {
    void *packstart = link->root[i];
    ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr);
    (*FetchAndOp)(n,link->bs,rootloc+rootoffset[i],i,bas->rootpackopt,rootdata,packstart);
    if (i < ndrootranks) continue; /* shared memory */
    ierr = MPI_Start_isend(n,unit,&rootreqs[i]);CHKERRQ(ierr);
  }
  ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr);
  /* Scatter the fetched old root values into leafupdate */
  for (i=0; i<nleafranks; i++) {
    const void  *packstart = link->leaf[i];
    ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr);
    (*link->UnpackAndInsert)(n,link->bs,leafloc+leafoffset[i],i,sf->leafpackopt,leafupdate,packstart);
  }
  ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
456 
/* Query the incoming (root-side) communication pattern: the ranks whose leaves
   reference this process's roots, CSR-style offsets into irootloc, and the local
   root indices they touch. Every output is optional (pass NULL to skip). */
PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
{
  PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;

  PetscFunctionBegin;
  if (niranks)  *niranks  = bas->niranks;
  if (iranks)   *iranks   = bas->iranks;
  if (ioffset)  *ioffset  = bas->ioffset;
  if (irootloc) *irootloc = bas->irootloc;
  PetscFunctionReturn(0);
}
468 
/* An optimized PetscSFCreateEmbeddedSF. We aggressively make use of the established communication on sf.
   We need one bcast on sf, and no communication anymore to build the embedded sf. Note that selected[]
   was sorted before calling the routine.

   Input:  sf        - the original SF
           nselected - number of selected roots
           selected  - sorted local indices of the selected roots
   Output: newsf     - a new basic SF connecting only the selected roots to their leaves
 */
PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
{
  PetscSF           esf;
  PetscInt          esf_nranks,esf_ndranks,*esf_roffset,*esf_rmine,*esf_rremote;
  PetscInt          i,j,k,p,q,nroots,*rootdata,*leafdata,*leafbuf,connected_leaves,*new_ilocal,nranks,ndranks,niranks,ndiranks,minleaf,maxleaf,maxlocal;
  PetscMPIInt       *esf_ranks;
  const PetscMPIInt *ranks,*iranks;
  const PetscInt    *roffset,*rmine,*rremote,*ioffset,*irootloc;
  PetscBool         connected;
  PetscSFPack_Basic link;
  PetscSFNode       *new_iremote;
  PetscSF_Basic     *bas;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);CHKERRQ(ierr);
  ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */

  /* Find out which leaves are still connected to roots in the embedded sf */
  ierr = PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
  /* We abused the term leafdata here, whose size is usually the number of leaf data items. Here its size is # of leaves (always >= # of leaf data items) */
  maxlocal = (minleaf > maxleaf)? 0 : maxleaf-minleaf+1; /* maxleaf=-1 and minleaf=0 when nleaves=0 */
  ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafdata);CHKERRQ(ierr);
  /* Tag selected roots */
  for (i=0; i<nselected; ++i) rootdata[selected[i]] = 1;

  /* Bcast from roots to leaves to tag connected leaves. We reuse the established bcast communication in
     sf but do not do unpacking (from leaf buffer to leafdata). The raw data in leaf buffer is what we are
     interested in since it tells which leaves are connected to which ranks.
   */
  ierr = PetscSFBcastAndOpBegin_Basic(sf,MPIU_INT,rootdata,leafdata-minleaf,MPIU_REPLACE);CHKERRQ(ierr); /* Need to give leafdata but we won't use it */
  ierr = PetscSFPackGetInUse(sf,MPIU_INT,rootdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr);
  ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr);
  ierr = PetscSFGetLeafInfo_Basic(sf,&nranks,&ndranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr); /* Get send info */
  esf_nranks = esf_ndranks = connected_leaves = 0;
  for (i=0; i<nranks; i++) { /* Scan leaf data to calculate some counts */
    leafbuf   = (PetscInt*)link->leaf[i]; /* Raw per-rank receive buffer; nonzero entry = leaf still connected */
    connected = PETSC_FALSE; /* Is the current process still connected to this remote root rank? */
    for (j=roffset[i],k=0; j<roffset[i+1]; j++,k++) {
      if (leafbuf[k]) {
        connected_leaves++;
        connected   = PETSC_TRUE;
      }
    }
    if (connected) {esf_nranks++; if (i<ndranks) esf_ndranks++;}
  }

  /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */
  ierr = PetscMalloc1(connected_leaves,&new_ilocal);CHKERRQ(ierr);
  ierr = PetscMalloc1(connected_leaves,&new_iremote);CHKERRQ(ierr);
  ierr = PetscMalloc4(esf_nranks,&esf_ranks,esf_nranks+1,&esf_roffset,connected_leaves,&esf_rmine,connected_leaves,&esf_rremote);CHKERRQ(ierr);
  p    = 0; /* Counter for connected root ranks */
  q    = 0; /* Counter for connected leaves */
  esf_roffset[0] = 0;
  for (i=0; i<nranks; i++) { /* Scan leaf data again to fill esf arrays */
    leafbuf   = (PetscInt*)link->leaf[i];
    connected = PETSC_FALSE;
    for (j=roffset[i],k=0; j<roffset[i+1]; j++,k++) {
        if (leafbuf[k]) {
        esf_rmine[q]         = new_ilocal[q] = rmine[j];
        esf_rremote[q]       = rremote[j];
        new_iremote[q].index = rremote[j];
        new_iremote[q].rank  = ranks[i];
        connected            = PETSC_TRUE;
        q++;
      }
    }
    if (connected) {
      esf_ranks[p]     = ranks[i];
      esf_roffset[p+1] = q;
      p++;
    }
  }

  /* Done reading the raw leaf buffers; return the link to the cache */
  ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr);

  /* SetGraph internally resets the SF, so we only set its fields after the call */
  ierr         = PetscSFSetGraph(esf,nroots,connected_leaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  esf->nranks  = esf_nranks;
  esf->ndranks = esf_ndranks;
  esf->ranks   = esf_ranks;
  esf->roffset = esf_roffset;
  esf->rmine   = esf_rmine;
  esf->rremote = esf_rremote;

  /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */
  bas  = (PetscSF_Basic*)esf->data;
  ierr = PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr); /* Get recv info */
  /* Embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here. But we
     expect these arrays are usually short, so we do not care. The benefit is we can fill these arrays by just parsing irootloc once.
   */
  ierr = PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);CHKERRQ(ierr);
  ierr = PetscMalloc1(ioffset[niranks],&bas->irootloc);CHKERRQ(ierr);
  bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
  p = 0; /* Counter for connected leaf ranks */
  q = 0; /* Counter for connected roots */
  for (i=0; i<niranks; i++) {
    connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
    for (j=ioffset[i]; j<ioffset[i+1]; j++) {
      PetscInt loc;
      /* Binary search is valid because selected[] is sorted (see header comment) */
      ierr = PetscFindInt(irootloc[j],nselected,selected,&loc);CHKERRQ(ierr);
      if (loc >= 0) { /* Found in selected this root is connected */
        bas->irootloc[q++] = irootloc[j];
        connected = PETSC_TRUE;
      }
    }
    if (connected) {
      bas->niranks++;
      if (i<ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */
      bas->iranks[p]    = iranks[i];
      bas->ioffset[p+1] = q;
      p++;
    }
  }
  bas->itotal = q;

  /* Setup packing optimizations */
  ierr = PetscSFPackSetupOptimization(esf->nranks,esf->roffset,esf->rmine,&esf->leafpackopt);CHKERRQ(ierr);
  ierr = PetscSFPackSetupOptimization(bas->niranks,bas->ioffset,bas->irootloc,&bas->rootpackopt);CHKERRQ(ierr);
  esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */

  ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr);
  *newsf = esf;
  PetscFunctionReturn(0);
}
599 
/*
   PetscSFCreateEmbeddedLeafSF_Basic - create a new basic SF containing only the
   selected leaves of sf, rebuilding the communication metadata directly instead
   of going through a full PetscSFSetUp on the result.

   Input:
     sf        - the source star forest (must be a basic SF; we cast its data below)
     nselected - number of leaves to keep on this process
     selected  - [nselected] indices into sf's leaf list (i.e. positions in ilocal/iremote)

   Output:
     newsf - the embedded SF, already marked setupcalled

   Roots on this process that end up with no remaining remote leaves are dropped
   from the incoming (recv) side, which is why we exchange tag data with a reduce
   below rather than reusing sf's root info verbatim.
*/
PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedLeafSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
{
  PetscSF           esf;
  PetscInt          i,j,k,p,q,nroots,*rootdata,*leafdata,*new_ilocal,niranks,ndiranks,minleaf,maxleaf,maxlocal;
  const PetscInt    *ilocal,*ioffset,*irootloc;
  const PetscMPIInt *iranks;
  PetscSFPack_Basic link;
  PetscSFNode       *new_iremote;
  const PetscSFNode *iremote;
  PetscSF_Basic     *bas;
  MPI_Group         group;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);CHKERRQ(ierr);
  ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */

  /* Set the graph of esf, which is easy for CreateEmbeddedLeafSF */
  ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
  ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
  /* Copy the selected subset of sf's graph; when sf has contiguous leaves (ilocal==NULL),
     the selected index itself is the leaf index. */
  ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
  ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
  for (i=0; i<nselected; i++) {
    const PetscInt l     = selected[i];
    new_ilocal[i]        = ilocal ? ilocal[l] : l;
    new_iremote[i].rank  = iremote[l].rank;
    new_iremote[i].index = iremote[l].index;
  }

  /* Tag selected leaves before PetscSFSetGraph since new_ilocal might turn into NULL since we use PETSC_OWN_POINTER below */
  maxlocal = (minleaf > maxleaf)? 0 : maxleaf-minleaf+1; /* maxleaf=-1 and minleaf=0 when nleaves=0 */
  ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafdata);CHKERRQ(ierr);
  for (i=0; i<nselected; i++) leafdata[new_ilocal[i]-minleaf] = 1; /* -minleaf to adjust indices according to minleaf */

  /* PETSC_OWN_POINTER: esf takes ownership of new_ilocal/new_iremote; do not free or reuse them below */
  ierr = PetscSFSetGraph(esf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);

  /* Set up the outgoing communication (i.e., send info). We can not reuse rmine etc in sf since there is no way to
     map rmine[i] (ilocal of leaves) back to selected[j]  (leaf indices).
   */
  ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRQ(ierr);
  ierr = PetscSFSetUpRanks(esf,group);CHKERRQ(ierr);
  ierr = MPI_Group_free(&group);CHKERRQ(ierr);

  /* Set up the incoming communication (i.e., recv info) */
  ierr = PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr);
  bas  = (PetscSF_Basic*)esf->data;
  /* Allocate at sf's (upper-bound) sizes; only the first bas->niranks/bas->itotal entries get filled */
  ierr = PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);CHKERRQ(ierr);
  ierr = PetscMalloc1(ioffset[niranks],&bas->irootloc);CHKERRQ(ierr);

  /* Pass info about selected leaves to root buffer */
  ierr = PetscSFReduceBegin_Basic(sf,MPIU_INT,leafdata-minleaf,rootdata,MPIU_REPLACE);CHKERRQ(ierr); /* -minleaf to re-adjust start address of leafdata */
  /* Look up the pack link the reduce above registered (keyed by the same leafdata-minleaf
     address), then wait on its leaf-to-root requests so link->root[] buffers are valid. */
  ierr = PetscSFPackGetInUse(sf,MPIU_INT,leafdata-minleaf,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr);
  ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr);

  /* Compact sf's root info into esf's, keeping only roots whose (remote) leaves were selected */
  bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
  p = 0; /* Counter for connected leaf ranks */
  q = 0; /* Counter for connected roots */
  for (i=0; i<niranks; i++) {
    PetscInt *rootbuf = (PetscInt*)link->root[i]; /* nonzero entries mark roots still referenced by rank iranks[i] */
    PetscBool connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
    for (j=ioffset[i],k=0; j<ioffset[i+1]; j++,k++) {
      if (rootbuf[k]) {bas->irootloc[q++] = irootloc[j]; connected = PETSC_TRUE;}
    }
    if (connected) {
      bas->niranks++;
      if (i<ndiranks) bas->ndiranks++; /* distinguished ranks come first, so their relative order is preserved */
      bas->iranks[p]    = iranks[i];
      bas->ioffset[p+1] = q;
      p++;
    }
  }
  bas->itotal = q;
  ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr); /* return the pack link to sf's free pool */

  /* Setup packing optimizations */
  ierr = PetscSFPackSetupOptimization(esf->nranks,esf->roffset,esf->rmine,&esf->leafpackopt);CHKERRQ(ierr);
  ierr = PetscSFPackSetupOptimization(bas->niranks,bas->ioffset,bas->irootloc,&bas->rootpackopt);CHKERRQ(ierr);
  esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */

  ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr);
  *newsf = esf;
  PetscFunctionReturn(0);
}
683 
684 PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
685 {
686   PetscSF_Basic  *dat;
687   PetscErrorCode ierr;
688 
689   PetscFunctionBegin;
690   sf->ops->SetUp                = PetscSFSetUp_Basic;
691   sf->ops->SetFromOptions       = PetscSFSetFromOptions_Basic;
692   sf->ops->Reset                = PetscSFReset_Basic;
693   sf->ops->Destroy              = PetscSFDestroy_Basic;
694   sf->ops->View                 = PetscSFView_Basic;
695   sf->ops->BcastAndOpBegin      = PetscSFBcastAndOpBegin_Basic;
696   sf->ops->BcastAndOpEnd        = PetscSFBcastAndOpEnd_Basic;
697   sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
698   sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
699   sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
700   sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
701   sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
702   sf->ops->CreateEmbeddedSF     = PetscSFCreateEmbeddedSF_Basic;
703   sf->ops->CreateEmbeddedLeafSF = PetscSFCreateEmbeddedLeafSF_Basic;
704 
705   ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr);
706   sf->data = (void*)dat;
707   PetscFunctionReturn(0);
708 }
709