xref: /petsc/src/vec/is/sf/impls/basic/sfbasic.c (revision 40e23c03978c13ac666c3a9a3d12ba4068b9b111)
1 
2 #include <../src/vec/is/sf/impls/basic/sfbasic.h>
3 
4 /*===================================================================================*/
5 /*              Internal routines for PetscSFPack_Basic                              */
6 /*===================================================================================*/
7 
8 /* Return root and leaf MPI requests for communication in the given direction. If the requests have not been
9    initialized (since we use persistent requests), then initialize them.
10 */
11 static PetscErrorCode PetscSFPackGetReqs_Basic(PetscSF sf,MPI_Datatype unit,PetscSFPack_Basic link,PetscSFDirection direction,MPI_Request **rootreqs,MPI_Request **leafreqs)
12 {
13   PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
14   MPI_Request    *requests = (direction == PETSCSF_LEAF2ROOT_REDUCE)? link->requests : link->requests + link->half;
15   PetscInt       i,nrootranks,ndrootranks,nleafranks,ndleafranks;
16   const PetscInt *rootoffset,*leafoffset;
17   PetscMPIInt    n;
18   MPI_Comm       comm = PetscObjectComm((PetscObject)sf);
19   PetscErrorCode ierr;
20 
21   PetscFunctionBegin;
22 
23   if (!link->initialized[direction]) {
24     ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr);
25     ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr);
26     if (direction == PETSCSF_LEAF2ROOT_REDUCE) {
27       for (i=0; i<nrootranks; i++) {
28         if (i >= ndrootranks) {
29           ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr);
30           ierr = MPI_Recv_init(link->root[i],n,unit,bas->iranks[i],link->tag,comm,&requests[i]);CHKERRQ(ierr);
31         }
32       }
33       for (i=0; i<nleafranks; i++) {
34         if (i >= ndleafranks) {
35           ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr);
36           ierr = MPI_Send_init(link->leaf[i],n,unit,sf->ranks[i],link->tag,comm,&requests[nrootranks+i]);CHKERRQ(ierr);
37         }
38       }
39     } else if (direction == PETSCSF_ROOT2LEAF_BCAST) {
40       for (i=0; i<nrootranks; i++) {
41         if (i >= ndrootranks) {
42           ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr);
43           ierr = MPI_Send_init(link->root[i],n,unit,bas->iranks[i],link->tag,comm,&requests[i]);CHKERRQ(ierr);
44         }
45       }
46       for (i=0; i<nleafranks; i++) {
47         if (i >= ndleafranks) {
48           ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr);
49           ierr = MPI_Recv_init(link->leaf[i],n,unit,sf->ranks[i],link->tag,comm,&requests[nrootranks+i]);CHKERRQ(ierr);
50         }
51       }
52     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Out-of-range PetscSFDirection = %D\n",direction);
53 
54     link->initialized[direction] = PETSC_TRUE;
55   }
56 
57   if (rootreqs) *rootreqs = requests;
58   if (leafreqs) *leafreqs = requests + bas->niranks;
59   PetscFunctionReturn(0);
60 }
61 
62 static PetscErrorCode PetscSFPackGet_Basic(PetscSF sf,MPI_Datatype unit,const void *key,PetscSFDirection direction,PetscSFPack_Basic *mylink)
63 {
64   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
65   PetscErrorCode    ierr;
66   PetscInt          i,nrootranks,ndrootranks,nleafranks,ndleafranks;
67   const PetscInt    *rootoffset,*leafoffset;
68   PetscSFPack       *p;
69   PetscSFPack_Basic link;
70 
71   PetscFunctionBegin;
72   /* Look for types in cache */
73   for (p=&bas->avail; (link=(PetscSFPack_Basic)*p); p=&link->next) {
74     PetscBool match;
75     ierr = MPIPetsc_Type_compare(unit,link->unit,&match);CHKERRQ(ierr);
76     if (match) {
77       *p = link->next; /* Remove from available list */
78       goto found;
79     }
80   }
81 
82   ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr);
83   ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr);
84   ierr = PetscNew(&link);CHKERRQ(ierr);
85   ierr = PetscSFPackSetupType((PetscSFPack)link,unit);CHKERRQ(ierr);
86   ierr = PetscMalloc2(nrootranks,&link->root,nleafranks,&link->leaf);CHKERRQ(ierr);
87   /* Double the requests. First half are used for reduce (leaf2root) communication, second half for bcast (root2leaf) communication */
88   link->half = nrootranks + nleafranks;
89   ierr       = PetscMalloc1(link->half*2,&link->requests);CHKERRQ(ierr);
90   for (i=0; i<link->half*2; i++) link->requests[i] = MPI_REQUEST_NULL; /* Must be init'ed since some are unused but we call MPI_Waitall on them in whole */
91   /* One tag per link */
92   ierr = PetscCommGetNewTag(PetscObjectComm((PetscObject)sf),&link->tag);CHKERRQ(ierr);
93 
94   /* Allocate root and leaf buffers */
95   for (i=0; i<nrootranks; i++) {ierr = PetscMalloc((rootoffset[i+1]-rootoffset[i])*link->unitbytes,&link->root[i]);CHKERRQ(ierr);}
96   for (i=0; i<nleafranks; i++) {
97     if (i < ndleafranks) { /* Leaf buffers for distinguished ranks are pointers directly into root buffers */
98       if (ndrootranks != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot match distinguished ranks");
99       link->leaf[i] = link->root[0];
100       continue;
101     }
102     ierr = PetscMalloc((leafoffset[i+1]-leafoffset[i])*link->unitbytes,&link->leaf[i]);CHKERRQ(ierr);
103   }
104 
105 found:
106   link->key  = key;
107   link->next = bas->inuse;
108   bas->inuse = (PetscSFPack)link;
109 
110   *mylink    = link;
111   PetscFunctionReturn(0);
112 }
113 
114 /*===================================================================================*/
115 /*              SF public interface implementations                                  */
116 /*===================================================================================*/
117 PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
118 {
119   PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
120   PetscErrorCode ierr;
121   PetscInt       *rlengths,*ilengths,i;
122   PetscMPIInt    rank,niranks,*iranks,tag;
123   MPI_Comm       comm;
124   MPI_Group      group;
125   MPI_Request    *rootreqs,*leafreqs;
126 
127   PetscFunctionBegin;
128   ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRQ(ierr);
129   ierr = PetscSFSetUpRanks(sf,group);CHKERRQ(ierr);
130   ierr = MPI_Group_free(&group);CHKERRQ(ierr);
131   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
132   ierr = PetscObjectGetNewTag((PetscObject)sf,&tag);CHKERRQ(ierr);
133   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
134   /*
135    * Inform roots about how many leaves and from which ranks
136    */
137   ierr = PetscMalloc1(sf->nranks,&rlengths);CHKERRQ(ierr);
138   /* Determine number, sending ranks, and length of incoming */
139   for (i=0; i<sf->nranks; i++) {
140     rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */
141   }
142   ierr = PetscCommBuildTwoSided(comm,1,MPIU_INT,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths);CHKERRQ(ierr);
143 
144   /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why.
145      We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is
146      small and the sorting is cheap.
147    */
148   ierr = PetscSortMPIIntWithIntArray(niranks,iranks,ilengths);CHKERRQ(ierr);
149 
150   /* Partition into distinguished and non-distinguished incoming ranks */
151   bas->ndiranks = sf->ndranks;
152   bas->niranks = bas->ndiranks + niranks;
153   ierr = PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset);CHKERRQ(ierr);
154   bas->ioffset[0] = 0;
155   for (i=0; i<bas->ndiranks; i++) {
156     bas->iranks[i] = sf->ranks[i];
157     bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i];
158   }
159   if (bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks");
160   for ( ; i<bas->niranks; i++) {
161     bas->iranks[i] = iranks[i-bas->ndiranks];
162     bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks];
163   }
164   bas->itotal = bas->ioffset[i];
165   ierr = PetscFree(rlengths);CHKERRQ(ierr);
166   ierr = PetscFree(iranks);CHKERRQ(ierr);
167   ierr = PetscFree(ilengths);CHKERRQ(ierr);
168 
169   /* Send leaf identities to roots */
170   ierr = PetscMalloc1(bas->itotal,&bas->irootloc);CHKERRQ(ierr);
171   ierr = PetscMalloc2(bas->niranks-bas->ndiranks,&rootreqs,sf->nranks-sf->ndranks,&leafreqs);CHKERRQ(ierr);
172   for (i=bas->ndiranks; i<bas->niranks; i++) {
173     ierr = MPI_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],tag,comm,&rootreqs[i-bas->ndiranks]);CHKERRQ(ierr);
174   }
175   for (i=0; i<sf->nranks; i++) {
176     PetscMPIInt npoints;
177     ierr = PetscMPIIntCast(sf->roffset[i+1] - sf->roffset[i],&npoints);CHKERRQ(ierr);
178     if (i < sf->ndranks) {
179       if (sf->ranks[i] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank");
180       if (bas->iranks[0] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank");
181       if (npoints != bas->ioffset[1]-bas->ioffset[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths");
182       ierr = PetscArraycpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints);CHKERRQ(ierr);
183       continue;
184     }
185     ierr = MPI_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],tag,comm,&leafreqs[i-sf->ndranks]);CHKERRQ(ierr);
186   }
187   ierr = MPI_Waitall(bas->niranks-bas->ndiranks,rootreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
188   ierr = MPI_Waitall(sf->nranks-sf->ndranks,leafreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
189   ierr = PetscFree2(rootreqs,leafreqs);CHKERRQ(ierr);
190 
191   /* Setup packing optimization for root and leaf */
192   ierr = PetscSFPackSetupOptimization(sf->nranks,sf->roffset,sf->rmine,&sf->leafpackopt);CHKERRQ(ierr);
193   ierr = PetscSFPackSetupOptimization(bas->niranks,bas->ioffset,bas->irootloc,&bas->rootpackopt);CHKERRQ(ierr);
194   PetscFunctionReturn(0);
195 }
196 
197 static PetscErrorCode PetscSFSetFromOptions_Basic(PetscOptionItems *PetscOptionsObject,PetscSF sf)
198 {
199   PetscErrorCode ierr;
200 
201   PetscFunctionBegin;
202   ierr = PetscOptionsHead(PetscOptionsObject,"PetscSF Basic options");CHKERRQ(ierr);
203   ierr = PetscOptionsTail();CHKERRQ(ierr);
204   PetscFunctionReturn(0);
205 }
206 
207 PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf)
208 {
209   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
210   PetscErrorCode    ierr;
211   PetscSFPack_Basic link,next;
212 
213   PetscFunctionBegin;
214   if (bas->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed");
215   ierr = PetscFree2(bas->iranks,bas->ioffset);CHKERRQ(ierr);
216   ierr = PetscFree(bas->irootloc);CHKERRQ(ierr);
217   for (link=(PetscSFPack_Basic)bas->avail; link; link=next) {
218     PetscInt i;
219     next = (PetscSFPack_Basic)link->next;
220     if (!link->isbuiltin) {ierr = MPI_Type_free(&link->unit);CHKERRQ(ierr);}
221     for (i=0; i<bas->niranks; i++) {ierr = PetscFree(link->root[i]);CHKERRQ(ierr);}
222     for (i=sf->ndranks; i<sf->nranks; i++) {ierr = PetscFree(link->leaf[i]);CHKERRQ(ierr);} /* Free only non-distinguished leaf buffers */
223     ierr = PetscFree2(link->root,link->leaf);CHKERRQ(ierr);
224     /* Free persistent requests using MPI_Request_free */
225     for (i=0; i<link->half*2; i++) {
226       if (link->requests[i] != MPI_REQUEST_NULL) {ierr = MPI_Request_free(&link->requests[i]);CHKERRQ(ierr);}
227     }
228     ierr = PetscFree(link->requests);CHKERRQ(ierr);
229     ierr = PetscFree(link);CHKERRQ(ierr);
230   }
231   bas->avail = NULL;
232   ierr = PetscSFPackDestoryOptimization(&sf->leafpackopt);CHKERRQ(ierr);
233   ierr = PetscSFPackDestoryOptimization(&bas->rootpackopt);CHKERRQ(ierr);
234   PetscFunctionReturn(0);
235 }
236 
237 PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
238 {
239   PetscErrorCode ierr;
240 
241   PetscFunctionBegin;
242   ierr = PetscSFReset(sf);CHKERRQ(ierr);
243   ierr = PetscFree(sf->data);CHKERRQ(ierr);
244   PetscFunctionReturn(0);
245 }
246 
247 PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer)
248 {
249   PetscErrorCode ierr;
250   PetscBool      iascii;
251 
252   PetscFunctionBegin;
253   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
254   if (iascii) {
255     ierr = PetscViewerASCIIPrintf(viewer,"  sort=%s\n",sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr);
256   }
257   PetscFunctionReturn(0);
258 }
259 
260 static PetscErrorCode PetscSFBcastAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
261 {
262   PetscErrorCode    ierr;
263   PetscSFPack_Basic link;
264   PetscInt          i,nrootranks,ndrootranks,nleafranks,ndleafranks;
265   const PetscInt    *rootoffset,*leafoffset,*rootloc,*leafloc;
266   const PetscMPIInt *rootranks,*leafranks;
267   MPI_Request       *rootreqs,*leafreqs;
268   PetscMPIInt       n;
269   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
270 
271   PetscFunctionBegin;
272   ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr);
273   ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc,NULL);CHKERRQ(ierr);
274   ierr = PetscSFPackGet_Basic(sf,unit,rootdata,PETSCSF_ROOT2LEAF_BCAST,&link);CHKERRQ(ierr);
275 
276   ierr = PetscSFPackGetReqs_Basic(sf,unit,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);CHKERRQ(ierr);
277   /* Eagerly post leaf receives, but only from non-distinguished ranks -- distinguished ranks will receive via shared memory */
278   ierr = PetscMPIIntCast(leafoffset[nleafranks]-leafoffset[ndleafranks],&n);CHKERRQ(ierr);
279   ierr = MPI_Startall_irecv(n,unit,nleafranks-ndleafranks,leafreqs+ndleafranks);CHKERRQ(ierr); /* One can wait but not start a null request */
280 
281   /* Pack and send root data */
282   for (i=0; i<nrootranks; i++) {
283     void *packstart = link->root[i];
284     ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr);
285     (*link->Pack)(n,link->bs,rootloc+rootoffset[i],i,bas->rootpackopt,rootdata,packstart);
286     if (i < ndrootranks) continue; /* shared memory */
287     ierr = MPI_Start_isend(n,unit,&rootreqs[i]);CHKERRQ(ierr);
288   }
289   PetscFunctionReturn(0);
290 }
291 
292 PETSC_INTERN PetscErrorCode PetscSFBcastAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
293 {
294   PetscErrorCode    ierr;
295   PetscSFPack_Basic link;
296   PetscInt          i,nleafranks,ndleafranks;
297   const PetscInt    *leafoffset,*leafloc;
298   PetscErrorCode    (*UnpackAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);
299   PetscMPIInt       typesize = -1;
300 
301   PetscFunctionBegin;
302   ierr = PetscSFPackGetInUse(sf,unit,rootdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr);
303   ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr);
304   ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,&leafloc,NULL);CHKERRQ(ierr);
305   ierr = PetscSFPackGetUnpackAndOp(sf,(PetscSFPack)link,op,&UnpackAndOp);CHKERRQ(ierr);
306 
307   if (UnpackAndOp) { typesize = link->unitbytes; }
308   else { ierr = MPI_Type_size(unit,&typesize);CHKERRQ(ierr); }
309 
310   for (i=0; i<nleafranks; i++) {
311     PetscMPIInt n   = leafoffset[i+1] - leafoffset[i];
312     char *packstart = (char *) link->leaf[i];
313     if (UnpackAndOp) { (*UnpackAndOp)(n,link->bs,leafloc+leafoffset[i],i,sf->leafpackopt,leafdata,(const void *)packstart); }
314 #if defined(PETSC_HAVE_MPI_REDUCE_LOCAL)
315     else if (n) { /* the op should be defined to operate on the whole datatype, so we ignore link->bs */
316       PetscInt j;
317       for (j=0; j<n; j++) { ierr = MPI_Reduce_local(packstart+j*typesize,((char *) leafdata)+(leafloc[leafoffset[i]+j])*typesize,1,unit,op);CHKERRQ(ierr); }
318     }
319 #else
320     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No unpacking reduction operation for this MPI_Op");
321 #endif
322   }
323 
324   ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr);
325   PetscFunctionReturn(0);
326 }
327 
328 /* leaf -> root with reduction */
329 static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
330 {
331   PetscSFPack_Basic link;
332   PetscErrorCode    ierr;
333   PetscInt          i,nrootranks,ndrootranks,nleafranks,ndleafranks;
334   const PetscInt    *rootoffset,*leafoffset,*rootloc,*leafloc;
335   const PetscMPIInt *rootranks,*leafranks;
336   MPI_Request       *rootreqs,*leafreqs;
337   PetscMPIInt       n;
338 
339   PetscFunctionBegin;
340   ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr);
341   ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc,NULL);CHKERRQ(ierr);
342   ierr = PetscSFPackGet_Basic(sf,unit,leafdata,PETSCSF_LEAF2ROOT_REDUCE,&link);CHKERRQ(ierr);
343 
344   ierr = PetscSFPackGetReqs_Basic(sf,unit,link,PETSCSF_LEAF2ROOT_REDUCE,&rootreqs,&leafreqs);CHKERRQ(ierr);
345   /* Eagerly post root receives for non-distinguished ranks */
346   ierr = PetscMPIIntCast(rootoffset[nrootranks]-rootoffset[ndrootranks],&n);CHKERRQ(ierr);
347   ierr = MPI_Startall_irecv(n,unit,nrootranks-ndrootranks,rootreqs+ndrootranks);CHKERRQ(ierr);
348 
349   /* Pack and send leaf data */
350   for (i=0; i<nleafranks; i++) {
351     void *packstart = link->leaf[i];
352     ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr);
353     (*link->Pack)(n,link->bs,leafloc+leafoffset[i],i,sf->leafpackopt,leafdata,packstart);
354     if (i < ndleafranks) continue; /* shared memory */
355     ierr = MPI_Start_isend(n,unit,&leafreqs[i]);CHKERRQ(ierr);
356   }
357   PetscFunctionReturn(0);
358 }
359 
360 PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
361 {
362   PetscErrorCode    (*UnpackAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);
363   PetscErrorCode    ierr;
364   PetscSFPack_Basic link;
365   PetscInt          i,nrootranks;
366   PetscMPIInt       typesize = -1;
367   const PetscInt    *rootoffset,*rootloc;
368   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
369 
370   PetscFunctionBegin;
371   ierr = PetscSFPackGetInUse(sf,unit,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr);
372   /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
373   ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr);
374   ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,NULL,NULL,&rootoffset,&rootloc);CHKERRQ(ierr);
375   ierr = PetscSFPackGetUnpackAndOp(sf,(PetscSFPack)link,op,&UnpackAndOp);CHKERRQ(ierr);
376   if (UnpackAndOp) {
377     typesize = link->unitbytes;
378   }
379   else {
380     ierr = MPI_Type_size(unit,&typesize);CHKERRQ(ierr);
381   }
382   for (i=0; i<nrootranks; i++) {
383     PetscMPIInt n   = rootoffset[i+1] - rootoffset[i];
384     char *packstart = (char *) link->root[i];
385 
386     if (UnpackAndOp) {
387       (*UnpackAndOp)(n,link->bs,rootloc+rootoffset[i],i,bas->rootpackopt,rootdata,(const void *)packstart);
388     }
389 #if defined(PETSC_HAVE_MPI_REDUCE_LOCAL)
390     else if (n) { /* the op should be defined to operate on the whole datatype, so we ignore link->bs */
391       PetscInt j;
392 
393       for (j = 0; j < n; j++) {
394         ierr = MPI_Reduce_local(packstart+j*typesize,((char *) rootdata)+(rootloc[rootoffset[i]+j])*typesize,1,unit,op);CHKERRQ(ierr);
395       }
396     }
397 #else
398     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No unpacking reduction operation for this MPI_Op");
399 #endif
400   }
401   ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr);
402   PetscFunctionReturn(0);
403 }
404 
405 PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
406 {
407   PetscErrorCode ierr;
408 
409   PetscFunctionBegin;
410   ierr = PetscSFReduceBegin(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
411   PetscFunctionReturn(0);
412 }
413 
414 static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
415 {
416   PetscErrorCode    (*FetchAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);
417   PetscErrorCode    ierr;
418   PetscSFPack_Basic link;
419   PetscInt          i,nrootranks,ndrootranks,nleafranks,ndleafranks;
420   const PetscInt    *rootoffset,*leafoffset,*rootloc,*leafloc;
421   const PetscMPIInt *rootranks,*leafranks;
422   MPI_Request       *rootreqs,*leafreqs;
423   PetscMPIInt       n;
424   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
425 
426   PetscFunctionBegin;
427   ierr = PetscSFPackGetInUse(sf,unit,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr);
428   /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
429   ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr);
430   ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr);
431   ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc,NULL);CHKERRQ(ierr);
432 
433   ierr = PetscSFPackGetReqs_Basic(sf,unit,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);CHKERRQ(ierr);
434   /* Post leaf receives */
435   ierr = PetscMPIIntCast(leafoffset[nleafranks]-leafoffset[ndleafranks],&n);CHKERRQ(ierr);
436   ierr = MPI_Startall_irecv(n,unit,nleafranks-ndleafranks,leafreqs+ndleafranks);CHKERRQ(ierr);
437 
438   /* Process local fetch-and-op, post root sends */
439   ierr = PetscSFPackGetFetchAndOp(sf,(PetscSFPack)link,op,&FetchAndOp);CHKERRQ(ierr);
440   for (i=0; i<nrootranks; i++) {
441     void *packstart = link->root[i];
442     ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr);
443     (*FetchAndOp)(n,link->bs,rootloc+rootoffset[i],i,bas->rootpackopt,rootdata,packstart);
444     if (i < ndrootranks) continue; /* shared memory */
445     ierr = MPI_Start_isend(n,unit,&rootreqs[i]);CHKERRQ(ierr);
446   }
447   ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr);
448   for (i=0; i<nleafranks; i++) {
449     const void  *packstart = link->leaf[i];
450     ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr);
451     (*link->UnpackAndInsert)(n,link->bs,leafloc+leafoffset[i],i,sf->leafpackopt,leafupdate,packstart);
452   }
453   ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr);
454   PetscFunctionReturn(0);
455 }
456 
457 PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
458 {
459   PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
460 
461   PetscFunctionBegin;
462   if (niranks)  *niranks  = bas->niranks;
463   if (iranks)   *iranks   = bas->iranks;
464   if (ioffset)  *ioffset  = bas->ioffset;
465   if (irootloc) *irootloc = bas->irootloc;
466   PetscFunctionReturn(0);
467 }
468 
469 PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
470 {
471   PetscSF_Basic  *dat;
472   PetscErrorCode ierr;
473 
474   PetscFunctionBegin;
475   sf->ops->SetUp                = PetscSFSetUp_Basic;
476   sf->ops->SetFromOptions       = PetscSFSetFromOptions_Basic;
477   sf->ops->Reset                = PetscSFReset_Basic;
478   sf->ops->Destroy              = PetscSFDestroy_Basic;
479   sf->ops->View                 = PetscSFView_Basic;
480   sf->ops->BcastAndOpBegin      = PetscSFBcastAndOpBegin_Basic;
481   sf->ops->BcastAndOpEnd        = PetscSFBcastAndOpEnd_Basic;
482   sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
483   sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
484   sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
485   sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
486   sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
487 
488   ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr);
489   sf->data = (void*)dat;
490   PetscFunctionReturn(0);
491 }
492