xref: /petsc/src/vec/is/sf/impls/basic/allgatherv/sfallgatherv.c (revision dd5b3ca66ca4de5e00e985c205d0f80060c552e8)
1 #include <../src/vec/is/sf/impls/basic/allgatherv/sfallgatherv.h>
2 
3 PETSC_INTERN PetscErrorCode PetscSFBcastAndOpBegin_Gatherv(PetscSF,MPI_Datatype,const void*,void*,MPI_Op);
4 
5 /*===================================================================================*/
6 /*              Internal routines for PetscSFPack_Allgatherv                         */
7 /*===================================================================================*/
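/* Get a pack link that matches the MPI datatype 'unit', keyed by the root/leaf data pointer 'key'.
   Links are cached: a matching link is taken from dat->avail if possible, otherwise a new one is
   created; either way the link is put on dat->inuse until the operation completes and the link is
   reclaimed with PetscSFPackReclaim(). */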
8 PETSC_INTERN PetscErrorCode PetscSFPackGet_Allgatherv(PetscSF sf,MPI_Datatype unit,const void *key,PetscSFPack_Allgatherv *mylink)
9 {
10   PetscSF_Allgatherv     *dat = (PetscSF_Allgatherv*)sf->data;
11   PetscSFPack_Allgatherv link;
12   PetscSFPack            *p;
13   PetscErrorCode         ierr;
14 
15   PetscFunctionBegin;
16   /* Look for types in cache */
17   for (p=&dat->avail; (link=(PetscSFPack_Allgatherv)*p); p=&link->next) {
18     PetscBool match;
19     ierr = MPIPetsc_Type_compare(unit,link->unit,&match);CHKERRQ(ierr);
20     if (match) {
21       *p = link->next; /* Remove from available list */
22       goto found;
23     }
24   }
25 
26   ierr = PetscNew(&link);CHKERRQ(ierr);
27   ierr = PetscSFPackSetupType((PetscSFPack)link,unit);CHKERRQ(ierr);
28   /* Must be initialized to MPI_REQUEST_NULL so that we can safely wait on the request even if it is not used */
29   link->request = MPI_REQUEST_NULL;
30   /* One tag per link */
31   ierr = PetscCommGetNewTag(PetscObjectComm((PetscObject)sf),&link->tag);CHKERRQ(ierr);
32 
33   /* DO NOT allocate link->root/leaf. We use lazy allocation since these buffers are likely not needed */
34 found:
35   link->key  = key;
36   link->next = dat->inuse;
37   dat->inuse = (PetscSFPack)link;
38 
39   *mylink     = link;
40   PetscFunctionReturn(0);
41 }
42 
43 /*===================================================================================*/
44 /*              Implementations of SF public APIs                                    */
45 /*===================================================================================*/
46 
47 /* PetscSFGetGraph is non-collective. An implementation should not have collective calls */
48 PETSC_INTERN PetscErrorCode PetscSFGetGraph_Allgatherv(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
49 {
50   PetscErrorCode ierr;
51   PetscInt       i,j,k;
52   const PetscInt *range;
53   PetscMPIInt    size;
54 
55   PetscFunctionBegin;
56   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
57   if (nroots)  *nroots  = sf->nroots;
58   if (nleaves) *nleaves = sf->nleaves;
59   if (ilocal)  *ilocal  = NULL; /* Contiguous leaves */
60   if (iremote) {
61     if (!sf->remote && sf->nleaves) { /* The && sf->nleaves makes sfgatherv able to inherit this routine */
62       ierr = PetscLayoutGetRanges(sf->map,&range);CHKERRQ(ierr);
63       ierr = PetscMalloc1(sf->nleaves,&sf->remote);CHKERRQ(ierr);
64       sf->remote_alloc = sf->remote;
65       for (i=0; i<size; i++) {
66         for (j=range[i],k=0; j<range[i+1]; j++,k++) {
67           sf->remote[j].rank  = i;
68           sf->remote[j].index = k;
69         }
70       }
71     }
72     *iremote = sf->remote;
73   }
74   PetscFunctionReturn(0);
75 }
76 
77 PETSC_INTERN PetscErrorCode PetscSFSetUp_Allgatherv(PetscSF sf)
78 {
79   PetscErrorCode     ierr;
80   PetscSF_Allgatherv *dat = (PetscSF_Allgatherv*)sf->data;
81   PetscMPIInt        size;
82   PetscInt           i;
83   const PetscInt     *range;
84 
85   PetscFunctionBegin;
86   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
87   if (sf->nleaves) { /* This if (sf->nleaves) test makes sfgatherv able to inherit this routine */
88     ierr = PetscMalloc1(size,&dat->recvcounts);CHKERRQ(ierr);
89     ierr = PetscMalloc1(size,&dat->displs);CHKERRQ(ierr);
90     ierr = PetscLayoutGetRanges(sf->map,&range);CHKERRQ(ierr);
91 
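    /* recvcounts[i]/displs[i] are the number of roots owned by rank i and their offset (in units)
       within the allgathered leaf space, taken from the layout ranges */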
92     for (i=0; i<size; i++) {
93       ierr = PetscMPIIntCast(range[i],&dat->displs[i]);CHKERRQ(ierr);
94       ierr = PetscMPIIntCast(range[i+1]-range[i],&dat->recvcounts[i]);CHKERRQ(ierr);
95     }
96   }
97   PetscFunctionReturn(0);
98 }
99 
100 PETSC_INTERN PetscErrorCode PetscSFReset_Allgatherv(PetscSF sf)
101 {
102   PetscSF_Allgatherv     *dat = (PetscSF_Allgatherv*)sf->data;
103   PetscSFPack_Allgatherv link,next;
104   PetscErrorCode         ierr;
105 
106   PetscFunctionBegin;
107   ierr = PetscFree(dat->iranks);CHKERRQ(ierr);
108   ierr = PetscFree(dat->ioffset);CHKERRQ(ierr);
109   ierr = PetscFree(dat->irootloc);CHKERRQ(ierr);
110   ierr = PetscFree(dat->recvcounts);CHKERRQ(ierr);
111   ierr = PetscFree(dat->displs);CHKERRQ(ierr);
112   if (dat->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed");
113   for (link=(PetscSFPack_Allgatherv)dat->avail; link; link=next) {
114     next = (PetscSFPack_Allgatherv)link->next;
115     if (!link->isbuiltin) {ierr = MPI_Type_free(&link->unit);CHKERRQ(ierr);}
116     ierr = PetscFree(link->root);CHKERRQ(ierr);
117     ierr = PetscFree(link->leaf);CHKERRQ(ierr);
118     ierr = PetscFree(link);CHKERRQ(ierr);
119   }
120   dat->avail = NULL;
121   PetscFunctionReturn(0);
122 }
123 
124 PETSC_INTERN PetscErrorCode PetscSFDestroy_Allgatherv(PetscSF sf)
125 {
126   PetscErrorCode ierr;
127 
128   PetscFunctionBegin;
129   ierr = PetscSFReset_Allgatherv(sf);CHKERRQ(ierr);
130   ierr = PetscFree(sf->data);CHKERRQ(ierr);
131   PetscFunctionReturn(0);
132 }
133 
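/* Start a broadcast of rootdata to leafdata: every rank contributes its roots and receives all roots
   as its leaves via a nonblocking MPI_Iallgatherv. With MPIU_REPLACE we can receive directly into
   leafdata; with any other op we receive into the link's leaf buffer and combine in BcastAndOpEnd. */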
134 static PetscErrorCode PetscSFBcastAndOpBegin_Allgatherv(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
135 {
136   PetscErrorCode         ierr;
137   PetscSFPack_Allgatherv link;
138   PetscMPIInt            sendcount;
139   MPI_Comm               comm;
140   void                   *recvbuf;
141   PetscSF_Allgatherv     *dat = (PetscSF_Allgatherv*)sf->data;
142 
143   PetscFunctionBegin;
144   ierr = PetscSFPackGet_Allgatherv(sf,unit,rootdata,&link);CHKERRQ(ierr);
145   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
146   ierr = PetscMPIIntCast(sf->nroots,&sendcount);CHKERRQ(ierr);
147 
148   if (op == MPIU_REPLACE) {
149     recvbuf = leafdata;
150   } else {
151     if (!link->leaf) {ierr = PetscMalloc(sf->nleaves*link->unitbytes,&link->leaf);CHKERRQ(ierr);}
152     recvbuf = link->leaf;
153   }
154   ierr = MPIU_Iallgatherv(rootdata,sendcount,unit,recvbuf,dat->recvcounts,dat->displs,unit,comm,&link->request);CHKERRQ(ierr);
155   PetscFunctionReturn(0);
156 }
157 
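/* Finish the MPI_Iallgatherv started in BcastAndOpBegin and, for op != MPIU_REPLACE, combine the
   received roots into leafdata with the matching UnpackAndOp routine (or MPI_Reduce_local) */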
158 PETSC_INTERN PetscErrorCode PetscSFBcastAndOpEnd_Allgatherv(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
159 {
160   PetscErrorCode         ierr,(*UnpackAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);
161   PetscSFPack_Allgatherv link;
162 
163   PetscFunctionBegin;
164   ierr = PetscSFPackGetInUse(sf,unit,rootdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr);
165   ierr = PetscSFPackGetUnpackAndOp(sf,(PetscSFPack)link,op,&UnpackAndOp);CHKERRQ(ierr);
166   ierr = MPI_Wait(&link->request,MPI_STATUS_IGNORE);CHKERRQ(ierr);
167 
168   if (op != MPIU_REPLACE) {
169     if (UnpackAndOp) {ierr = (*UnpackAndOp)(sf->nleaves,link->bs,NULL,0,NULL,leafdata,link->leaf);CHKERRQ(ierr);}
170 #if defined(PETSC_HAVE_MPI_REDUCE_LOCAL)
171     else {
172       /* op might be user-defined */
173       PetscMPIInt count;
174       ierr = PetscMPIIntCast(sf->nleaves,&count);CHKERRQ(ierr);
175       ierr = MPI_Reduce_local(link->leaf,leafdata,count,unit,op);CHKERRQ(ierr);
176     }
177 #else
178     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No unpacking reduction operation for this MPI_Op");
179 #endif
180   }
181   ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr);
182   PetscFunctionReturn(0);
183 }
184 
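/* Start a reduction of leafdata into rootdata. Since every rank's leaves mirror all roots, REPLACE
   degenerates into a local copy of the owned segment; other ops go through rank 0 as described below. */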
185 static PetscErrorCode PetscSFReduceBegin_Allgatherv(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
186 {
187   PetscErrorCode         ierr;
188   PetscSFPack_Allgatherv link;
189   PetscSF_Allgatherv     *dat = (PetscSF_Allgatherv*)sf->data;
190   PetscInt               rstart;
191   PetscMPIInt            rank,count,recvcount;
192   MPI_Comm               comm;
193 
194   PetscFunctionBegin;
195   ierr = PetscSFPackGet_Allgatherv(sf,unit,leafdata,&link);CHKERRQ(ierr);
196   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
197   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
198 
199   if (op == MPIU_REPLACE) {
200     /* REPLACE is only meaningful when all processes have the same leafdata to reduce. Therefore copying from the local leafdata is fine */
201     ierr = PetscLayoutGetRange(sf->map,&rstart,NULL);CHKERRQ(ierr);
202     ierr = PetscMemcpy(rootdata,(const char*)leafdata+(size_t)rstart*link->unitbytes,(size_t)sf->nroots*link->unitbytes);CHKERRQ(ierr);
203   } else {
204     /* Reduce all leafdata to rank 0, then scatter the result into each rank's root buffer; ReduceEnd reduces the root buffer into rootdata */
205     if (!rank && !link->leaf) {ierr = PetscMalloc(sf->nleaves*link->unitbytes,&link->leaf);CHKERRQ(ierr);}
206     ierr = PetscMPIIntCast(sf->nleaves*link->bs,&count);CHKERRQ(ierr);
207     ierr = PetscMPIIntCast(sf->nroots,&recvcount);CHKERRQ(ierr);
208     ierr = MPI_Reduce(leafdata,link->leaf,count,link->basicunit,op,0,comm);CHKERRQ(ierr); /* Must do the reduce with the MPI builtin datatype basicunit */
209     if (!link->root) {ierr = PetscMalloc(sf->nroots*link->unitbytes,&link->root);CHKERRQ(ierr);}
210     ierr = MPIU_Iscatterv(link->leaf,dat->recvcounts,dat->displs,unit,link->root,recvcount,unit,0,comm,&link->request);CHKERRQ(ierr);
211   }
212   PetscFunctionReturn(0);
213 }
214 
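/* Finish the reduction started in ReduceBegin: wait for the MPI_Iscatterv (if any) and combine the
   scattered values into rootdata */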
215 PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Allgatherv(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
216 {
217   PetscErrorCode         ierr,(*UnpackAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);
218   PetscSFPack_Allgatherv link;
219 
220   PetscFunctionBegin;
221   ierr = PetscSFPackGetInUse(sf,unit,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr);
222   ierr = MPI_Wait(&link->request,MPI_STATUS_IGNORE);CHKERRQ(ierr);
223   if (op != MPIU_REPLACE) {
224     ierr = PetscSFPackGetUnpackAndOp(sf,(PetscSFPack)link,op,&UnpackAndOp);CHKERRQ(ierr);
225     if (UnpackAndOp) {ierr = (*UnpackAndOp)(sf->nroots,link->bs,NULL,0,NULL,rootdata,link->root);CHKERRQ(ierr);}
226 #if defined(PETSC_HAVE_MPI_REDUCE_LOCAL)
227     else if (sf->nroots) {
228       /* op might be user-defined */
229       PetscMPIInt count;
230       ierr = PetscMPIIntCast(sf->nroots,&count);CHKERRQ(ierr);
231       ierr = MPI_Reduce_local(link->root,rootdata,count,unit,op);CHKERRQ(ierr);
232     }
233 #else
234     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No unpacking reduction operation for this MPI_Op");
235 #endif
236   }
237   ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr);
238   PetscFunctionReturn(0);
239 }
240 
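/* Broadcast rootdata to the leaves on rank 0 only. This reuses PetscSFBcastAndOpBegin_Gatherv, which
   gathers the roots to rank 0; leafdata on other ranks is untouched. */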
241 static PetscErrorCode PetscSFBcastToZero_Allgatherv(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
242 {
243   PetscErrorCode         ierr;
244   PetscSFPack_Allgatherv link;
245 
246   PetscFunctionBegin;
247   ierr = PetscSFBcastAndOpBegin_Gatherv(sf,unit,rootdata,leafdata,MPIU_REPLACE);CHKERRQ(ierr);
248   /* A simplified PetscSFBcastAndOpEnd_Allgatherv: op is MPIU_REPLACE, so just wait for the request and reclaim the link */
249   ierr = PetscSFPackGetInUse(sf,unit,rootdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr);
250   ierr = MPI_Wait(&link->request,MPI_STATUS_IGNORE);CHKERRQ(ierr);
251   ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr);
252   PetscFunctionReturn(0);
253 }
254 
255 /* This routine is very tricky (I believe it is rarely used with this kind of graph, so we just provide a simple but non-optimal implementation).
256 
257    Suppose we have three ranks. Rank 0 has a root with value 1. Ranks 0,1,2 have a leaf with value 2,3,4 respectively. The leaves are all connected
258    to the root on rank 0. Suppose op=MPI_SUM and ranks 0,1,2 get the root state in rank order. By definition of this routine, rank 0 sees 1
259    in the root, fetches it into its leafupdate, then updates the root to 1 + 2 = 3; rank 1 sees 3 in the root, fetches it into its leafupdate, then
260    updates the root to 3 + 3 = 6; rank 2 sees 6 in the root, fetches it into its leafupdate, then updates the root to 6 + 4 = 10. At the end,
261    leafupdate on ranks 0,1,2 is 1,3,6 respectively, and the root is 10.
262 
263    One optimized implementation could be: starting from the initial state:
264              rank-0   rank-1    rank-2
265         Root     1
266         Leaf     2       3         4
267 
268    Shift leaves rightwards to leafupdate. Rank 0 gathers the root value and puts it in leafupdate. We have:
269              rank-0   rank-1    rank-2
270         Root     1
271         Leaf     2       3         4
272      Leafupdate  1       2         3
273 
274    Then, do MPI_Scan on leafupdate and get:
275              rank-0   rank-1    rank-2
276         Root     1
277         Leaf     2       3         4
278      Leafupdate  1       3         6
279 
280    Rank 2 sums its leaf and leafupdate, scatters the result to the root, and gets
281              rank-0   rank-1    rank-2
282         Root     10
283         Leaf     2       3         4
284      Leafupdate  1       3         6
285 
286    We use a simpler implementation. From the same initial state, we copy leafdata to leafupdate
287              rank-0   rank-1    rank-2
288         Root     1
289         Leaf     2       3         4
290      Leafupdate  2       3         4
291 
292    Do MPI_Exscan on leafupdate,
293              rank-0   rank-1    rank-2
294         Root     1
295         Leaf     2       3         4
296      Leafupdate  2       2         5
297 
298    BcastAndOp from root to leafupdate,
299              rank-0   rank-1    rank-2
300         Root     1
301         Leaf     2       3         4
302      Leafupdate  3       3         6
303 
304    Copy root to leafupdate on rank-0
305              rank-0   rank-1    rank-2
306         Root     1
307         Leaf     2       3         4
308      Leafupdate  1       3         6
309 
310    Reduce from leaf to root,
311              rank-0   rank-1    rank-2
312         Root     10
313         Leaf     2       3         4
314      Leafupdate  1       3         6
315 */
316 PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Allgatherv(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
317 {
318   PetscErrorCode         ierr;
319   PetscSFPack_Allgatherv link;
320   MPI_Comm               comm;
321   PetscMPIInt            count;
322 
323   PetscFunctionBegin;
324   /* Copy leafdata to leafupdate */
325   ierr = PetscSFPackGet_Allgatherv(sf,unit,leafdata,&link);CHKERRQ(ierr);
326   ierr = PetscMemcpy(leafupdate,leafdata,sf->nleaves*link->unitbytes);CHKERRQ(ierr);
327   ierr = PetscSFPackGetInUse(sf,unit,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr);
328 
329   /* Exscan on leafupdate and then BcastAndOp rootdata to leafupdate */
330   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
331   ierr = PetscMPIIntCast(sf->nleaves,&count);CHKERRQ(ierr);
332   if (op == MPIU_REPLACE) {
333     PetscMPIInt size,rank,prev,next;
334     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
335     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
336     prev = rank ?            rank-1 : MPI_PROC_NULL;
337     next = (rank < size-1) ? rank+1 : MPI_PROC_NULL;
338     ierr = MPI_Sendrecv_replace(leafupdate,count,unit,next,link->tag,prev,link->tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr);
339   } else {ierr = MPI_Exscan(MPI_IN_PLACE,leafupdate,count,unit,op,comm);CHKERRQ(ierr);}
340   ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr);
341   ierr = PetscSFBcastAndOpBegin(sf,unit,rootdata,leafupdate,op);CHKERRQ(ierr);
342   ierr = PetscSFBcastAndOpEnd(sf,unit,rootdata,leafupdate,op);CHKERRQ(ierr);
343 
344   /* Bcast roots to rank 0's leafupdate */
345   ierr = PetscSFBcastToZero_Private(sf,unit,rootdata,leafupdate);CHKERRQ(ierr); /* Using this call lets Allgather SFs inherit this routine */
346 
347   /* Reduce leafdata to rootdata */
348   ierr = PetscSFReduceBegin(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
349   PetscFunctionReturn(0);
350 }
351 
352 PETSC_INTERN PetscErrorCode PetscSFFetchAndOpEnd_Allgatherv(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
353 {
354   PetscErrorCode         ierr;
355 
356   PetscFunctionBegin;
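  /* The reduction of leafdata into rootdata was started at the end of PetscSFFetchAndOpBegin_Allgatherv(); just finish it here */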
357   ierr = PetscSFReduceEnd(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
358   PetscFunctionReturn(0);
359 }
360 
361 /* Get root ranks accessing my leaves */
362 PETSC_INTERN PetscErrorCode PetscSFGetRootRanks_Allgatherv(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
363 {
364   PetscErrorCode ierr;
365   PetscInt       i,j,k,size;
366   const PetscInt *range;
367 
368   PetscFunctionBegin;
369   /* Lazily construct these large arrays if users really need them for this type of SF. Very likely, they do not */
370   if (sf->nranks && !sf->ranks) { /* For sfgatherv, sf->nranks=0 on ranks other than 0. The sf->nranks test makes this routine also work for sfgatherv */
371     size = sf->nranks;
372     ierr = PetscLayoutGetRanges(sf->map,&range);CHKERRQ(ierr);
373     ierr = PetscMalloc4(size,&sf->ranks,size+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
374     for (i=0; i<size; i++) sf->ranks[i] = i;
375     ierr = PetscMemcpy(sf->roffset,range,sizeof(PetscInt)*(size+1));CHKERRQ(ierr);
376     for (i=0; i<sf->nleaves; i++) sf->rmine[i] = i; /* rmine is never NULL, even for contiguous leaves */
377     for (i=0; i<size; i++) {
378       for (j=range[i],k=0; j<range[i+1]; j++,k++) sf->rremote[j] = k;
379     }
380   }
381 
382   if (nranks)  *nranks  = sf->nranks;
383   if (ranks)   *ranks   = sf->ranks;
384   if (roffset) *roffset = sf->roffset;
385   if (rmine)   *rmine   = sf->rmine;
386   if (rremote) *rremote = sf->rremote;
387   PetscFunctionReturn(0);
388 }
389 
390 /* Get leaf ranks accessing my roots */
391 PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Allgatherv(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
392 {
393   PetscErrorCode     ierr;
394   PetscSF_Allgatherv *dat = (PetscSF_Allgatherv*)sf->data;
395   MPI_Comm           comm;
396   PetscMPIInt        size,rank;
397   PetscInt           i,j;
398 
399   PetscFunctionBegin;
400   /* Lazily construct these large arrays if users really need them for this type of SF. Very likely, they do not */
401   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
402   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
403   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
404   if (niranks) *niranks = size;
405 
406   /* PetscSF_Basic has distinguished incoming ranks. Here we do not need that, but we must put the self rank first and
407      sort the other ranks. See the comments in PetscSFSetUp_Basic about MatGetBrowsOfAoCols_MPIAIJ for why.
408    */
409   if (iranks) {
410     if (!dat->iranks) {
411       ierr = PetscMalloc1(size,&dat->iranks);CHKERRQ(ierr);
412       dat->iranks[0] = rank;
413       for (i=0,j=1; i<size; i++) {if (i == rank) continue; dat->iranks[j++] = i;}
414     }
415     *iranks = dat->iranks; /* dat->iranks was init'ed to NULL by PetscNewLog */
416   }
417 
418   if (ioffset) {
419     if (!dat->ioffset) {
420       ierr = PetscMalloc1(size+1,&dat->ioffset);CHKERRQ(ierr);
421       for (i=0; i<=size; i++) dat->ioffset[i] = i*sf->nroots;
422     }
423     *ioffset = dat->ioffset;
424   }
425 
426   if (irootloc) {
427     if (!dat->irootloc) {
428       ierr = PetscMalloc1(sf->nleaves,&dat->irootloc);CHKERRQ(ierr);
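      /* Every leaf rank accesses all of my nroots roots, in order */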
429       for (i=0; i<size; i++) {
430         for (j=0; j<sf->nroots; j++) dat->irootloc[i*sf->nroots+j] = j;
431       }
432     }
433     *irootloc = dat->irootloc;
434   }
435   PetscFunctionReturn(0);
436 }
437 
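/* Create the rank-local SF: on each rank it connects the nroots local roots to the leaves this rank
   owns in the global leaf space, i.e., local leaf rstart+i maps to local root i on PETSC_COMM_SELF. */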
438 PETSC_INTERN PetscErrorCode PetscSFCreateLocalSF_Allgatherv(PetscSF sf,PetscSF *out)
439 {
440   PetscInt       i,nroots,nleaves,rstart,*ilocal;
441   PetscSFNode    *iremote;
442   PetscSF        lsf;
443   PetscErrorCode ierr;
444 
445   PetscFunctionBegin;
446   nroots  = sf->nroots;
447   nleaves = sf->nleaves ? sf->nroots : 0;
448   ierr    = PetscMalloc1(nleaves,&ilocal);CHKERRQ(ierr);
449   ierr    = PetscMalloc1(nleaves,&iremote);CHKERRQ(ierr);
450   ierr    = PetscLayoutGetRange(sf->map,&rstart,NULL);CHKERRQ(ierr);
451 
452   for (i=0; i<nleaves; i++) {
453     ilocal[i]        = rstart + i; /* lsf does not change leaf indices */
454     iremote[i].rank  = 0;          /* rank in PETSC_COMM_SELF */
455     iremote[i].index = i;          /* root index */
456   }
457 
458   ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
459   ierr = PetscSFSetGraph(lsf,nroots,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
460   ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
461   *out = lsf;
462   PetscFunctionReturn(0);
463 }
464 
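/* A minimal usage sketch (not taken from this file; names of the pattern API are assumed here): an SF
   with this communication pattern is typically built from a PetscLayout describing the root
   distribution, e.g. via PetscSFSetGraphWithPattern() with PETSCSF_PATTERN_ALLGATHER, after which the
   usual begin/end pairs apply:

     ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
     ierr = PetscSFSetGraphWithPattern(sf,map,PETSCSF_PATTERN_ALLGATHER);CHKERRQ(ierr);
     ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
     ierr = PetscSFBcastAndOpBegin(sf,MPIU_SCALAR,rootdata,leafdata,MPIU_REPLACE);CHKERRQ(ierr);
     ierr = PetscSFBcastAndOpEnd(sf,MPIU_SCALAR,rootdata,leafdata,MPIU_REPLACE);CHKERRQ(ierr);
*/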
465 PETSC_INTERN PetscErrorCode PetscSFCreate_Allgatherv(PetscSF sf)
466 {
467   PetscErrorCode     ierr;
468   PetscSF_Allgatherv *dat = (PetscSF_Allgatherv*)sf->data;
469 
470   PetscFunctionBegin;
471   sf->ops->SetUp           = PetscSFSetUp_Allgatherv;
472   sf->ops->Reset           = PetscSFReset_Allgatherv;
473   sf->ops->Destroy         = PetscSFDestroy_Allgatherv;
474   sf->ops->GetRootRanks    = PetscSFGetRootRanks_Allgatherv;
475   sf->ops->GetLeafRanks    = PetscSFGetLeafRanks_Allgatherv;
476   sf->ops->GetGraph        = PetscSFGetGraph_Allgatherv;
477   sf->ops->BcastAndOpBegin = PetscSFBcastAndOpBegin_Allgatherv;
478   sf->ops->BcastAndOpEnd   = PetscSFBcastAndOpEnd_Allgatherv;
479   sf->ops->ReduceBegin     = PetscSFReduceBegin_Allgatherv;
480   sf->ops->ReduceEnd       = PetscSFReduceEnd_Allgatherv;
481   sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Allgatherv;
482   sf->ops->FetchAndOpEnd   = PetscSFFetchAndOpEnd_Allgatherv;
483   sf->ops->CreateLocalSF   = PetscSFCreateLocalSF_Allgatherv;
484   sf->ops->BcastToZero     = PetscSFBcastToZero_Allgatherv;
485 
486   ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr);
487   sf->data = (void*)dat;
488   PetscFunctionReturn(0);
489 }
490