xref: /petsc/src/vec/is/sf/utils/sfutils.c (revision c935b23e62bf55311f7f3bbc4c0f41860ec27e54)
1 #include <petsc/private/sfimpl.h> /*I  "petscsf.h"   I*/
2 #include <petsc/private/sectionimpl.h>
3 
4 /*@
5   PetscSFSetGraphLayout - Set a parallel star forest via global indices and a `PetscLayout`
6 
7   Collective
8 
9   Input Parameters:
10 + sf        - star forest
11 . layout    - `PetscLayout` defining the global space for roots
12 . nleaves   - number of leaf vertices on the current process, each of these references a root on any process
13 . ilocal    - locations of leaves in leafdata buffers, pass NULL for contiguous storage
14 . localmode - copy mode for ilocal
15 - gremote   - root vertices in global numbering corresponding to leaves in ilocal
16 
17   Level: intermediate
18 
19   Note:
20   Global indices must lie in [0, N) where N is the global size of layout.
21   Leaf indices in ilocal get sorted; this means the user-provided array gets sorted if localmode is `PETSC_OWN_POINTER`.
22 
23   Developer Notes:
24   Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
25 encode contiguous storage. In such a case, if localmode is `PETSC_OWN_POINTER`, the memory is deallocated as it is not
26 needed.
27 
28 .seealso: `PetscSF`, `PetscSFGetGraphLayout()`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
29 @*/
30 PetscErrorCode PetscSFSetGraphLayout(PetscSF sf, PetscLayout layout, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, const PetscInt *gremote)
31 {
32   const PetscInt *range;
33   PetscInt        i, nroots, ls = -1, ln = -1;
34   PetscMPIInt     lr = -1;
35   PetscSFNode    *remote;
36 
37   PetscFunctionBegin;
38   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
39   PetscCall(PetscLayoutSetUp(layout));
40   PetscCall(PetscLayoutGetLocalSize(layout, &nroots));
41   PetscCall(PetscLayoutGetRanges(layout, &range));
42   PetscCall(PetscMalloc1(nleaves, &remote));
43   if (nleaves) ls = gremote[0] + 1;
44   for (i = 0; i < nleaves; i++) {
45     const PetscInt idx = gremote[i] - ls;
46     if (idx < 0 || idx >= ln) { /* short-circuit the search */
47       PetscCall(PetscLayoutFindOwnerIndex(layout, gremote[i], &lr, &remote[i].index));
48       remote[i].rank = lr;
49       ls             = range[lr];
50       ln             = range[lr + 1] - ls;
51     } else {
52       remote[i].rank  = lr;
53       remote[i].index = idx;
54     }
55   }
56   PetscCall(PetscSFSetGraph(sf, nroots, nleaves, ilocal, localmode, remote, PETSC_OWN_POINTER));
57   PetscFunctionReturn(PETSC_SUCCESS);
58 }
59 
60 /*@C
61   PetscSFGetGraphLayout - Get the global indices and `PetscLayout` that describe this star forest
62 
63   Collective
64 
65   Input Parameter:
66 . sf - star forest
67 
68   Output Parameters:
69 + layout  - `PetscLayout` defining the global space for roots
70 . nleaves - number of leaf vertices on the current process, each of these references a root on any process
71 . ilocal  - locations of leaves in leafdata buffers, or `NULL` for contiguous storage
72 - gremote - root vertices in global numbering corresponding to leaves in ilocal
73 
74   Level: intermediate
75 
76   Notes:
77   The outputs are such that passing them as inputs to `PetscSFSetGraphLayout()` would lead to the same star forest.
78   The outputs `layout` and `gremote` are freshly created each time this function is called,
79   so they need to be freed (with `PetscLayoutDestroy()` and `PetscFree()`) by the user.
80 
81 .seealso: `PetscSF`, `PetscSFSetGraphLayout()`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
82 @*/
83 PetscErrorCode PetscSFGetGraphLayout(PetscSF sf, PetscLayout *layout, PetscInt *nleaves, const PetscInt *ilocal[], PetscInt *gremote[])
84 {
85   PetscInt           nr, nl;
86   const PetscSFNode *ir;
87   PetscLayout        lt;
88 
89   PetscFunctionBegin;
90   PetscCall(PetscSFGetGraph(sf, &nr, &nl, ilocal, &ir));
91   PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sf), nr, PETSC_DECIDE, 1, &lt));
92   if (gremote) {
93     PetscInt        i;
94     const PetscInt *range;
95     PetscInt       *gr;
96 
97     PetscCall(PetscLayoutGetRanges(lt, &range));
98     PetscCall(PetscMalloc1(nl, &gr));
99     for (i = 0; i < nl; i++) gr[i] = range[ir[i].rank] + ir[i].index;
100     *gremote = gr;
101   }
102   if (nleaves) *nleaves = nl;
103   if (layout) *layout = lt;
104   else PetscCall(PetscLayoutDestroy(&lt));
105   PetscFunctionReturn(PETSC_SUCCESS);
106 }
107 
108 /*@
109   PetscSFSetGraphSection - Sets the `PetscSF` graph encoding the parallel dof overlap based upon the `PetscSection` describing the data layout.
110 
111   Input Parameters:
112 + sf            - The `PetscSF`
113 . localSection  - `PetscSection` describing the local data layout
114 - globalSection - `PetscSection` describing the global data layout
115 
116   Level: developer
117 
118 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFSetGraphLayout()`
119 @*/
PetscErrorCode PetscSFSetGraphSection(PetscSF sf, PetscSection localSection, PetscSection globalSection)
{
  MPI_Comm        comm;
  PetscLayout     layout;
  const PetscInt *ranges;
  PetscInt       *local;
  PetscSFNode    *remote;
  PetscInt        pStart, pEnd, p, nroots, nleaves = 0, l;
  PetscMPIInt     size, rank;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidHeaderSpecific(localSection, PETSC_SECTION_CLASSID, 2);
  PetscValidHeaderSpecific(globalSection, PETSC_SECTION_CLASSID, 3);

  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscSectionGetChart(globalSection, &pStart, &pEnd));
  PetscCall(PetscSectionGetConstrainedStorageSize(globalSection, &nroots));
  /* Build a layout of the owned, constrained storage so global offsets can be mapped to (rank, local index) pairs */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, 1));
  PetscCall(PetscLayoutSetLocalSize(layout, nroots));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetRanges(layout, &ranges));
  /* First pass: count leaves. A negative gdof encodes a remotely owned point, with the true dof being -(gdof+1) */
  for (p = pStart; p < pEnd; ++p) {
    PetscInt gdof, gcdof;

    PetscCall(PetscSectionGetDof(globalSection, p, &gdof));
    PetscCall(PetscSectionGetConstraintDof(globalSection, p, &gcdof));
    PetscCheck(gcdof <= (gdof < 0 ? -(gdof + 1) : gdof), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Point %" PetscInt_FMT " has %" PetscInt_FMT " constraints > %" PetscInt_FMT " dof", p, gcdof, gdof < 0 ? -(gdof + 1) : gdof);
    nleaves += gdof < 0 ? -(gdof + 1) - gcdof : gdof - gcdof;
  }
  PetscCall(PetscMalloc1(nleaves, &local));
  PetscCall(PetscMalloc1(nleaves, &remote));
  /* Second pass: fill local (leaf) and remote (root) indices, one entry per unconstrained dof */
  for (p = pStart, l = 0; p < pEnd; ++p) {
    const PetscInt *cind;
    PetscInt        dof, cdof, off, gdof, gcdof, goff, gsize, d, c;

    PetscCall(PetscSectionGetDof(localSection, p, &dof));
    PetscCall(PetscSectionGetOffset(localSection, p, &off));
    PetscCall(PetscSectionGetConstraintDof(localSection, p, &cdof));
    PetscCall(PetscSectionGetConstraintIndices(localSection, p, &cind));
    PetscCall(PetscSectionGetDof(globalSection, p, &gdof));
    PetscCall(PetscSectionGetConstraintDof(globalSection, p, &gcdof));
    PetscCall(PetscSectionGetOffset(globalSection, p, &goff));
    if (!gdof) continue; /* Censored point */
    /* Number of unconstrained global dofs for this point */
    gsize = gdof < 0 ? -(gdof + 1) - gcdof : gdof - gcdof;
    if (gsize != dof - cdof) {
      /* The global size may instead match the full local dof count, in which case constraints are ignored */
      PetscCheck(gsize == dof, comm, PETSC_ERR_ARG_WRONG, "Global dof %" PetscInt_FMT " for point %" PetscInt_FMT " is neither the constrained size %" PetscInt_FMT ", nor the unconstrained %" PetscInt_FMT, gsize, p, dof - cdof, dof);
      cdof = 0; /* Ignore constraints */
    }
    /* Record local leaf locations, skipping constrained dofs (cind is sorted, c tracks constraints consumed) */
    for (d = 0, c = 0; d < dof; ++d) {
      if ((c < cdof) && (cind[c] == d)) {
        ++c;
        continue;
      }
      local[l + d - c] = off + d;
    }
    PetscCheck(d - c == gsize, comm, PETSC_ERR_ARG_WRONG, "Point %" PetscInt_FMT ": Global dof %" PetscInt_FMT " != %" PetscInt_FMT " size - number of constraints", p, gsize, d - c);
    if (gdof < 0) {
      /* Remotely owned point: decode the global offset and locate the owning rank in the layout ranges */
      for (d = 0; d < gsize; ++d, ++l) {
        PetscInt    offset = -(goff + 1) + d, ir;
        PetscMPIInt r;

        /* PetscFindInt returns the insertion point encoded as -(pos+1) when offset is not exactly a range boundary */
        PetscCall(PetscFindInt(offset, size + 1, ranges, &ir));
        PetscCall(PetscMPIIntCast(ir, &r));
        if (r < 0) r = -(r + 2);
        PetscCheck(!(r < 0) && !(r >= size), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Point %" PetscInt_FMT " mapped to invalid process %d (%" PetscInt_FMT ", %" PetscInt_FMT ")", p, r, gdof, goff);
        remote[l].rank  = r;
        remote[l].index = offset - ranges[r];
      }
    } else {
      /* Locally owned point: roots live on this rank */
      for (d = 0; d < gsize; ++d, ++l) {
        remote[l].rank  = rank;
        remote[l].index = goff + d - ranges[rank];
      }
    }
  }
  PetscCheck(l == nleaves, comm, PETSC_ERR_PLIB, "Iteration error, l %" PetscInt_FMT " != nleaves %" PetscInt_FMT, l, nleaves);
  PetscCall(PetscLayoutDestroy(&layout));
  PetscCall(PetscSFSetGraph(sf, nroots, nleaves, local, PETSC_OWN_POINTER, remote, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}
204 
205 /*@C
206   PetscSFDistributeSection - Create a new `PetscSection` reorganized, moving from the root to the leaves of the `PetscSF`
207 
208   Collective
209 
210   Input Parameters:
211 + sf          - The `PetscSF`
212 - rootSection - Section defined on root space
213 
214   Output Parameters:
215 + remoteOffsets - root offsets in leaf storage, or `NULL`
216 - leafSection   - Section defined on the leaf space
217 
218   Level: advanced
219 
220   Fortran Notes:
221   In Fortran, use PetscSFDistributeSectionF90()
222 
223 .seealso: `PetscSF`, `PetscSFCreate()`
224 @*/
PetscErrorCode PetscSFDistributeSection(PetscSF sf, PetscSection rootSection, PetscInt **remoteOffsets, PetscSection leafSection)
{
  PetscSF         embedSF;
  const PetscInt *indices;
  IS              selected;
  PetscInt        numFields, nroots, rpStart, rpEnd, lpStart = PETSC_INT_MAX, lpEnd = -1, f, c;
  PetscBool      *sub, hasc;
  PetscMPIInt     msize;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(PETSCSF_DistSect, sf, 0, 0, 0));
  PetscCall(PetscSectionGetNumFields(rootSection, &numFields));
  if (numFields) {
    IS perm;

    /* PetscSectionSetNumFields() calls PetscSectionReset(), which destroys
       leafSection->perm. To keep this permutation set by the user, we grab
       the reference before calling PetscSectionSetNumFields() and set it
       back after. */
    PetscCall(PetscSectionGetPermutation(leafSection, &perm));
    PetscCall(PetscObjectReference((PetscObject)perm));
    PetscCall(PetscSectionSetNumFields(leafSection, numFields));
    PetscCall(PetscSectionSetPermutation(leafSection, perm));
    PetscCall(ISDestroy(&perm));
  }
  /* Flag array for the allreduce below: sub[0] = need an embedded SF,
     sub[1] = rootSection has constraints, sub[2+f] = field f has constraints */
  PetscCall(PetscMalloc1(numFields + 2, &sub));
  sub[1] = rootSection->bc ? PETSC_TRUE : PETSC_FALSE;
  /* Copy per-field metadata (components, names, symmetries) from root to leaf section */
  for (f = 0; f < numFields; ++f) {
    PetscSectionSym sym, dsym = NULL;
    const char     *name    = NULL;
    PetscInt        numComp = 0;

    sub[2 + f] = rootSection->field[f]->bc ? PETSC_TRUE : PETSC_FALSE;
    PetscCall(PetscSectionGetFieldComponents(rootSection, f, &numComp));
    PetscCall(PetscSectionGetFieldName(rootSection, f, &name));
    PetscCall(PetscSectionGetFieldSym(rootSection, f, &sym));
    if (sym) PetscCall(PetscSectionSymDistribute(sym, sf, &dsym));
    PetscCall(PetscSectionSetFieldComponents(leafSection, f, numComp));
    PetscCall(PetscSectionSetFieldName(leafSection, f, name));
    PetscCall(PetscSectionSetFieldSym(leafSection, f, dsym));
    PetscCall(PetscSectionSymDestroy(&dsym));
    for (c = 0; c < rootSection->numFieldComponents[f]; ++c) {
      PetscCall(PetscSectionGetComponentName(rootSection, f, c, &name));
      PetscCall(PetscSectionSetComponentName(leafSection, f, c, name));
    }
  }
  PetscCall(PetscSectionGetChart(rootSection, &rpStart, &rpEnd));
  PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
  /* Clamp the root chart to the roots actually present in the SF (keeping it non-empty in the degenerate case) */
  rpEnd = PetscMin(rpEnd, nroots);
  rpEnd = PetscMax(rpStart, rpEnd);
  /* see if we can avoid creating the embedded SF, since it can cost more than an allreduce */
  sub[0] = (PetscBool)(nroots != rpEnd - rpStart);
  PetscCall(PetscMPIIntCast(2 + numFields, &msize));
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, sub, msize, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)sf)));
  if (sub[0]) {
    /* Restrict the SF to the roots lying in the (clamped) root chart */
    PetscCall(ISCreateStride(PETSC_COMM_SELF, rpEnd - rpStart, rpStart, 1, &selected));
    PetscCall(ISGetIndices(selected, &indices));
    PetscCall(PetscSFCreateEmbeddedRootSF(sf, rpEnd - rpStart, indices, &embedSF));
    PetscCall(ISRestoreIndices(selected, &indices));
    PetscCall(ISDestroy(&selected));
  } else {
    PetscCall(PetscObjectReference((PetscObject)sf));
    embedSF = sf;
  }
  /* GetLeafRange reports an inclusive range; make lpEnd exclusive for the chart */
  PetscCall(PetscSFGetLeafRange(embedSF, &lpStart, &lpEnd));
  lpEnd++;

  PetscCall(PetscSectionSetChart(leafSection, lpStart, lpEnd));

  /* Constrained dof section */
  hasc = sub[1];
  for (f = 0; f < numFields; ++f) hasc = (PetscBool)(hasc || sub[2 + f]);

  /* Could fuse these at the cost of copies and extra allocation */
  /* Broadcast the per-point dof counts; the -rpStart/-lpStart shifts let the SF index the arrays by point number */
  PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->atlasDof, -lpStart), MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->atlasDof, -lpStart), MPI_REPLACE));
  if (sub[1]) {
    /* Broadcast constrained-dof counts for the whole section */
    PetscCall(PetscSectionCheckConstraints_Private(rootSection));
    PetscCall(PetscSectionCheckConstraints_Private(leafSection));
    PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->bc->atlasDof[-rpStart], &leafSection->bc->atlasDof[-lpStart], MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->bc->atlasDof[-rpStart], &leafSection->bc->atlasDof[-lpStart], MPI_REPLACE));
  }
  for (f = 0; f < numFields; ++f) {
    /* Broadcast per-field dof counts, and per-field constrained-dof counts where present */
    PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->field[f]->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->field[f]->atlasDof, -lpStart), MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->field[f]->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->field[f]->atlasDof, -lpStart), MPI_REPLACE));
    if (sub[2 + f]) {
      PetscCall(PetscSectionCheckConstraints_Private(rootSection->field[f]));
      PetscCall(PetscSectionCheckConstraints_Private(leafSection->field[f]));
      PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasDof[-rpStart], &leafSection->field[f]->bc->atlasDof[-lpStart], MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasDof[-rpStart], &leafSection->field[f]->bc->atlasDof[-lpStart], MPI_REPLACE));
    }
  }
  if (remoteOffsets) {
    /* Optionally ship the root offsets to the leaves; caller frees with PetscFree() */
    PetscCall(PetscMalloc1(lpEnd - lpStart, remoteOffsets));
    PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
  }
  PetscCall(PetscSectionInvalidateMaxDof_Internal(leafSection));
  PetscCall(PetscSectionSetUp(leafSection));
  if (hasc) { /* need to communicate bcIndices */
    PetscSF   bcSF;
    PetscInt *rOffBc;

    /* rOffBc is reused as scratch for the remote bc offsets of each constrained (sub)section */
    PetscCall(PetscMalloc1(lpEnd - lpStart, &rOffBc));
    if (sub[1]) {
      PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
      /* Expand the point SF to a dof SF over the constraint storage, then ship the constraint indices */
      PetscCall(PetscSFCreateSectionSF(embedSF, rootSection->bc, rOffBc, leafSection->bc, &bcSF));
      PetscCall(PetscSFBcastBegin(bcSF, MPIU_INT, rootSection->bcIndices, leafSection->bcIndices, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(bcSF, MPIU_INT, rootSection->bcIndices, leafSection->bcIndices, MPI_REPLACE));
      PetscCall(PetscSFDestroy(&bcSF));
    }
    for (f = 0; f < numFields; ++f) {
      if (sub[2 + f]) {
        PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
        PetscCall(PetscSFCreateSectionSF(embedSF, rootSection->field[f]->bc, rOffBc, leafSection->field[f]->bc, &bcSF));
        PetscCall(PetscSFBcastBegin(bcSF, MPIU_INT, rootSection->field[f]->bcIndices, leafSection->field[f]->bcIndices, MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(bcSF, MPIU_INT, rootSection->field[f]->bcIndices, leafSection->field[f]->bcIndices, MPI_REPLACE));
        PetscCall(PetscSFDestroy(&bcSF));
      }
    }
    PetscCall(PetscFree(rOffBc));
  }
  PetscCall(PetscSFDestroy(&embedSF));
  PetscCall(PetscFree(sub));
  PetscCall(PetscLogEventEnd(PETSCSF_DistSect, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
354 
355 /*@C
356   PetscSFCreateRemoteOffsets - Create offsets for point data on remote processes
357 
358   Collective
359 
360   Input Parameters:
361 + sf          - The `PetscSF`
362 . rootSection - Data layout of remote points for outgoing data (this is layout for SF roots)
363 - leafSection - Data layout of local points for incoming data  (this is layout for SF leaves)
364 
365   Output Parameter:
366 . remoteOffsets - Offsets for point data on remote processes (these are offsets from the root section), or NULL
367 
368   Level: developer
369 
370   Fortran Notes:
371   In Fortran, use PetscSFCreateRemoteOffsetsF90()
372 
373 .seealso: `PetscSF`, `PetscSFCreate()`
374 @*/
PetscErrorCode PetscSFCreateRemoteOffsets(PetscSF sf, PetscSection rootSection, PetscSection leafSection, PetscInt **remoteOffsets)
{
  PetscSF         embedSF;
  const PetscInt *indices;
  IS              selected;
  PetscInt        numRoots, rpStart = 0, rpEnd = 0, lpStart = 0, lpEnd = 0;

  PetscFunctionBegin;
  *remoteOffsets = NULL;
  PetscCall(PetscSFGetGraph(sf, &numRoots, NULL, NULL, NULL));
  if (numRoots < 0) PetscFunctionReturn(PETSC_SUCCESS); /* no graph has been set on this SF */
  PetscCall(PetscLogEventBegin(PETSCSF_RemoteOff, sf, 0, 0, 0));
  PetscCall(PetscSectionGetChart(rootSection, &rpStart, &rpEnd));
  PetscCall(PetscSectionGetChart(leafSection, &lpStart, &lpEnd));
  /* Restrict the SF to the roots covered by the root section's chart */
  PetscCall(ISCreateStride(PETSC_COMM_SELF, rpEnd - rpStart, rpStart, 1, &selected));
  PetscCall(ISGetIndices(selected, &indices));
  PetscCall(PetscSFCreateEmbeddedRootSF(sf, rpEnd - rpStart, indices, &embedSF));
  PetscCall(ISRestoreIndices(selected, &indices));
  PetscCall(ISDestroy(&selected));
  /* Zero-initialized so that leaf points not reached by the SF get offset 0; caller frees with PetscFree() */
  PetscCall(PetscCalloc1(lpEnd - lpStart, remoteOffsets));
  /* The -rpStart/-lpStart shifts make the buffers indexable by point number inside the SF */
  PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
  PetscCall(PetscSFDestroy(&embedSF));
  PetscCall(PetscLogEventEnd(PETSCSF_RemoteOff, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
401 
402 /*@C
403   PetscSFCreateSectionSF - Create an expanded `PetscSF` of dofs, assuming the input `PetscSF` relates points
404 
405   Collective
406 
407   Input Parameters:
408 + sf            - The `PetscSF`
409 . rootSection   - Data layout of remote points for outgoing data (this is usually the serial section)
410 . remoteOffsets - Offsets for point data on remote processes (these are offsets from the root section), or NULL
411 - leafSection   - Data layout of local points for incoming data  (this is the distributed section)
412 
413   Output Parameter:
414 . sectionSF - The new `PetscSF`
415 
416   Level: advanced
417 
418   Notes:
419   `remoteOffsets` can be NULL if `sf` does not reference any points in leafSection
420 
421   Fortran Notes:
422   In Fortran, use PetscSFCreateSectionSFF90()
423 
424 .seealso: `PetscSF`, `PetscSFCreate()`
425 @*/
426 PetscErrorCode PetscSFCreateSectionSF(PetscSF sf, PetscSection rootSection, PetscInt remoteOffsets[], PetscSection leafSection, PetscSF *sectionSF)
427 {
428   MPI_Comm           comm;
429   const PetscInt    *localPoints;
430   const PetscSFNode *remotePoints;
431   PetscInt           lpStart, lpEnd;
432   PetscInt           numRoots, numSectionRoots, numPoints, numIndices = 0;
433   PetscInt          *localIndices;
434   PetscSFNode       *remoteIndices;
435   PetscInt           i, ind;
436 
437   PetscFunctionBegin;
438   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
439   PetscAssertPointer(rootSection, 2);
440   /* Cannot check PetscAssertPointer(remoteOffsets,3) because it can be NULL if sf does not reference any points in leafSection */
441   PetscAssertPointer(leafSection, 4);
442   PetscAssertPointer(sectionSF, 5);
443   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
444   PetscCall(PetscSFCreate(comm, sectionSF));
445   PetscCall(PetscSectionGetChart(leafSection, &lpStart, &lpEnd));
446   PetscCall(PetscSectionGetStorageSize(rootSection, &numSectionRoots));
447   PetscCall(PetscSFGetGraph(sf, &numRoots, &numPoints, &localPoints, &remotePoints));
448   if (numRoots < 0) PetscFunctionReturn(PETSC_SUCCESS);
449   PetscCall(PetscLogEventBegin(PETSCSF_SectSF, sf, 0, 0, 0));
450   for (i = 0; i < numPoints; ++i) {
451     PetscInt localPoint = localPoints ? localPoints[i] : i;
452     PetscInt dof;
453 
454     if ((localPoint >= lpStart) && (localPoint < lpEnd)) {
455       PetscCall(PetscSectionGetDof(leafSection, localPoint, &dof));
456       numIndices += dof < 0 ? 0 : dof;
457     }
458   }
459   PetscCall(PetscMalloc1(numIndices, &localIndices));
460   PetscCall(PetscMalloc1(numIndices, &remoteIndices));
461   /* Create new index graph */
462   for (i = 0, ind = 0; i < numPoints; ++i) {
463     PetscInt localPoint = localPoints ? localPoints[i] : i;
464     PetscInt rank       = remotePoints[i].rank;
465 
466     if ((localPoint >= lpStart) && (localPoint < lpEnd)) {
467       PetscInt remoteOffset = remoteOffsets[localPoint - lpStart];
468       PetscInt loff, dof, d;
469 
470       PetscCall(PetscSectionGetOffset(leafSection, localPoint, &loff));
471       PetscCall(PetscSectionGetDof(leafSection, localPoint, &dof));
472       for (d = 0; d < dof; ++d, ++ind) {
473         localIndices[ind]        = loff + d;
474         remoteIndices[ind].rank  = rank;
475         remoteIndices[ind].index = remoteOffset + d;
476       }
477     }
478   }
479   PetscCheck(numIndices == ind, comm, PETSC_ERR_PLIB, "Inconsistency in indices, %" PetscInt_FMT " should be %" PetscInt_FMT, ind, numIndices);
480   PetscCall(PetscSFSetGraph(*sectionSF, numSectionRoots, numIndices, localIndices, PETSC_OWN_POINTER, remoteIndices, PETSC_OWN_POINTER));
481   PetscCall(PetscSFSetUp(*sectionSF));
482   PetscCall(PetscLogEventEnd(PETSCSF_SectSF, sf, 0, 0, 0));
483   PetscFunctionReturn(PETSC_SUCCESS);
484 }
485 
486 /*@C
487   PetscSFCreateFromLayouts - Creates a parallel star forest mapping two `PetscLayout` objects
488 
489   Collective
490 
491   Input Parameters:
492 + rmap - `PetscLayout` defining the global root space
493 - lmap - `PetscLayout` defining the global leaf space
494 
495   Output Parameter:
496 . sf - The parallel star forest
497 
498   Level: intermediate
499 
500 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscLayoutCreate()`, `PetscSFSetGraphLayout()`
501 @*/
502 PetscErrorCode PetscSFCreateFromLayouts(PetscLayout rmap, PetscLayout lmap, PetscSF *sf)
503 {
504   PetscInt     i, nroots, nleaves = 0;
505   PetscInt     rN, lst, len;
506   PetscMPIInt  owner = -1;
507   PetscSFNode *remote;
508   MPI_Comm     rcomm = rmap->comm;
509   MPI_Comm     lcomm = lmap->comm;
510   PetscMPIInt  flg;
511 
512   PetscFunctionBegin;
513   PetscAssertPointer(sf, 3);
514   PetscCheck(rmap->setupcalled, rcomm, PETSC_ERR_ARG_WRONGSTATE, "Root layout not setup");
515   PetscCheck(lmap->setupcalled, lcomm, PETSC_ERR_ARG_WRONGSTATE, "Leaf layout not setup");
516   PetscCallMPI(MPI_Comm_compare(rcomm, lcomm, &flg));
517   PetscCheck(flg == MPI_CONGRUENT || flg == MPI_IDENT, rcomm, PETSC_ERR_SUP, "cannot map two layouts with non-matching communicators");
518   PetscCall(PetscSFCreate(rcomm, sf));
519   PetscCall(PetscLayoutGetLocalSize(rmap, &nroots));
520   PetscCall(PetscLayoutGetSize(rmap, &rN));
521   PetscCall(PetscLayoutGetRange(lmap, &lst, &len));
522   PetscCall(PetscMalloc1(len - lst, &remote));
523   for (i = lst; i < len && i < rN; i++) {
524     if (owner < -1 || i >= rmap->range[owner + 1]) PetscCall(PetscLayoutFindOwner(rmap, i, &owner));
525     remote[nleaves].rank  = owner;
526     remote[nleaves].index = i - rmap->range[owner];
527     nleaves++;
528   }
529   PetscCall(PetscSFSetGraph(*sf, nroots, nleaves, NULL, PETSC_OWN_POINTER, remote, PETSC_COPY_VALUES));
530   PetscCall(PetscFree(remote));
531   PetscFunctionReturn(PETSC_SUCCESS);
532 }
533 
534 /* TODO: handle nooffprocentries like MatZeroRowsMapLocal_Private, since this code is the same */
PetscErrorCode PetscLayoutMapLocal(PetscLayout map, PetscInt N, const PetscInt idxs[], PetscInt *on, PetscInt **oidxs, PetscInt **ogidxs)
{
  PetscInt    *owners = map->range;
  PetscInt     n      = map->n;
  PetscSF      sf;
  PetscInt    *lidxs, *work = NULL;
  PetscSFNode *ridxs;
  PetscMPIInt  rank, p = 0;
  PetscInt     r, len  = 0;

  PetscFunctionBegin;
  if (on) *on = 0; /* squelch -Wmaybe-uninitialized */
  /* Create SF where leaves are input idxs and roots are owned idxs */
  PetscCallMPI(MPI_Comm_rank(map->comm, &rank));
  /* lidxs starts at -1 everywhere; the MPI_LOR reduce below marks locally owned slots that some rank referenced */
  PetscCall(PetscMalloc1(n, &lidxs));
  for (r = 0; r < n; ++r) lidxs[r] = -1;
  PetscCall(PetscMalloc1(N, &ridxs));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = idxs[r];
    PetscCheck(idx >= 0 && idx < map->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Index %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, map->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this idx too */
      PetscCall(PetscLayoutFindOwner(map, idx, &p));
    }
    ridxs[r].rank  = p;
    ridxs[r].index = idxs[r] - owners[p];
  }
  PetscCall(PetscSFCreate(map->comm, &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, ridxs, PETSC_OWN_POINTER));
  /* Reduce the leaf values onto the owners; MPI_LOR over the -1 initialization leaves referenced roots >= 0 */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)idxs, lidxs, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)idxs, lidxs, MPI_LOR));
  if (ogidxs) { /* communicate global idxs */
    PetscInt cum = 0, start, *work2;

    PetscCall(PetscMalloc1(n, &work));
    PetscCall(PetscCalloc1(N, &work2));
    /* Assign a contiguous global numbering to the (nonnegative) input indices via an exclusive prefix sum */
    for (r = 0; r < N; ++r)
      if (idxs[r] >= 0) cum++;
    PetscCallMPI(MPI_Scan(&cum, &start, 1, MPIU_INT, MPI_SUM, map->comm));
    start -= cum;
    cum = 0;
    for (r = 0; r < N; ++r)
      if (idxs[r] >= 0) work2[r] = start + cum++;
    /* Push each leaf's new global number to the owner of the corresponding root */
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work2, work, MPI_REPLACE));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work2, work, MPI_REPLACE));
    PetscCall(PetscFree(work2));
  }
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in indices */
  /* Keep only the owned slots that were actually referenced, compacting work in lockstep */
  for (r = 0; r < n; ++r)
    if (lidxs[r] >= 0) {
      if (work) work[len] = work[r];
      lidxs[len++] = r;
    }
  if (on) *on = len;
  if (oidxs) *oidxs = lidxs;
  if (ogidxs) *ogidxs = work;
  PetscFunctionReturn(PETSC_SUCCESS);
}
593 
594 /*@
595   PetscSFCreateByMatchingIndices - Create `PetscSF` by matching root and leaf indices
596 
597   Collective
598 
599   Input Parameters:
600 + layout           - `PetscLayout` defining the global index space and the rank that brokers each index
601 . numRootIndices   - size of rootIndices
602 . rootIndices      - `PetscInt` array of global indices of which this process requests ownership
603 . rootLocalIndices - root local index permutation (NULL if no permutation)
604 . rootLocalOffset  - offset to be added to root local indices
605 . numLeafIndices   - size of leafIndices
606 . leafIndices      - `PetscInt` array of global indices with which this process requires data associated
607 . leafLocalIndices - leaf local index permutation (NULL if no permutation)
608 - leafLocalOffset  - offset to be added to leaf local indices
609 
610   Output Parameters:
611 + sfA - star forest representing the communication pattern from the layout space to the leaf space (NULL if not needed)
612 - sf  - star forest representing the communication pattern from the root space to the leaf space
613 
614   Level: advanced
615 
616   Example 1:
617 .vb
618   rank             : 0            1            2
619   rootIndices      : [1 0 2]      [3]          [3]
620   rootLocalOffset  : 100          200          300
621   layout           : [0 1]        [2]          [3]
622   leafIndices      : [0]          [2]          [0 3]
623   leafLocalOffset  : 400          500          600
624 
625 would build the following PetscSF
626 
627   [0] 400 <- (0,101)
628   [1] 500 <- (0,102)
629   [2] 600 <- (0,101)
630   [2] 601 <- (2,300)
631 .ve
632 
633   Example 2:
634 .vb
635   rank             : 0               1               2
636   rootIndices      : [1 0 2]         [3]             [3]
637   rootLocalOffset  : 100             200             300
638   layout           : [0 1]           [2]             [3]
639   leafIndices      : rootIndices     rootIndices     rootIndices
640   leafLocalOffset  : rootLocalOffset rootLocalOffset rootLocalOffset
641 
642 would build the following PetscSF
643 
644   [1] 200 <- (2,300)
645 .ve
646 
647   Example 3:
648 .vb
649   No process requests ownership of global index 1, but no process needs it.
650 
651   rank             : 0            1            2
652   numRootIndices   : 2            1            1
653   rootIndices      : [0 2]        [3]          [3]
654   rootLocalOffset  : 100          200          300
655   layout           : [0 1]        [2]          [3]
656   numLeafIndices   : 1            1            2
657   leafIndices      : [0]          [2]          [0 3]
658   leafLocalOffset  : 400          500          600
659 
660 would build the following PetscSF
661 
662   [0] 400 <- (0,100)
663   [1] 500 <- (0,101)
664   [2] 600 <- (0,100)
665   [2] 601 <- (2,300)
666 .ve
667 
668   Notes:
669   The layout parameter represents any partitioning of [0, N), where N is the total number of global indices, and its
670   local size can be set to `PETSC_DECIDE`.
671 
672   If a global index x lies in the partition owned by process i, each process whose rootIndices contains x requests
673   ownership of x and sends its own rank and the local index of x to process i.
674   If multiple processes request ownership of x, the one with the highest rank is to own x.
675   Process i then broadcasts the ownership information, so that each process whose leafIndices contains x knows the
676   ownership information of x.
677   The output sf is constructed by associating each leaf point to a root point in this way.
678 
679   Suppose there is point data ordered according to the global indices and partitioned according to the given layout.
680   The optional output `PetscSF` object sfA can be used to push such data to leaf points.
681 
682   All indices in rootIndices and leafIndices must lie in the layout range. The union (over all processes) of rootIndices
683   must cover that of leafIndices, but need not cover the entire layout.
684 
  If (leafIndices, leafLocalIndices, leafLocalOffset) == (rootIndices, rootLocalIndices, rootLocalOffset), the output
  star forest is almost the identity map, so it will only include the non-trivial part of the map.
687 
688   Developer Notes:
689   Current approach of a process of the highest rank gaining the ownership may cause load imbalance; consider using
690   hash(rank, root_local_index) as the bid for the ownership determination.
691 
692 .seealso: `PetscSF`, `PetscSFCreate()`
693 @*/
PetscErrorCode PetscSFCreateByMatchingIndices(PetscLayout layout, PetscInt numRootIndices, const PetscInt *rootIndices, const PetscInt *rootLocalIndices, PetscInt rootLocalOffset, PetscInt numLeafIndices, const PetscInt *leafIndices, const PetscInt *leafLocalIndices, PetscInt leafLocalOffset, PetscSF *sfA, PetscSF *sf)
{
  MPI_Comm     comm = layout->comm;
  PetscMPIInt  size, rank;
  PetscSF      sf1;
  PetscSFNode *owners, *buffer, *iremote;
  PetscInt    *ilocal, nleaves, N, n, i;
#if defined(PETSC_USE_DEBUG)
  PetscInt N1;
#endif
  PetscBool flag;

  PetscFunctionBegin;
  /* All index arrays are optional: a process may contribute no roots and/or no leaves */
  if (rootIndices) PetscAssertPointer(rootIndices, 3);
  if (rootLocalIndices) PetscAssertPointer(rootLocalIndices, 4);
  if (leafIndices) PetscAssertPointer(leafIndices, 7);
  if (leafLocalIndices) PetscAssertPointer(leafLocalIndices, 8);
  if (sfA) PetscAssertPointer(sfA, 10);
  PetscAssertPointer(sf, 11);
  PetscCheck(numRootIndices >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "numRootIndices (%" PetscInt_FMT ") must be non-negative", numRootIndices);
  PetscCheck(numLeafIndices >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "numLeafIndices (%" PetscInt_FMT ") must be non-negative", numLeafIndices);
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetSize(layout, &N));
  PetscCall(PetscLayoutGetLocalSize(layout, &n));
  /* flag is collectively true iff every process passed the same array as both rootIndices and
     leafIndices; in that case the graph of sf1 built from rootIndices can be reused for the Bcast */
  flag = (PetscBool)(leafIndices == rootIndices);
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flag, 1, MPIU_BOOL, MPI_LAND, comm));
  PetscCheck(!flag || numLeafIndices == numRootIndices, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "leafIndices == rootIndices, but numLeafIndices (%" PetscInt_FMT ") != numRootIndices(%" PetscInt_FMT ")", numLeafIndices, numRootIndices);
#if defined(PETSC_USE_DEBUG)
  /* Debug-only validation: all given global indices must lie in the layout range [0, N) */
  N1 = PETSC_INT_MIN;
  for (i = 0; i < numRootIndices; i++)
    if (rootIndices[i] > N1) N1 = rootIndices[i];
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &N1, 1, MPIU_INT, MPI_MAX, comm));
  PetscCheck(N1 < N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Max. root index (%" PetscInt_FMT ") out of layout range [0,%" PetscInt_FMT ")", N1, N);
  if (!flag) {
    /* leafIndices is a distinct array somewhere, so it must be checked as well */
    N1 = PETSC_INT_MIN;
    for (i = 0; i < numLeafIndices; i++)
      if (leafIndices[i] > N1) N1 = leafIndices[i];
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &N1, 1, MPIU_INT, MPI_MAX, comm));
    PetscCheck(N1 < N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Max. leaf index (%" PetscInt_FMT ") out of layout range [0,%" PetscInt_FMT ")", N1, N);
  }
#endif
  /* Reduce: owners -> buffer.
     Each process claims ownership of the indices in its rootIndices by reducing (rank, local index)
     pairs onto the layout owner of each index; MPI_MAXLOC resolves contention so that the highest
     claiming rank wins (see the ownership note in the manual page above). */
  PetscCall(PetscMalloc1(n, &buffer));
  PetscCall(PetscSFCreate(comm, &sf1));
  PetscCall(PetscSFSetFromOptions(sf1));
  PetscCall(PetscSFSetGraphLayout(sf1, layout, numRootIndices, NULL, PETSC_OWN_POINTER, rootIndices));
  PetscCall(PetscMalloc1(numRootIndices, &owners));
  for (i = 0; i < numRootIndices; ++i) {
    owners[i].rank  = rank;
    owners[i].index = rootLocalOffset + (rootLocalIndices ? rootLocalIndices[i] : i);
  }
  /* (-1, -1) marks a layout entry that no process has claimed */
  for (i = 0; i < n; ++i) {
    buffer[i].index = -1;
    buffer[i].rank  = -1;
  }
  PetscCall(PetscSFReduceBegin(sf1, MPIU_SF_NODE, owners, buffer, MPI_MAXLOC));
  PetscCall(PetscSFReduceEnd(sf1, MPIU_SF_NODE, owners, buffer, MPI_MAXLOC));
  /* Bcast: buffer -> owners, so every leaf learns (owner rank, owner local index) of its global index */
  if (!flag) {
    /* leafIndices is different from rootIndices: rebuild sf1 over leafIndices before broadcasting */
    PetscCall(PetscFree(owners));
    PetscCall(PetscSFSetGraphLayout(sf1, layout, numLeafIndices, NULL, PETSC_OWN_POINTER, leafIndices));
    PetscCall(PetscMalloc1(numLeafIndices, &owners));
  }
  PetscCall(PetscSFBcastBegin(sf1, MPIU_SF_NODE, buffer, owners, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf1, MPIU_SF_NODE, buffer, owners, MPI_REPLACE));
  /* Every leaf index must have been claimed by some root owner */
  for (i = 0; i < numLeafIndices; ++i) PetscCheck(owners[i].rank >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Global point %" PetscInt_FMT " was unclaimed", leafIndices[i]);
  PetscCall(PetscFree(buffer));
  /* Hand sf1 (the layout-to-leaves map) to the caller if requested, otherwise destroy it */
  if (sfA) {
    *sfA = sf1;
  } else PetscCall(PetscSFDestroy(&sf1));
  /* Create sf */
  if (flag && rootLocalIndices == leafLocalIndices && leafLocalOffset == rootLocalOffset) {
    /* leaf space == root space: keep only edges whose owner is another process (near-identity map) */
    for (i = 0, nleaves = 0; i < numLeafIndices; ++i)
      if (owners[i].rank != rank) ++nleaves;
    PetscCall(PetscMalloc1(nleaves, &ilocal));
    PetscCall(PetscMalloc1(nleaves, &iremote));
    for (i = 0, nleaves = 0; i < numLeafIndices; ++i) {
      if (owners[i].rank != rank) {
        ilocal[nleaves]        = leafLocalOffset + i;
        iremote[nleaves].rank  = owners[i].rank;
        iremote[nleaves].index = owners[i].index;
        ++nleaves;
      }
    }
    PetscCall(PetscFree(owners));
  } else {
    /* General case: one leaf per entry of leafIndices; owners already holds the remote end points,
       so its ownership is transferred to the new SF via PETSC_OWN_POINTER below */
    nleaves = numLeafIndices;
    PetscCall(PetscMalloc1(nleaves, &ilocal));
    for (i = 0; i < nleaves; ++i) ilocal[i] = leafLocalOffset + (leafLocalIndices ? leafLocalIndices[i] : i);
    iremote = owners;
  }
  PetscCall(PetscSFCreate(comm, sf));
  PetscCall(PetscSFSetFromOptions(*sf));
  PetscCall(PetscSFSetGraph(*sf, rootLocalOffset + numRootIndices, nleaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}
794 
795 /*@
796   PetscSFMerge - append/merge indices of `sfb` into `sfa`, with preference for `sfb`
797 
798   Collective
799 
800   Input Parameters:
801 + sfa - default `PetscSF`
802 - sfb - additional edges to add/replace edges in sfa
803 
804   Output Parameter:
805 . merged - new `PetscSF` with combined edges
806 
807   Level: intermediate
808 
809 .seealso: `PetscSFCompose()`
810 @*/
811 PetscErrorCode PetscSFMerge(PetscSF sfa, PetscSF sfb, PetscSF *merged)
812 {
813   PetscInt maxleaf;
814 
815   PetscFunctionBegin;
816   PetscValidHeaderSpecific(sfa, PETSCSF_CLASSID, 1);
817   PetscValidHeaderSpecific(sfb, PETSCSF_CLASSID, 2);
818   PetscCheckSameComm(sfa, 1, sfb, 2);
819   PetscAssertPointer(merged, 3);
820   {
821     PetscInt aleaf, bleaf;
822     PetscCall(PetscSFGetLeafRange(sfa, NULL, &aleaf));
823     PetscCall(PetscSFGetLeafRange(sfb, NULL, &bleaf));
824     maxleaf = PetscMax(aleaf, bleaf) + 1; // One more than the last index
825   }
826   PetscInt          *clocal, aroots, aleaves, broots, bleaves;
827   PetscSFNode       *cremote;
828   const PetscInt    *alocal, *blocal;
829   const PetscSFNode *aremote, *bremote;
830   PetscCall(PetscMalloc2(maxleaf, &clocal, maxleaf, &cremote));
831   for (PetscInt i = 0; i < maxleaf; i++) clocal[i] = -1;
832   PetscCall(PetscSFGetGraph(sfa, &aroots, &aleaves, &alocal, &aremote));
833   PetscCall(PetscSFGetGraph(sfb, &broots, &bleaves, &blocal, &bremote));
834   PetscCheck(aroots == broots, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Both sfa and sfb must have the same root space");
835   for (PetscInt i = 0; i < aleaves; i++) {
836     PetscInt a = alocal ? alocal[i] : i;
837     clocal[a]  = a;
838     cremote[a] = aremote[i];
839   }
840   for (PetscInt i = 0; i < bleaves; i++) {
841     PetscInt b = blocal ? blocal[i] : i;
842     clocal[b]  = b;
843     cremote[b] = bremote[i];
844   }
845   PetscInt nleaves = 0;
846   for (PetscInt i = 0; i < maxleaf; i++) {
847     if (clocal[i] < 0) continue;
848     clocal[nleaves]  = clocal[i];
849     cremote[nleaves] = cremote[i];
850     nleaves++;
851   }
852   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfa), merged));
853   PetscCall(PetscSFSetGraph(*merged, aroots, nleaves, clocal, PETSC_COPY_VALUES, cremote, PETSC_COPY_VALUES));
854   PetscCall(PetscFree2(clocal, cremote));
855   PetscFunctionReturn(PETSC_SUCCESS);
856 }
857 
858 /*@
859   PetscSFCreateStridedSF - Create an `PetscSF` to communicate interleaved blocks of data
860 
861   Collective
862 
863   Input Parameters:
864 + sf  - star forest
865 . bs  - stride
866 . ldr - leading dimension of root space
867 - ldl - leading dimension of leaf space
868 
869   Output Parameter:
870 . vsf - the new `PetscSF`
871 
872   Level: intermediate
873 
874   Notes:
875   This can be useful to perform communications on blocks of right-hand sides. For example, the calling sequence
876 .vb
877   c_datatype *roots, *leaves;
878   for i in [0,bs) do
879     PetscSFBcastBegin(sf, mpi_datatype, roots + i*ldr, leaves + i*ldl, op)
880     PetscSFBcastEnd(sf, mpi_datatype, roots + i*ldr, leaves + i*ldl, op)
881 .ve
882   is equivalent to
883 .vb
884   c_datatype *roots, *leaves;
885   PetscSFCreateStridedSF(sf, bs, ldr, ldl, &vsf)
886   PetscSFBcastBegin(vsf, mpi_datatype, roots, leaves, op)
887   PetscSFBcastEnd(vsf, mpi_datatype, roots, leaves, op)
888 .ve
889 
890   Developer Notes:
891   Should this functionality be handled with a new API instead of creating a new object?
892 
893 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`
894 @*/
895 PetscErrorCode PetscSFCreateStridedSF(PetscSF sf, PetscInt bs, PetscInt ldr, PetscInt ldl, PetscSF *vsf)
896 {
897   PetscSF            rankssf;
898   const PetscSFNode *iremote, *sfrremote;
899   PetscSFNode       *viremote;
900   const PetscInt    *ilocal;
901   PetscInt          *vilocal = NULL, *ldrs;
902   PetscInt           nranks, nr, nl, vnr, vnl, maxl;
903   PetscMPIInt        rank;
904   MPI_Comm           comm;
905   PetscSFType        sftype;
906 
907   PetscFunctionBegin;
908   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
909   PetscValidLogicalCollectiveInt(sf, bs, 2);
910   PetscAssertPointer(vsf, 5);
911   if (bs == 1) {
912     PetscCall(PetscObjectReference((PetscObject)sf));
913     *vsf = sf;
914     PetscFunctionReturn(PETSC_SUCCESS);
915   }
916   PetscCall(PetscSFSetUp(sf));
917   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
918   PetscCallMPI(MPI_Comm_rank(comm, &rank));
919   PetscCall(PetscSFGetGraph(sf, &nr, &nl, &ilocal, &iremote));
920   PetscCall(PetscSFGetLeafRange(sf, NULL, &maxl));
921   maxl += 1;
922   if (ldl == PETSC_DECIDE) ldl = maxl;
923   if (ldr == PETSC_DECIDE) ldr = nr;
924   PetscCheck(ldr >= nr, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid leading dimension %" PetscInt_FMT " must be smaller than number of roots %" PetscInt_FMT, ldr, nr);
925   PetscCheck(ldl >= maxl, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid leading dimension %" PetscInt_FMT " must be larger than leaf range %" PetscInt_FMT, ldl, maxl - 1);
926   vnr = nr * bs;
927   vnl = nl * bs;
928   PetscCall(PetscMalloc1(vnl, &viremote));
929   PetscCall(PetscMalloc1(vnl, &vilocal));
930 
931   /* Communicate root leading dimensions to leaf ranks */
932   PetscCall(PetscSFGetRanksSF(sf, &rankssf));
933   PetscCall(PetscSFGetGraph(rankssf, NULL, &nranks, NULL, &sfrremote));
934   PetscCall(PetscMalloc1(nranks, &ldrs));
935   PetscCall(PetscSFBcastBegin(rankssf, MPIU_INT, &ldr, ldrs, MPI_REPLACE));
936   PetscCall(PetscSFBcastEnd(rankssf, MPIU_INT, &ldr, ldrs, MPI_REPLACE));
937 
938   for (PetscInt i = 0, rold = -1, lda = -1; i < nl; i++) {
939     const PetscInt r  = iremote[i].rank;
940     const PetscInt ii = iremote[i].index;
941 
942     if (r == rank) lda = ldr;
943     else if (rold != r) {
944       PetscInt j;
945 
946       for (j = 0; j < nranks; j++)
947         if (sfrremote[j].rank == r) break;
948       PetscCheck(j < nranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to locate neighbor rank %" PetscInt_FMT, r);
949       lda = ldrs[j];
950     }
951     rold = r;
952     for (PetscInt v = 0; v < bs; v++) {
953       viremote[v * nl + i].rank  = r;
954       viremote[v * nl + i].index = v * lda + ii;
955       vilocal[v * nl + i]        = v * ldl + (ilocal ? ilocal[i] : i);
956     }
957   }
958   PetscCall(PetscFree(ldrs));
959   PetscCall(PetscSFCreate(comm, vsf));
960   PetscCall(PetscSFGetType(sf, &sftype));
961   PetscCall(PetscSFSetType(*vsf, sftype));
962   PetscCall(PetscSFSetGraph(*vsf, vnr, vnl, vilocal, PETSC_OWN_POINTER, viremote, PETSC_OWN_POINTER));
963   PetscFunctionReturn(PETSC_SUCCESS);
964 }
965