#include <petsc/private/sfimpl.h> /*I  "petscsf.h"   I*/
#include <petsc/private/sectionimpl.h>

/*@
  PetscSFSetGraphLayout - Set a parallel star forest via global indices and a `PetscLayout`

  Collective

  Input Parameters:
+ sf        - star forest
. layout    - `PetscLayout` defining the global space for roots
. nleaves   - number of leaf vertices on the current process, each of these references a root on any process
. ilocal    - locations of leaves in leafdata buffers, pass `NULL` for contiguous storage
. localmode - copy mode for `ilocal`
- gremote   - root vertices in global numbering corresponding to leaves in `ilocal`

  Level: intermediate
  Notes:
  Global indices must lie in [0, N), where N is the global size of `layout`.

  Leaf indices in `ilocal` are sorted in place; in particular, the user-provided array is modified if `localmode` is `PETSC_OWN_POINTER`.

  Developer Notes:
  Local indices that form the identity permutation on the range [0, nleaves) are discarded, since they
  encode contiguous storage. In that case, if `localmode` is `PETSC_OWN_POINTER`, the memory is deallocated because it
  is not needed.

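  Example Usage:
  A minimal sketch; here `comm`, `nroots`, `nl`, and `gidx` stand for a caller-supplied communicator, local root count, leaf count, and array of global root indices, and are not part of this API:
.vb
  PetscSF     sf;
  PetscLayout layout;

  PetscCall(PetscSFCreate(comm, &sf));
  PetscCall(PetscLayoutCreateFromSizes(comm, nroots, PETSC_DECIDE, 1, &layout));
  PetscCall(PetscSFSetGraphLayout(sf, layout, nl, NULL, PETSC_COPY_VALUES, gidx));
  PetscCall(PetscLayoutDestroy(&layout));
.ve
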
.seealso: `PetscSF`, `PetscSFGetGraphLayout()`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFSetGraphLayout(PetscSF sf, PetscLayout layout, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, const PetscInt *gremote)
{
  const PetscInt *range;
  PetscInt        i, nroots, ls = -1, ln = -1;
  PetscMPIInt     lr = -1;
  PetscSFNode    *remote;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &nroots));
  PetscCall(PetscLayoutGetRanges(layout, &range));
  PetscCall(PetscMalloc1(nleaves, &remote));
  if (nleaves) ls = gremote[0] + 1; /* ensure the first leaf falls outside the cached range and triggers the owner search */
  for (i = 0; i < nleaves; i++) {
    const PetscInt idx = gremote[i] - ls;
    if (idx < 0 || idx >= ln) { /* short-circuit the search */
      PetscCall(PetscLayoutFindOwnerIndex(layout, gremote[i], &lr, &remote[i].index));
      remote[i].rank = lr;
      ls             = range[lr];
      ln             = range[lr + 1] - ls;
    } else {
      remote[i].rank  = lr;
      remote[i].index = idx;
    }
  }
  PetscCall(PetscSFSetGraph(sf, nroots, nleaves, ilocal, localmode, remote, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFGetGraphLayout - Get the global indices and `PetscLayout` that describe this star forest

  Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ layout  - `PetscLayout` defining the global space for roots
. nleaves - number of leaf vertices on the current process, each of these references a root on any process
. ilocal  - locations of leaves in leafdata buffers, or `NULL` for contiguous storage
- gremote - root vertices in global numbering corresponding to leaves in `ilocal`

  Level: intermediate

  Notes:
  The outputs are such that passing them as inputs to `PetscSFSetGraphLayout()` would lead to the same star forest.
  The outputs `layout` and `gremote` are freshly created each time this function is called,
  so they need to be freed (with `PetscLayoutDestroy()` and `PetscFree()`) by the user.

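  Example Usage:
  A minimal sketch of the get/free pattern; only `layout` and `gremote` are owned by the caller, while `ilocal` is borrowed and must not be freed:
.vb
  PetscLayout     layout;
  PetscInt        nleaves;
  const PetscInt *ilocal;
  PetscInt       *gremote;

  PetscCall(PetscSFGetGraphLayout(sf, &layout, &nleaves, &ilocal, &gremote));
  /* ... use the graph ... */
  PetscCall(PetscLayoutDestroy(&layout));
  PetscCall(PetscFree(gremote));
.ve
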
.seealso: `PetscSF`, `PetscSFSetGraphLayout()`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFGetGraphLayout(PetscSF sf, PetscLayout *layout, PetscInt *nleaves, const PetscInt *ilocal[], PetscInt *gremote[])
{
  PetscInt           nr, nl;
  const PetscSFNode *ir;
  PetscLayout        lt;

  PetscFunctionBegin;
  PetscCall(PetscSFGetGraph(sf, &nr, &nl, ilocal, &ir));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sf), nr, PETSC_DECIDE, 1, &lt));
  if (gremote) {
    PetscInt        i;
    const PetscInt *range;
    PetscInt       *gr;

    PetscCall(PetscLayoutGetRanges(lt, &range));
    PetscCall(PetscMalloc1(nl, &gr));
    for (i = 0; i < nl; i++) gr[i] = range[ir[i].rank] + ir[i].index;
    *gremote = gr;
  }
  if (nleaves) *nleaves = nl;
  if (layout) *layout = lt;
  else PetscCall(PetscLayoutDestroy(&lt));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFSetGraphSection - Sets the `PetscSF` graph encoding the parallel dof overlap based upon the `PetscSection` describing the data layout.

  Input Parameters:
+ sf            - The `PetscSF`
. localSection  - `PetscSection` describing the local data layout
- globalSection - `PetscSection` describing the global data layout

  Level: developer

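  Example Usage:
  A minimal sketch, assuming `localSection` and `globalSection` already describe the same set of points (typically the global section is derived from the local one):
.vb
  PetscSF dofSF;

  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)localSection), &dofSF));
  PetscCall(PetscSFSetGraphSection(dofSF, localSection, globalSection));
.ve
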
.seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFSetGraphLayout()`
@*/
PetscErrorCode PetscSFSetGraphSection(PetscSF sf, PetscSection localSection, PetscSection globalSection)
{
  MPI_Comm        comm;
  PetscLayout     layout;
  const PetscInt *ranges;
  PetscInt       *local;
  PetscSFNode    *remote;
  PetscInt        pStart, pEnd, p, nroots, nleaves = 0, l;
  PetscMPIInt     size, rank;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidHeaderSpecific(localSection, PETSC_SECTION_CLASSID, 2);
  PetscValidHeaderSpecific(globalSection, PETSC_SECTION_CLASSID, 3);

  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscSectionGetChart(globalSection, &pStart, &pEnd));
  PetscCall(PetscSectionGetConstrainedStorageSize(globalSection, &nroots));
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, 1));
  PetscCall(PetscLayoutSetLocalSize(layout, nroots));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetRanges(layout, &ranges));
  for (p = pStart; p < pEnd; ++p) {
    PetscInt gdof, gcdof;

    PetscCall(PetscSectionGetDof(globalSection, p, &gdof));
    PetscCall(PetscSectionGetConstraintDof(globalSection, p, &gcdof));
    PetscCheck(gcdof <= (gdof < 0 ? -(gdof + 1) : gdof), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Point %" PetscInt_FMT " has %" PetscInt_FMT " constraints > %" PetscInt_FMT " dof", p, gcdof, gdof < 0 ? -(gdof + 1) : gdof);
    nleaves += gdof < 0 ? -(gdof + 1) - gcdof : gdof - gcdof;
  }
  PetscCall(PetscMalloc1(nleaves, &local));
  PetscCall(PetscMalloc1(nleaves, &remote));
  for (p = pStart, l = 0; p < pEnd; ++p) {
    const PetscInt *cind;
    PetscInt        dof, cdof, off, gdof, gcdof, goff, gsize, d, c;

    PetscCall(PetscSectionGetDof(localSection, p, &dof));
    PetscCall(PetscSectionGetOffset(localSection, p, &off));
    PetscCall(PetscSectionGetConstraintDof(localSection, p, &cdof));
    PetscCall(PetscSectionGetConstraintIndices(localSection, p, &cind));
    PetscCall(PetscSectionGetDof(globalSection, p, &gdof));
    PetscCall(PetscSectionGetConstraintDof(globalSection, p, &gcdof));
    PetscCall(PetscSectionGetOffset(globalSection, p, &goff));
    if (!gdof) continue; /* Censored point */
    gsize = gdof < 0 ? -(gdof + 1) - gcdof : gdof - gcdof;
    if (gsize != dof - cdof) {
      PetscCheck(gsize == dof, comm, PETSC_ERR_ARG_WRONG, "Global dof %" PetscInt_FMT " for point %" PetscInt_FMT " is neither the constrained size %" PetscInt_FMT ", nor the unconstrained %" PetscInt_FMT, gsize, p, dof - cdof, dof);
      cdof = 0; /* Ignore constraints */
    }
    for (d = 0, c = 0; d < dof; ++d) {
      if ((c < cdof) && (cind[c] == d)) {
        ++c;
        continue;
      }
      local[l + d - c] = off + d;
    }
    PetscCheck(d - c == gsize, comm, PETSC_ERR_ARG_WRONG, "Point %" PetscInt_FMT ": Global dof %" PetscInt_FMT " != %" PetscInt_FMT " size - number of constraints", p, gsize, d - c);
    if (gdof < 0) { /* gdof < 0 marks a remotely owned point: the global section encodes gdof = -(dof+1) and goff = -(offset+1) */
      for (d = 0; d < gsize; ++d, ++l) {
        PetscInt    offset = -(goff + 1) + d, ir;
        PetscMPIInt r;

        PetscCall(PetscFindInt(offset, size + 1, ranges, &ir));
        PetscCall(PetscMPIIntCast(ir, &r));
        if (r < 0) r = -(r + 2); /* PetscFindInt() returns -(insertion point)-1 when offset is not a range boundary */
        PetscCheck(!(r < 0) && !(r >= size), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Point %" PetscInt_FMT " mapped to invalid process %d (%" PetscInt_FMT ", %" PetscInt_FMT ")", p, r, gdof, goff);
        remote[l].rank  = r;
        remote[l].index = offset - ranges[r];
      }
    } else {
      for (d = 0; d < gsize; ++d, ++l) {
        remote[l].rank  = rank;
        remote[l].index = goff + d - ranges[rank];
      }
    }
  }
  PetscCheck(l == nleaves, comm, PETSC_ERR_PLIB, "Iteration error, l %" PetscInt_FMT " != nleaves %" PetscInt_FMT, l, nleaves);
  PetscCall(PetscLayoutDestroy(&layout));
  PetscCall(PetscSFSetGraph(sf, nroots, nleaves, local, PETSC_OWN_POINTER, remote, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFDistributeSection - Create a new `PetscSection` reorganized, moving from the root to the leaves of the `PetscSF`

  Collective

  Input Parameters:
+ sf          - The `PetscSF`
- rootSection - Section defined on root space

  Output Parameters:
+ remoteOffsets - root offsets in leaf storage, or `NULL` if not needed; when requested, its length is the size of the chart of `leafSection`
- leafSection   - Section defined on the leaf space

  Level: advanced

  Note:
  Caller must `PetscFree()` `remoteOffsets` if it was requested

  Fortran Note:
  Use `PetscSFDestroyRemoteOffsets()` when `remoteOffsets` is no longer needed.

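  Example Usage:
  A minimal sketch; `pointSF` stands for a caller-supplied SF over mesh points and is not part of this API:
.vb
  PetscSection leafSection;
  PetscInt    *remoteOffsets;

  PetscCall(PetscSectionCreate(comm, &leafSection));
  PetscCall(PetscSFDistributeSection(pointSF, rootSection, &remoteOffsets, leafSection));
  /* remoteOffsets can now seed PetscSFCreateSectionSF() */
  PetscCall(PetscFree(remoteOffsets));
.ve
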
.seealso: `PetscSF`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFDistributeSection(PetscSF sf, PetscSection rootSection, PetscInt *remoteOffsets[], PetscSection leafSection)
{
  PetscSF         embedSF;
  const PetscInt *indices;
  IS              selected;
  PetscInt        numFields, nroots, rpStart, rpEnd, lpStart = PETSC_INT_MAX, lpEnd = -1, f, c;
  PetscBool      *sub, hasc;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(PETSCSF_DistSect, sf, 0, 0, 0));
  PetscCall(PetscSectionGetNumFields(rootSection, &numFields));
  if (numFields) {
    IS perm;

    /* PetscSectionSetNumFields() calls PetscSectionReset(), which destroys
       leafSection->perm. To keep this permutation set by the user, we grab
       the reference before calling PetscSectionSetNumFields() and set it
       back after. */
    PetscCall(PetscSectionGetPermutation(leafSection, &perm));
    PetscCall(PetscObjectReference((PetscObject)perm));
    PetscCall(PetscSectionSetNumFields(leafSection, numFields));
    PetscCall(PetscSectionSetPermutation(leafSection, perm));
    PetscCall(ISDestroy(&perm));
  }
  PetscCall(PetscMalloc1(numFields + 2, &sub));
  sub[1] = rootSection->bc ? PETSC_TRUE : PETSC_FALSE;
  for (f = 0; f < numFields; ++f) {
    PetscSectionSym sym, dsym = NULL;
    const char     *name    = NULL;
    PetscInt        numComp = 0;

    sub[2 + f] = rootSection->field[f]->bc ? PETSC_TRUE : PETSC_FALSE;
    PetscCall(PetscSectionGetFieldComponents(rootSection, f, &numComp));
    PetscCall(PetscSectionGetFieldName(rootSection, f, &name));
    PetscCall(PetscSectionGetFieldSym(rootSection, f, &sym));
    if (sym) PetscCall(PetscSectionSymDistribute(sym, sf, &dsym));
    PetscCall(PetscSectionSetFieldComponents(leafSection, f, numComp));
    PetscCall(PetscSectionSetFieldName(leafSection, f, name));
    PetscCall(PetscSectionSetFieldSym(leafSection, f, dsym));
    PetscCall(PetscSectionSymDestroy(&dsym));
    for (c = 0; c < rootSection->numFieldComponents[f]; ++c) {
      PetscCall(PetscSectionGetComponentName(rootSection, f, c, &name));
      PetscCall(PetscSectionSetComponentName(leafSection, f, c, name));
    }
  }
  PetscCall(PetscSectionGetChart(rootSection, &rpStart, &rpEnd));
  PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
  rpEnd = PetscMin(rpEnd, nroots);
  rpEnd = PetscMax(rpStart, rpEnd);
  /* see if we can avoid creating the embedded SF, since it can cost more than an allreduce */
  sub[0] = (PetscBool)(nroots != rpEnd - rpStart);
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, sub, 2 + numFields, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)sf)));
  if (sub[0]) {
    PetscCall(ISCreateStride(PETSC_COMM_SELF, rpEnd - rpStart, rpStart, 1, &selected));
    PetscCall(ISGetIndices(selected, &indices));
    PetscCall(PetscSFCreateEmbeddedRootSF(sf, rpEnd - rpStart, indices, &embedSF));
    PetscCall(ISRestoreIndices(selected, &indices));
    PetscCall(ISDestroy(&selected));
  } else {
    PetscCall(PetscObjectReference((PetscObject)sf));
    embedSF = sf;
  }
  PetscCall(PetscSFGetLeafRange(embedSF, &lpStart, &lpEnd));
  lpEnd++;

  PetscCall(PetscSectionSetChart(leafSection, lpStart, lpEnd));

  /* Constrained dof section */
  hasc = sub[1];
  for (f = 0; f < numFields; ++f) hasc = (PetscBool)(hasc || sub[2 + f]);

  /* Could fuse these at the cost of copies and extra allocation */
  PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->atlasDof, -lpStart), MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->atlasDof, -lpStart), MPI_REPLACE));
  if (sub[1]) {
    PetscCall(PetscSectionCheckConstraints_Private(rootSection));
    PetscCall(PetscSectionCheckConstraints_Private(leafSection));
    PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->bc->atlasDof[-rpStart], &leafSection->bc->atlasDof[-lpStart], MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->bc->atlasDof[-rpStart], &leafSection->bc->atlasDof[-lpStart], MPI_REPLACE));
  }
  for (f = 0; f < numFields; ++f) {
    PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->field[f]->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->field[f]->atlasDof, -lpStart), MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->field[f]->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->field[f]->atlasDof, -lpStart), MPI_REPLACE));
    if (sub[2 + f]) {
      PetscCall(PetscSectionCheckConstraints_Private(rootSection->field[f]));
      PetscCall(PetscSectionCheckConstraints_Private(leafSection->field[f]));
      PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasDof[-rpStart], &leafSection->field[f]->bc->atlasDof[-lpStart], MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasDof[-rpStart], &leafSection->field[f]->bc->atlasDof[-lpStart], MPI_REPLACE));
    }
  }
  if (remoteOffsets) {
    PetscCall(PetscMalloc1(lpEnd - lpStart, remoteOffsets));
    PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
  }
  PetscCall(PetscSectionInvalidateMaxDof_Internal(leafSection));
  PetscCall(PetscSectionSetUp(leafSection));
  if (hasc) { /* need to communicate bcIndices */
    PetscSF   bcSF;
    PetscInt *rOffBc;

    PetscCall(PetscMalloc1(lpEnd - lpStart, &rOffBc));
    if (sub[1]) {
      PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
      PetscCall(PetscSFCreateSectionSF(embedSF, rootSection->bc, rOffBc, leafSection->bc, &bcSF));
      PetscCall(PetscSFBcastBegin(bcSF, MPIU_INT, rootSection->bcIndices, leafSection->bcIndices, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(bcSF, MPIU_INT, rootSection->bcIndices, leafSection->bcIndices, MPI_REPLACE));
      PetscCall(PetscSFDestroy(&bcSF));
    }
    for (f = 0; f < numFields; ++f) {
      if (sub[2 + f]) {
        PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
        PetscCall(PetscSFCreateSectionSF(embedSF, rootSection->field[f]->bc, rOffBc, leafSection->field[f]->bc, &bcSF));
        PetscCall(PetscSFBcastBegin(bcSF, MPIU_INT, rootSection->field[f]->bcIndices, leafSection->field[f]->bcIndices, MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(bcSF, MPIU_INT, rootSection->field[f]->bcIndices, leafSection->field[f]->bcIndices, MPI_REPLACE));
        PetscCall(PetscSFDestroy(&bcSF));
      }
    }
    PetscCall(PetscFree(rOffBc));
  }
  PetscCall(PetscSFDestroy(&embedSF));
  PetscCall(PetscFree(sub));
  PetscCall(PetscLogEventEnd(PETSCSF_DistSect, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFCreateRemoteOffsets - Create offsets for point data on remote processes

  Collective

  Input Parameters:
+ sf          - The `PetscSF`
. rootSection - Data layout of remote points for outgoing data (this is the layout for roots)
- leafSection - Data layout of local points for incoming data (this is the layout for leaves)

  Output Parameter:
. remoteOffsets - Offsets for point data on remote processes (these are offsets from the root section), or `NULL`

  Level: developer

  Note:
  Caller must `PetscFree()` `remoteOffsets` if it was requested

  Fortran Note:
  Use `PetscSFDestroyRemoteOffsets()` when `remoteOffsets` is no longer needed.

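  Example Usage:
  A minimal sketch; the offsets are then typically fed to `PetscSFCreateSectionSF()`:
.vb
  PetscInt *remoteOffsets;
  PetscSF   sectionSF;

  PetscCall(PetscSFCreateRemoteOffsets(sf, rootSection, leafSection, &remoteOffsets));
  PetscCall(PetscSFCreateSectionSF(sf, rootSection, remoteOffsets, leafSection, &sectionSF));
  PetscCall(PetscFree(remoteOffsets));
.ve
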
.seealso: `PetscSF`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFCreateRemoteOffsets(PetscSF sf, PetscSection rootSection, PetscSection leafSection, PetscInt *remoteOffsets[])
{
  PetscSF         embedSF;
  const PetscInt *indices;
  IS              selected;
  PetscInt        numRoots, rpStart = 0, rpEnd = 0, lpStart = 0, lpEnd = 0;

  PetscFunctionBegin;
  *remoteOffsets = NULL;
  PetscCall(PetscSFGetGraph(sf, &numRoots, NULL, NULL, NULL));
  if (numRoots < 0) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscLogEventBegin(PETSCSF_RemoteOff, sf, 0, 0, 0));
  PetscCall(PetscSectionGetChart(rootSection, &rpStart, &rpEnd));
  PetscCall(PetscSectionGetChart(leafSection, &lpStart, &lpEnd));
  PetscCall(ISCreateStride(PETSC_COMM_SELF, rpEnd - rpStart, rpStart, 1, &selected));
  PetscCall(ISGetIndices(selected, &indices));
  PetscCall(PetscSFCreateEmbeddedRootSF(sf, rpEnd - rpStart, indices, &embedSF));
  PetscCall(ISRestoreIndices(selected, &indices));
  PetscCall(ISDestroy(&selected));
  PetscCall(PetscCalloc1(lpEnd - lpStart, remoteOffsets));
  PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
  PetscCall(PetscSFDestroy(&embedSF));
  PetscCall(PetscLogEventEnd(PETSCSF_RemoteOff, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFCreateSectionSF - Create an expanded `PetscSF` of dofs, assuming the input `PetscSF` relates points

  Collective

  Input Parameters:
+ sf            - The `PetscSF`
. rootSection   - Data layout of remote points for outgoing data (this is usually the serial section)
. remoteOffsets - Offsets for point data on remote processes (these are offsets from the root section), or `NULL`
- leafSection   - Data layout of local points for incoming data (this is the distributed section)

  Output Parameter:
. sectionSF - The new `PetscSF`

  Level: advanced

  Note:
  `remoteOffsets` can be `NULL` if `sf` does not reference any points in `leafSection`

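  Example Usage:
  A minimal sketch; once built, the dof SF moves arrays laid out by the sections. Here `rootArray` and `leafArray` are caller-supplied buffers obeying `rootSection` and `leafSection`, respectively:
.vb
  PetscSF sectionSF;

  PetscCall(PetscSFCreateSectionSF(sf, rootSection, remoteOffsets, leafSection, &sectionSF));
  PetscCall(PetscSFBcastBegin(sectionSF, MPIU_SCALAR, rootArray, leafArray, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sectionSF, MPIU_SCALAR, rootArray, leafArray, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sectionSF));
.ve
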
.seealso: `PetscSF`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFCreateSectionSF(PetscSF sf, PetscSection rootSection, PetscInt remoteOffsets[], PetscSection leafSection, PetscSF *sectionSF)
{
  MPI_Comm           comm;
  const PetscInt    *localPoints;
  const PetscSFNode *remotePoints;
  PetscInt           lpStart, lpEnd;
  PetscInt           numRoots, numSectionRoots, numPoints, numIndices = 0;
  PetscInt          *localIndices;
  PetscSFNode       *remoteIndices;
  PetscInt           i, ind;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscAssertPointer(rootSection, 2);
  /* Cannot check PetscAssertPointer(remoteOffsets,3) because it can be NULL if sf does not reference any points in leafSection */
  PetscAssertPointer(leafSection, 4);
  PetscAssertPointer(sectionSF, 5);
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCall(PetscSFCreate(comm, sectionSF));
  PetscCall(PetscSectionGetChart(leafSection, &lpStart, &lpEnd));
  PetscCall(PetscSectionGetStorageSize(rootSection, &numSectionRoots));
  PetscCall(PetscSFGetGraph(sf, &numRoots, &numPoints, &localPoints, &remotePoints));
  if (numRoots < 0) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscLogEventBegin(PETSCSF_SectSF, sf, 0, 0, 0));
  for (i = 0; i < numPoints; ++i) {
    PetscInt localPoint = localPoints ? localPoints[i] : i;
    PetscInt dof;

    if ((localPoint >= lpStart) && (localPoint < lpEnd)) {
      PetscCall(PetscSectionGetDof(leafSection, localPoint, &dof));
      numIndices += dof < 0 ? 0 : dof;
    }
  }
  PetscCall(PetscMalloc1(numIndices, &localIndices));
  PetscCall(PetscMalloc1(numIndices, &remoteIndices));
  /* Create new index graph */
  for (i = 0, ind = 0; i < numPoints; ++i) {
    PetscInt localPoint = localPoints ? localPoints[i] : i;
    PetscInt rank       = remotePoints[i].rank;

    if ((localPoint >= lpStart) && (localPoint < lpEnd)) {
      PetscInt remoteOffset = remoteOffsets[localPoint - lpStart];
      PetscInt loff, dof, d;

      PetscCall(PetscSectionGetOffset(leafSection, localPoint, &loff));
      PetscCall(PetscSectionGetDof(leafSection, localPoint, &dof));
      for (d = 0; d < dof; ++d, ++ind) {
        localIndices[ind]        = loff + d;
        remoteIndices[ind].rank  = rank;
        remoteIndices[ind].index = remoteOffset + d;
      }
    }
  }
  PetscCheck(numIndices == ind, comm, PETSC_ERR_PLIB, "Inconsistency in indices, %" PetscInt_FMT " should be %" PetscInt_FMT, ind, numIndices);
  PetscCall(PetscSFSetGraph(*sectionSF, numSectionRoots, numIndices, localIndices, PETSC_OWN_POINTER, remoteIndices, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetUp(*sectionSF));
  PetscCall(PetscLogEventEnd(PETSCSF_SectSF, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFCreateFromLayouts - Creates a parallel star forest mapping two `PetscLayout` objects

  Collective

  Input Parameters:
+ rmap - `PetscLayout` defining the global root space
- lmap - `PetscLayout` defining the global leaf space

  Output Parameter:
. sf - The parallel star forest

  Level: intermediate

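  Example Usage:
  A minimal sketch mapping data between two layouts of the same global size `N`; here `comm`, `N`, and the local leaf size `n` are caller-supplied assumptions:
.vb
  PetscLayout rmap, lmap;
  PetscSF     sf;

  PetscCall(PetscLayoutCreateFromSizes(comm, PETSC_DECIDE, N, 1, &rmap));
  PetscCall(PetscLayoutCreateFromSizes(comm, n, N, 1, &lmap));
  PetscCall(PetscSFCreateFromLayouts(rmap, lmap, &sf));
.ve
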
.seealso: `PetscSF`, `PetscSFCreate()`, `PetscLayoutCreate()`, `PetscSFSetGraphLayout()`
@*/
PetscErrorCode PetscSFCreateFromLayouts(PetscLayout rmap, PetscLayout lmap, PetscSF *sf)
{
  PetscInt     i, nroots, nleaves = 0;
  PetscInt     rN, lst, len;
  PetscMPIInt  owner = -1;
  PetscSFNode *remote;
  MPI_Comm     rcomm = rmap->comm;
  MPI_Comm     lcomm = lmap->comm;
  PetscMPIInt  flg;

  PetscFunctionBegin;
  PetscAssertPointer(sf, 3);
  PetscCheck(rmap->setupcalled, rcomm, PETSC_ERR_ARG_WRONGSTATE, "Root layout not setup");
  PetscCheck(lmap->setupcalled, lcomm, PETSC_ERR_ARG_WRONGSTATE, "Leaf layout not setup");
  PetscCallMPI(MPI_Comm_compare(rcomm, lcomm, &flg));
  PetscCheck(flg == MPI_CONGRUENT || flg == MPI_IDENT, rcomm, PETSC_ERR_SUP, "Cannot map two layouts with non-matching communicators");
  PetscCall(PetscSFCreate(rcomm, sf));
  PetscCall(PetscLayoutGetLocalSize(rmap, &nroots));
  PetscCall(PetscLayoutGetSize(rmap, &rN));
  PetscCall(PetscLayoutGetRange(lmap, &lst, &len));
  PetscCall(PetscMalloc1(len - lst, &remote));
  for (i = lst; i < len && i < rN; i++) {
    if (owner < -1 || i >= rmap->range[owner + 1]) PetscCall(PetscLayoutFindOwner(rmap, i, &owner)); /* short-circuit the search when the previous owner also owns i */
    remote[nleaves].rank  = owner;
    remote[nleaves].index = i - rmap->range[owner];
    nleaves++;
  }
  PetscCall(PetscSFSetGraph(*sf, nroots, nleaves, NULL, PETSC_OWN_POINTER, remote, PETSC_COPY_VALUES));
  PetscCall(PetscFree(remote));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* TODO: handle nooffprocentries like MatZeroRowsMapLocal_Private, since this code is the same */
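/* Descriptive sketch of the contract (inferred from the code below): map the global indices idxs[]
   (length N; negative entries are skipped) onto their owning ranks in map. On output, *on is the number
   of locally owned indices selected by some process, *oidxs lists their local positions within this
   rank's range, and *ogidxs (if requested) carries a new contiguous global numbering of the selected
   entries. */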
PetscErrorCode PetscLayoutMapLocal(PetscLayout map, PetscInt N, const PetscInt idxs[], PetscInt *on, PetscInt *oidxs[], PetscInt *ogidxs[])
{
  PetscInt    *owners = map->range;
  PetscInt     n      = map->n;
  PetscSF      sf;
  PetscInt    *lidxs, *work = NULL, *ilocal;
  PetscSFNode *ridxs;
  PetscMPIInt  rank, p = 0;
  PetscInt     r, len = 0, nleaves = 0;

  PetscFunctionBegin;
  if (on) *on = 0; /* squelch -Wmaybe-uninitialized */
  /* Create SF where leaves are input idxs and roots are owned idxs */
  PetscCallMPI(MPI_Comm_rank(map->comm, &rank));
  PetscCall(PetscMalloc1(n, &lidxs));
  for (r = 0; r < n; ++r) lidxs[r] = -1;
  PetscCall(PetscMalloc1(N, &ridxs));
  PetscCall(PetscMalloc1(N, &ilocal));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = idxs[r];

    if (idx < 0) continue;
    PetscCheck(idx < map->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Index %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, map->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this idx too */
      PetscCall(PetscLayoutFindOwner(map, idx, &p));
    }
    ridxs[nleaves].rank  = p;
    ridxs[nleaves].index = idxs[r] - owners[p];
    ilocal[nleaves]      = r;
    nleaves++;
  }
  PetscCall(PetscSFCreate(map->comm, &sf));
  PetscCall(PetscSFSetGraph(sf, n, nleaves, ilocal, PETSC_OWN_POINTER, ridxs, PETSC_OWN_POINTER));
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)idxs, lidxs, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)idxs, lidxs, MPI_LOR));
  if (ogidxs) { /* communicate global idxs */
    PetscInt cum = 0, start, *work2;

    PetscCall(PetscMalloc1(n, &work));
    PetscCall(PetscCalloc1(N, &work2));
    for (r = 0; r < N; ++r)
      if (idxs[r] >= 0) cum++;
    PetscCallMPI(MPI_Scan(&cum, &start, 1, MPIU_INT, MPI_SUM, map->comm));
    start -= cum;
    cum = 0;
    for (r = 0; r < N; ++r)
      if (idxs[r] >= 0) work2[r] = start + cum++;
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work2, work, MPI_REPLACE));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work2, work, MPI_REPLACE));
    PetscCall(PetscFree(work2));
  }
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in indices */
  for (r = 0; r < n; ++r)
    if (lidxs[r] >= 0) {
      if (work) work[len] = work[r];
      lidxs[len++] = r;
    }
  if (on) *on = len;
  if (oidxs) *oidxs = lidxs;
  if (ogidxs) *ogidxs = work;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFCreateByMatchingIndices - Create `PetscSF` by matching root and leaf indices

  Collective

  Input Parameters:
+ layout           - `PetscLayout` defining the global index space and the rank that brokers each index
. numRootIndices   - size of rootIndices
. rootIndices      - `PetscInt` array of global indices of which this process requests ownership
. rootLocalIndices - root local index permutation (`NULL` if no permutation)
. rootLocalOffset  - offset to be added to root local indices
. numLeafIndices   - size of leafIndices
. leafIndices      - `PetscInt` array of global indices with which this process requires data associated
. leafLocalIndices - leaf local index permutation (`NULL` if no permutation)
- leafLocalOffset  - offset to be added to leaf local indices

  Output Parameters:
+ sfA - star forest representing the communication pattern from the layout space to the leaf space (`NULL` if not needed)
- sf  - star forest representing the communication pattern from the root space to the leaf space

  Level: advanced

  Example 1:
.vb
  rank             : 0            1            2
  rootIndices      : [1 0 2]      [3]          [3]
  rootLocalOffset  : 100          200          300
  layout           : [0 1]        [2]          [3]
  leafIndices      : [0]          [2]          [0 3]
  leafLocalOffset  : 400          500          600

would build the following PetscSF

  [0] 400 <- (0,101)
  [1] 500 <- (0,102)
  [2] 600 <- (0,101)
  [2] 601 <- (2,300)
.ve

  Example 2:
.vb
  rank             : 0               1               2
  rootIndices      : [1 0 2]         [3]             [3]
  rootLocalOffset  : 100             200             300
  layout           : [0 1]           [2]             [3]
  leafIndices      : rootIndices     rootIndices     rootIndices
  leafLocalOffset  : rootLocalOffset rootLocalOffset rootLocalOffset

would build the following PetscSF

  [1] 200 <- (2,300)
.ve

  Example 3:
.vb
  No process requests ownership of global index 1, but no process needs it.

  rank             : 0            1            2
  numRootIndices   : 2            1            1
  rootIndices      : [0 2]        [3]          [3]
  rootLocalOffset  : 100          200          300
  layout           : [0 1]        [2]          [3]
  numLeafIndices   : 1            1            2
  leafIndices      : [0]          [2]          [0 3]
  leafLocalOffset  : 400          500          600

would build the following PetscSF

  [0] 400 <- (0,100)
  [1] 500 <- (0,101)
  [2] 600 <- (0,100)
  [2] 601 <- (2,300)
.ve

  Notes:
  The layout parameter represents any partitioning of [0, N), where N is the total number of global indices, and its
  local size can be set to `PETSC_DECIDE`.

  If a global index x lies in the partition owned by process i, each process whose rootIndices contains x requests
  ownership of x and sends its own rank and the local index of x to process i.
  If multiple processes request ownership of x, the one with the highest rank becomes the owner of x.
  Process i then broadcasts the ownership information, so that each process whose leafIndices contains x knows the
  ownership information of x.
  The output sf is constructed by associating each leaf point with a root point in this way.

  Suppose there is point data ordered according to the global indices and partitioned according to the given layout.
  The optional output `PetscSF` object sfA can be used to push such data to leaf points.

  All indices in rootIndices and leafIndices must lie in the layout range. The union (over all processes) of rootIndices
  must cover that of leafIndices, but need not cover the entire layout.

  If (leafIndices, leafLocalIndices, leafLocalOffset) == (rootIndices, rootLocalIndices, rootLocalOffset), the output
  star forest is almost the identity, so it will include only the non-trivial part of the map.

  Developer Notes:
  The current approach, in which the process with the highest rank gains ownership, may cause load imbalance; consider using
  hash(rank, root_local_index) as the bid for the ownership determination.

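  Example Usage:
  A minimal calling sketch; the index arrays, sizes, and offsets are placeholders to be supplied by the caller:
.vb
  PetscSF sfA, sf;

  PetscCall(PetscSFCreateByMatchingIndices(layout, numRootIndices, rootIndices, NULL, rootLocalOffset, numLeafIndices, leafIndices, NULL, leafLocalOffset, &sfA, &sf));
  PetscCall(PetscSFDestroy(&sfA)); /* or keep sfA to push layout-ordered data to the leaves */
.ve
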
.seealso: `PetscSF`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFCreateByMatchingIndices(PetscLayout layout, PetscInt numRootIndices, const PetscInt *rootIndices, const PetscInt *rootLocalIndices, PetscInt rootLocalOffset, PetscInt numLeafIndices, const PetscInt *leafIndices, const PetscInt *leafLocalIndices, PetscInt leafLocalOffset, PetscSF *sfA, PetscSF *sf)
{
  MPI_Comm     comm = layout->comm;
  PetscMPIInt  size, rank;
  PetscSF      sf1;
  PetscSFNode *owners, *buffer, *iremote;
  PetscInt    *ilocal, nleaves, N, n, i;
#if defined(PETSC_USE_DEBUG)
  PetscInt N1;
#endif
  PetscBool flag;

  PetscFunctionBegin;
  if (rootIndices) PetscAssertPointer(rootIndices, 3);
  if (rootLocalIndices) PetscAssertPointer(rootLocalIndices, 4);
  if (leafIndices) PetscAssertPointer(leafIndices, 7);
  if (leafLocalIndices) PetscAssertPointer(leafLocalIndices, 8);
  if (sfA) PetscAssertPointer(sfA, 10);
  PetscAssertPointer(sf, 11);
  PetscCheck(numRootIndices >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "numRootIndices (%" PetscInt_FMT ") must be non-negative", numRootIndices);
  PetscCheck(numLeafIndices >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "numLeafIndices (%" PetscInt_FMT ") must be non-negative", numLeafIndices);
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetSize(layout, &N));
  PetscCall(PetscLayoutGetLocalSize(layout, &n));
  flag = (PetscBool)(leafIndices == rootIndices);
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flag, 1, MPI_C_BOOL, MPI_LAND, comm));
  PetscCheck(!flag || numLeafIndices == numRootIndices, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "leafIndices == rootIndices, but numLeafIndices (%" PetscInt_FMT ") != numRootIndices (%" PetscInt_FMT ")", numLeafIndices, numRootIndices);
#if defined(PETSC_USE_DEBUG)
  N1 = PETSC_INT_MIN;
  for (i = 0; i < numRootIndices; i++)
    if (rootIndices[i] > N1) N1 = rootIndices[i];
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &N1, 1, MPIU_INT, MPI_MAX, comm));
  PetscCheck(N1 < N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Max. root index (%" PetscInt_FMT ") out of layout range [0,%" PetscInt_FMT ")", N1, N);
  if (!flag) {
    N1 = PETSC_INT_MIN;
    for (i = 0; i < numLeafIndices; i++)
      if (leafIndices[i] > N1) N1 = leafIndices[i];
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &N1, 1, MPIU_INT, MPI_MAX, comm));
    PetscCheck(N1 < N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Max. leaf index (%" PetscInt_FMT ") out of layout range [0,%" PetscInt_FMT ")", N1, N);
  }
#endif
  /* Reduce: owners -> buffer */
  PetscCall(PetscMalloc1(n, &buffer));
  PetscCall(PetscSFCreate(comm, &sf1));
  PetscCall(PetscSFSetFromOptions(sf1));
  PetscCall(PetscSFSetGraphLayout(sf1, layout, numRootIndices, NULL, PETSC_OWN_POINTER, rootIndices));
  PetscCall(PetscMalloc1(numRootIndices, &owners));
  for (i = 0; i < numRootIndices; ++i) {
    owners[i].rank  = rank;
    owners[i].index = rootLocalOffset + (rootLocalIndices ? rootLocalIndices[i] : i);
  }
  for (i = 0; i < n; ++i) {
    buffer[i].index = -1;
    buffer[i].rank  = -1;
  }
  /* MPI_MAXLOC over (rank, index) pairs: the requester with the highest rank wins ownership */
  PetscCall(PetscSFReduceBegin(sf1, MPIU_SF_NODE, owners, buffer, MPI_MAXLOC));
  PetscCall(PetscSFReduceEnd(sf1, MPIU_SF_NODE, owners, buffer, MPI_MAXLOC));
  /* Bcast: buffer -> owners */
  if (!flag) {
    /* leafIndices is different from rootIndices */
    PetscCall(PetscFree(owners));
    PetscCall(PetscSFSetGraphLayout(sf1, layout, numLeafIndices, NULL, PETSC_OWN_POINTER, leafIndices));
    PetscCall(PetscMalloc1(numLeafIndices, &owners));
  }
  PetscCall(PetscSFBcastBegin(sf1, MPIU_SF_NODE, buffer, owners, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf1, MPIU_SF_NODE, buffer, owners, MPI_REPLACE));
  for (i = 0; i < numLeafIndices; ++i) PetscCheck(owners[i].rank >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Global point %" PetscInt_FMT " was unclaimed", leafIndices[i]);
  PetscCall(PetscFree(buffer));
  if (sfA) {
    *sfA = sf1;
  } else PetscCall(PetscSFDestroy(&sf1));
  /* Create sf */
  if (flag && rootLocalIndices == leafLocalIndices && leafLocalOffset == rootLocalOffset) {
    /* leaf space == root space */
    for (i = 0, nleaves = 0; i < numLeafIndices; ++i)
      if (owners[i].rank != rank) ++nleaves;
    PetscCall(PetscMalloc1(nleaves, &ilocal));
    PetscCall(PetscMalloc1(nleaves, &iremote));
    for (i = 0, nleaves = 0; i < numLeafIndices; ++i) {
      if (owners[i].rank != rank) {
        ilocal[nleaves]        = leafLocalOffset + i;
        iremote[nleaves].rank  = owners[i].rank;
        iremote[nleaves].index = owners[i].index;
        ++nleaves;
      }
    }
    PetscCall(PetscFree(owners));
  } else {
    nleaves = numLeafIndices;
    PetscCall(PetscMalloc1(nleaves, &ilocal));
    for (i = 0; i < nleaves; ++i) ilocal[i] = leafLocalOffset + (leafLocalIndices ? leafLocalIndices[i] : i);
    iremote = owners;
  }
  PetscCall(PetscSFCreate(comm, sf));
  PetscCall(PetscSFSetFromOptions(*sf));
  PetscCall(PetscSFSetGraph(*sf, rootLocalOffset + numRootIndices, nleaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFMerge - append/merge indices of `sfb` into `sfa`, with preference for `sfb`

  Collective

  Input Parameters:
+ sfa - default `PetscSF`
- sfb - additional edges to add/replace edges in `sfa`

  Output Parameter:
. merged - new `PetscSF` with combined edges

  Level: intermediate

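  Example Usage:
  A minimal sketch; leaves present in both inputs take their edge from `sfb`:
.vb
  PetscSF merged;

  PetscCall(PetscSFMerge(sfa, sfb, &merged));
  /* ... use merged ... */
  PetscCall(PetscSFDestroy(&merged));
.ve
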
.seealso: `PetscSFCompose()`
@*/
PetscErrorCode PetscSFMerge(PetscSF sfa, PetscSF sfb, PetscSF *merged)
{
  PetscInt maxleaf;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfa, PETSCSF_CLASSID, 1);
  PetscValidHeaderSpecific(sfb, PETSCSF_CLASSID, 2);
  PetscCheckSameComm(sfa, 1, sfb, 2);
  PetscAssertPointer(merged, 3);
  {
    PetscInt aleaf, bleaf;
    PetscCall(PetscSFGetLeafRange(sfa, NULL, &aleaf));
    PetscCall(PetscSFGetLeafRange(sfb, NULL, &bleaf));
    maxleaf = PetscMax(aleaf, bleaf) + 1; // One more than the last index
  }
  PetscInt          *clocal, aroots, aleaves, broots, bleaves;
  PetscSFNode       *cremote;
  const PetscInt    *alocal, *blocal;
  const PetscSFNode *aremote, *bremote;
  PetscCall(PetscMalloc2(maxleaf, &clocal, maxleaf, &cremote));
  for (PetscInt i = 0; i < maxleaf; i++) clocal[i] = -1;
  PetscCall(PetscSFGetGraph(sfa, &aroots, &aleaves, &alocal, &aremote));
  PetscCall(PetscSFGetGraph(sfb, &broots, &bleaves, &blocal, &bremote));
  PetscCheck(aroots == broots, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Both sfa and sfb must have the same root space");
  for (PetscInt i = 0; i < aleaves; i++) { // start from the edges of sfa
    PetscInt a = alocal ? alocal[i] : i;
    clocal[a]  = a;
    cremote[a] = aremote[i];
  }
  for (PetscInt i = 0; i < bleaves; i++) { // edges of sfb overwrite any sfa edge at the same leaf
    PetscInt b = blocal ? blocal[i] : i;
    clocal[b]  = b;
    cremote[b] = bremote[i];
  }
  PetscInt nleaves = 0;
  for (PetscInt i = 0; i < maxleaf; i++) { // compress out the unused leaf slots
    if (clocal[i] < 0) continue;
    clocal[nleaves]  = clocal[i];
    cremote[nleaves] = cremote[i];
    nleaves++;
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfa), merged));
  PetscCall(PetscSFSetGraph(*merged, aroots, nleaves, clocal, PETSC_COPY_VALUES, cremote, PETSC_COPY_VALUES));
  PetscCall(PetscFree2(clocal, cremote));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFCreateStridedSF - Create a `PetscSF` to communicate interleaved blocks of data

  Collective

  Input Parameters:
+ sf  - star forest
. bs  - stride
. ldr - leading dimension of root space
- ldl - leading dimension of leaf space

  Output Parameter:
. vsf - the new `PetscSF`

  Level: intermediate

  Notes:
  This can be useful to perform communications on blocks of right-hand sides. For example, the calling sequence
.vb
  c_datatype *roots, *leaves;
  for i in [0,bs) do
    PetscSFBcastBegin(sf, mpi_datatype, roots + i*ldr, leaves + i*ldl, op)
    PetscSFBcastEnd(sf, mpi_datatype, roots + i*ldr, leaves + i*ldl, op)
.ve
  is equivalent to
.vb
  c_datatype *roots, *leaves;
  PetscSFCreateStridedSF(sf, bs, ldr, ldl, &vsf)
  PetscSFBcastBegin(vsf, mpi_datatype, roots, leaves, op)
  PetscSFBcastEnd(vsf, mpi_datatype, roots, leaves, op)
.ve

  Developer Notes:
  Should this functionality be handled with a new API instead of creating a new object?

.seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFCreateStridedSF(PetscSF sf, PetscInt bs, PetscInt ldr, PetscInt ldl, PetscSF *vsf)
{
  PetscSF            rankssf;
  const PetscSFNode *iremote, *sfrremote;
  PetscSFNode       *viremote;
  const PetscInt    *ilocal;
  PetscInt          *vilocal = NULL, *ldrs;
  PetscInt           nranks, nr, nl, vnr, vnl, maxl;
  PetscMPIInt        rank;
  MPI_Comm           comm;
  PetscSFType        sftype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveInt(sf, bs, 2);
  PetscAssertPointer(vsf, 5);
  if (bs == 1) {
    PetscCall(PetscObjectReference((PetscObject)sf));
    *vsf = sf;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscSFGetGraph(sf, &nr, &nl, &ilocal, &iremote));
  PetscCall(PetscSFGetLeafRange(sf, NULL, &maxl));
  maxl += 1;
  if (ldl == PETSC_DECIDE) ldl = maxl;
  if (ldr == PETSC_DECIDE) ldr = nr;
  PetscCheck(ldr >= nr, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid leading dimension %" PetscInt_FMT ": must be at least the number of roots %" PetscInt_FMT, ldr, nr);
  PetscCheck(ldl >= maxl, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid leading dimension %" PetscInt_FMT ": must be larger than the leaf range %" PetscInt_FMT, ldl, maxl - 1);
  vnr = nr * bs;
  vnl = nl * bs;
  PetscCall(PetscMalloc1(vnl, &viremote));
  PetscCall(PetscMalloc1(vnl, &vilocal));

  /* Communicate root leading dimensions to leaf ranks */
  PetscCall(PetscSFGetRanksSF(sf, &rankssf));
  PetscCall(PetscSFGetGraph(rankssf, NULL, &nranks, NULL, &sfrremote));
  PetscCall(PetscMalloc1(nranks, &ldrs));
  PetscCall(PetscSFBcastBegin(rankssf, MPIU_INT, &ldr, ldrs, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rankssf, MPIU_INT, &ldr, ldrs, MPI_REPLACE));

  /* Expand each point edge into bs strided edges, using the owner's leading dimension for the root indices */
  for (PetscInt i = 0, rold = -1, lda = -1; i < nl; i++) {
    const PetscInt r  = iremote[i].rank;
    const PetscInt ii = iremote[i].index;

    if (r == rank) lda = ldr;
    else if (rold != r) { /* locate the owner's leading dimension */
      PetscInt j;

      for (j = 0; j < nranks; j++)
        if (sfrremote[j].rank == r) break;
      PetscCheck(j < nranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to locate neighbor rank %" PetscInt_FMT, r);
      lda = ldrs[j];
    }
    rold = r;
    for (PetscInt v = 0; v < bs; v++) {
      viremote[v * nl + i].rank  = r;
      viremote[v * nl + i].index = v * lda + ii;
      vilocal[v * nl + i]        = v * ldl + (ilocal ? ilocal[i] : i);
    }
  }
  PetscCall(PetscFree(ldrs));
  PetscCall(PetscSFCreate(comm, vsf));
  PetscCall(PetscSFGetType(sf, &sftype));
  PetscCall(PetscSFSetType(*vsf, sftype));
  PetscCall(PetscSFSetGraph(*vsf, vnr, vnl, vilocal, PETSC_OWN_POINTER, viremote, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}