xref: /petsc/src/vec/is/sf/utils/sfutils.c (revision c46fcffb3e60e151c78cb2e518f9562efac8633f)
1 #include <petsc/private/sfimpl.h> /*I  "petscsf.h"   I*/
2 #include <petsc/private/sectionimpl.h>
3 
4 /*@
5   PetscSFSetGraphLayout - Set a `PetscSF` communication pattern using global indices and a `PetscLayout`
6 
7   Collective
8 
9   Input Parameters:
10 + sf        - star forest
11 . layout    - `PetscLayout` defining the global space for roots, i.e. which roots are owned by each MPI process
12 . nleaves   - number of leaf vertices on the current process, each of these references a root on any MPI process
13 . ilocal    - locations of leaves in leafdata buffers, pass `NULL` for contiguous storage, that is the locations are in [0,`nleaves`)
14 . localmode - copy mode for `ilocal`
15 - gremote   - root vertices in global numbering corresponding to the leaves
16 
17   Level: intermediate
18 
19   Note:
20   Global indices must lie in [0, N) where N is the global size of `layout`.
21   Leaf indices in `ilocal` get sorted; this means the user-provided array gets sorted if localmode is `PETSC_OWN_POINTER`.
22 
23   Developer Notes:
24   Local indices which are the identity permutation in the range [0,`nleaves`) are discarded as they
25   encode contiguous storage. In such case, if localmode is `PETSC_OWN_POINTER`, the memory is deallocated as it is not
26   needed
27 
28 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetGraphLayout()`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
29 @*/
30 PetscErrorCode PetscSFSetGraphLayout(PetscSF sf, PetscLayout layout, PetscInt nleaves, PetscInt ilocal[], PetscCopyMode localmode, const PetscInt gremote[])
31 {
32   const PetscInt *range;
33   PetscInt        i, nroots, ls = -1, ln = -1;
34   PetscMPIInt     lr = -1;
35   PetscSFNode    *remote;
36 
37   PetscFunctionBegin;
38   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
39   PetscAssertPointer(layout, 2);
40   if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4);
41   if (nleaves > 0) PetscAssertPointer(gremote, 6);
42   PetscCall(PetscLayoutSetUp(layout));
43   PetscCall(PetscLayoutGetLocalSize(layout, &nroots));
44   PetscCall(PetscLayoutGetRanges(layout, &range));
45   PetscCall(PetscMalloc1(nleaves, &remote));
46   if (nleaves) ls = gremote[0] + 1;
47   for (i = 0; i < nleaves; i++) {
48     const PetscInt idx = gremote[i] - ls;
49     if (idx < 0 || idx >= ln) { /* short-circuit the search */
50       PetscCall(PetscLayoutFindOwnerIndex(layout, gremote[i], &lr, &remote[i].index));
51       remote[i].rank = lr;
52       ls             = range[lr];
53       ln             = range[lr + 1] - ls;
54     } else {
55       remote[i].rank  = lr;
56       remote[i].index = idx;
57     }
58   }
59   PetscCall(PetscSFSetGraph(sf, nroots, nleaves, ilocal, localmode, remote, PETSC_OWN_POINTER));
60   PetscFunctionReturn(PETSC_SUCCESS);
61 }
62 
63 /*@C
64   PetscSFGetGraphLayout - Get the global indices and `PetscLayout` that describe a `PetscSF`
65 
66   Collective
67 
68   Input Parameter:
69 . sf - star forest
70 
71   Output Parameters:
72 + layout  - `PetscLayout` defining the global space for roots
73 . nleaves - number of leaf vertices on the current process, each of these references a root on any process
74 . ilocal  - locations of leaves in leafdata buffers, or `NULL` for contiguous storage
75 - gremote - root vertices in global numbering corresponding to the leaves
76 
77   Level: intermediate
78 
79   Notes:
80   The outputs are such that passing them as inputs to `PetscSFSetGraphLayout()` would lead to the same star forest.
81   The outputs `layout` and `gremote` are freshly created each time this function is called,
82   so they need to be freed (with `PetscLayoutDestroy()` and `PetscFree()`) by the user.
83 
84 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraphLayout()`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
85 @*/
86 PetscErrorCode PetscSFGetGraphLayout(PetscSF sf, PetscLayout *layout, PetscInt *nleaves, const PetscInt *ilocal[], PetscInt *gremote[])
87 {
88   PetscInt           nr, nl;
89   const PetscSFNode *ir;
90   PetscLayout        lt;
91 
92   PetscFunctionBegin;
93   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
94   if (layout) PetscAssertPointer(layout, 2);
95   if (nleaves) PetscAssertPointer(nleaves, 3);
96   if (ilocal) PetscAssertPointer(ilocal, 4);
97   if (gremote) PetscAssertPointer(gremote, 5);
98   PetscCall(PetscSFGetGraph(sf, &nr, &nl, ilocal, &ir));
99   PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sf), nr, PETSC_DECIDE, 1, &lt));
100   if (gremote) {
101     PetscInt        i;
102     const PetscInt *range;
103     PetscInt       *gr;
104 
105     PetscCall(PetscLayoutGetRanges(lt, &range));
106     PetscCall(PetscMalloc1(nl, &gr));
107     for (i = 0; i < nl; i++) gr[i] = range[ir[i].rank] + ir[i].index;
108     *gremote = gr;
109   }
110   if (nleaves) *nleaves = nl;
111   if (layout) *layout = lt;
112   else PetscCall(PetscLayoutDestroy(&lt));
113   PetscFunctionReturn(PETSC_SUCCESS);
114 }
115 
/*@
  PetscSFSetGraphSection - Sets the `PetscSF` graph (communication pattern) encoding the parallel dof overlap based upon the `PetscSection` describing the data layout.

  Input Parameters:
+ sf            - The `PetscSF`
. localSection  - `PetscSection` describing the local data layout
- globalSection - `PetscSection` describing the global data layout

  Level: developer

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFSetGraphLayout()`
@*/
PetscErrorCode PetscSFSetGraphSection(PetscSF sf, PetscSection localSection, PetscSection globalSection)
{
  MPI_Comm        comm;
  PetscLayout     layout;
  const PetscInt *ranges;
  PetscInt       *local;
  PetscSFNode    *remote;
  PetscInt        pStart, pEnd, p, nroots, nleaves = 0, l;
  PetscMPIInt     size, rank;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidHeaderSpecific(localSection, PETSC_SECTION_CLASSID, 2);
  PetscValidHeaderSpecific(globalSection, PETSC_SECTION_CLASSID, 3);

  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscSectionGetChart(globalSection, &pStart, &pEnd));
  PetscCall(PetscSectionGetConstrainedStorageSize(globalSection, &nroots));
  /* Layout of this rank's owned unconstrained dofs; ranges[] gives each rank's [start, end) slice,
     used below to convert a global dof offset into an (owner rank, owner-local index) pair */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, 1));
  PetscCall(PetscLayoutSetLocalSize(layout, nroots));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetRanges(layout, &ranges));
  /* First pass: count the leaves, i.e. the unconstrained dofs of every point in the chart.
     A negative gdof encodes -(gdof+1) dofs for a point stored on another process
     (see the gdof < 0 branch in the second pass). */
  for (p = pStart; p < pEnd; ++p) {
    PetscInt gdof, gcdof;

    PetscCall(PetscSectionGetDof(globalSection, p, &gdof));
    PetscCall(PetscSectionGetConstraintDof(globalSection, p, &gcdof));
    PetscCheck(gcdof <= (gdof < 0 ? -(gdof + 1) : gdof), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Point %" PetscInt_FMT " has %" PetscInt_FMT " constraints > %" PetscInt_FMT " dof", p, gcdof, gdof < 0 ? -(gdof + 1) : gdof);
    nleaves += gdof < 0 ? -(gdof + 1) - gcdof : gdof - gcdof;
  }
  PetscCall(PetscMalloc1(nleaves, &local));
  PetscCall(PetscMalloc1(nleaves, &remote));
  /* Second pass: fill the leaf array (local storage locations) and the root array
     (owner rank and owner-local dof index) point by point */
  for (p = pStart, l = 0; p < pEnd; ++p) {
    const PetscInt *cind;
    PetscInt        dof, cdof, off, gdof, gcdof, goff, gsize, d, c;

    PetscCall(PetscSectionGetDof(localSection, p, &dof));
    PetscCall(PetscSectionGetOffset(localSection, p, &off));
    PetscCall(PetscSectionGetConstraintDof(localSection, p, &cdof));
    PetscCall(PetscSectionGetConstraintIndices(localSection, p, &cind));
    PetscCall(PetscSectionGetDof(globalSection, p, &gdof));
    PetscCall(PetscSectionGetConstraintDof(globalSection, p, &gcdof));
    PetscCall(PetscSectionGetOffset(globalSection, p, &goff));
    if (!gdof) continue; /* Censored point */
    gsize = gdof < 0 ? -(gdof + 1) - gcdof : gdof - gcdof;
    if (gsize != dof - cdof) {
      /* Allow the global size to equal the full (unconstrained) local size instead */
      PetscCheck(gsize == dof, comm, PETSC_ERR_ARG_WRONG, "Global dof %" PetscInt_FMT " for point %" PetscInt_FMT " is neither the constrained size %" PetscInt_FMT ", nor the unconstrained %" PetscInt_FMT, gsize, p, dof - cdof, dof);
      cdof = 0; /* Ignore constraints */
    }
    /* Record local storage locations of unconstrained dofs, skipping the constrained indices cind[] */
    for (d = 0, c = 0; d < dof; ++d) {
      if ((c < cdof) && (cind[c] == d)) {
        ++c;
        continue;
      }
      local[l + d - c] = off + d;
    }
    PetscCheck(d - c == gsize, comm, PETSC_ERR_ARG_WRONG, "Point %" PetscInt_FMT ": Global dof %" PetscInt_FMT " != %" PetscInt_FMT " size - number of constraints", p, gsize, d - c);
    if (gdof < 0) {
      /* Remotely owned point: the global offset is encoded as -(goff+1); locate the owner
         by searching this offset in the layout ranges */
      for (d = 0; d < gsize; ++d, ++l) {
        PetscInt    offset = -(goff + 1) + d, ir;
        PetscMPIInt r;

        PetscCall(PetscFindInt(offset, size + 1, ranges, &ir));
        PetscCall(PetscMPIIntCast(ir, &r));
        if (r < 0) r = -(r + 2); /* a negative result encodes the insertion point; recover the owning range index */
        PetscCheck(!(r < 0) && !(r >= size), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Point %" PetscInt_FMT " mapped to invalid process %d (%" PetscInt_FMT ", %" PetscInt_FMT ")", p, r, gdof, goff);
        remote[l].rank  = r;
        remote[l].index = offset - ranges[r];
      }
    } else {
      /* Locally owned point: roots live on this rank, indexed relative to our range start */
      for (d = 0; d < gsize; ++d, ++l) {
        remote[l].rank  = rank;
        remote[l].index = goff + d - ranges[rank];
      }
    }
  }
  PetscCheck(l == nleaves, comm, PETSC_ERR_PLIB, "Iteration error, l %" PetscInt_FMT " != nleaves %" PetscInt_FMT, l, nleaves);
  PetscCall(PetscLayoutDestroy(&layout));
  PetscCall(PetscSFSetGraph(sf, nroots, nleaves, local, PETSC_OWN_POINTER, remote, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}
212 
/*@C
  PetscSFDistributeSection - Create a new `PetscSection` reorganized, moving from the root to the leaves of the `PetscSF`

  Collective

  Input Parameters:
+ sf          - The `PetscSF`
- rootSection - Section defined on root space

  Output Parameters:
+ remoteOffsets - root offsets in leaf storage, or `NULL`, its length will be the size of the chart of `leafSection`
- leafSection   - Section defined on the leaf space

  Level: advanced

  Note:
  Caller must `PetscFree()` `remoteOffsets` if it was requested

  Fortran Note:
  Use `PetscSFDestroyRemoteOffsets()` when `remoteOffsets` is no longer needed.

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFDistributeSection(PetscSF sf, PetscSection rootSection, PetscInt *remoteOffsets[], PetscSection leafSection)
{
  PetscSF         embedSF;
  const PetscInt *indices;
  IS              selected;
  PetscInt        numFields, nroots, rpStart, rpEnd, lpStart = PETSC_INT_MAX, lpEnd = -1, f, c;
  PetscBool      *sub, hasc;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidHeaderSpecific(rootSection, PETSC_SECTION_CLASSID, 2);
  if (remoteOffsets) PetscAssertPointer(remoteOffsets, 3);
  PetscValidHeaderSpecific(leafSection, PETSC_SECTION_CLASSID, 4);
  PetscCall(PetscLogEventBegin(PETSCSF_DistSect, sf, 0, 0, 0));
  PetscCall(PetscSectionGetNumFields(rootSection, &numFields));
  if (numFields) {
    IS perm;

    /* PetscSectionSetNumFields() calls PetscSectionReset(), which destroys
       leafSection->perm. To keep this permutation set by the user, we grab
       the reference before calling PetscSectionSetNumFields() and set it
       back after. */
    PetscCall(PetscSectionGetPermutation(leafSection, &perm));
    PetscCall(PetscObjectReference((PetscObject)perm));
    PetscCall(PetscSectionSetNumFields(leafSection, numFields));
    PetscCall(PetscSectionSetPermutation(leafSection, perm));
    PetscCall(ISDestroy(&perm));
  }
  /* sub[0]: do we need an embedded SF at all; sub[1]: root section has constraints;
     sub[2+f]: field f has constraints.  These flags are OR-reduced over all ranks below. */
  PetscCall(PetscMalloc1(numFields + 2, &sub));
  sub[1] = rootSection->bc ? PETSC_TRUE : PETSC_FALSE;
  /* Copy per-field metadata (components, names, symmetries) from root to leaf section */
  for (f = 0; f < numFields; ++f) {
    PetscSectionSym sym, dsym = NULL;
    const char     *name    = NULL;
    PetscInt        numComp = 0;

    sub[2 + f] = rootSection->field[f]->bc ? PETSC_TRUE : PETSC_FALSE;
    PetscCall(PetscSectionGetFieldComponents(rootSection, f, &numComp));
    PetscCall(PetscSectionGetFieldName(rootSection, f, &name));
    PetscCall(PetscSectionGetFieldSym(rootSection, f, &sym));
    if (sym) PetscCall(PetscSectionSymDistribute(sym, sf, &dsym));
    PetscCall(PetscSectionSetFieldComponents(leafSection, f, numComp));
    PetscCall(PetscSectionSetFieldName(leafSection, f, name));
    PetscCall(PetscSectionSetFieldSym(leafSection, f, dsym));
    PetscCall(PetscSectionSymDestroy(&dsym));
    for (c = 0; c < rootSection->numFieldComponents[f]; ++c) {
      PetscCall(PetscSectionGetComponentName(rootSection, f, c, &name));
      PetscCall(PetscSectionSetComponentName(leafSection, f, c, name));
    }
  }
  PetscCall(PetscSectionGetChart(rootSection, &rpStart, &rpEnd));
  PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
  /* Clip the root chart to the SF's root space: points at or beyond nroots cannot be roots */
  rpEnd = PetscMin(rpEnd, nroots);
  rpEnd = PetscMax(rpStart, rpEnd);
  /* see if we can avoid creating the embedded SF, since it can cost more than an allreduce */
  sub[0] = (PetscBool)(nroots != rpEnd - rpStart);
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, sub, 2 + numFields, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)sf)));
  if (sub[0]) {
    /* Restrict the SF to roots that lie inside the (clipped) root chart */
    PetscCall(ISCreateStride(PETSC_COMM_SELF, rpEnd - rpStart, rpStart, 1, &selected));
    PetscCall(ISGetIndices(selected, &indices));
    PetscCall(PetscSFCreateEmbeddedRootSF(sf, rpEnd - rpStart, indices, &embedSF));
    PetscCall(ISRestoreIndices(selected, &indices));
    PetscCall(ISDestroy(&selected));
  } else {
    PetscCall(PetscObjectReference((PetscObject)sf));
    embedSF = sf;
  }
  PetscCall(PetscSFGetLeafRange(embedSF, &lpStart, &lpEnd));
  lpEnd++; /* GetLeafRange returns an inclusive max; the chart wants one-past-the-end */

  PetscCall(PetscSectionSetChart(leafSection, lpStart, lpEnd));

  /* Constrained dof section */
  hasc = sub[1];
  for (f = 0; f < numFields; ++f) hasc = (PetscBool)(hasc || sub[2 + f]);

  /* Could fuse these at the cost of copies and extra allocation */
  /* The atlas arrays are indexed by point, so the base pointers are shifted such that
     point p addresses entry [p - rpStart] on roots and [p - lpStart] on leaves */
  PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->atlasDof, -lpStart), MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->atlasDof, -lpStart), MPI_REPLACE));
  if (sub[1]) {
    PetscCall(PetscSectionCheckConstraints_Private(rootSection));
    PetscCall(PetscSectionCheckConstraints_Private(leafSection));
    PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->bc->atlasDof[-rpStart], &leafSection->bc->atlasDof[-lpStart], MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->bc->atlasDof[-rpStart], &leafSection->bc->atlasDof[-lpStart], MPI_REPLACE));
  }
  for (f = 0; f < numFields; ++f) {
    PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->field[f]->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->field[f]->atlasDof, -lpStart), MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->field[f]->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->field[f]->atlasDof, -lpStart), MPI_REPLACE));
    if (sub[2 + f]) {
      PetscCall(PetscSectionCheckConstraints_Private(rootSection->field[f]));
      PetscCall(PetscSectionCheckConstraints_Private(leafSection->field[f]));
      PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasDof[-rpStart], &leafSection->field[f]->bc->atlasDof[-lpStart], MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasDof[-rpStart], &leafSection->field[f]->bc->atlasDof[-lpStart], MPI_REPLACE));
    }
  }
  if (remoteOffsets) {
    /* Ship each root's storage offset to its leaves so callers can address root data remotely */
    PetscCall(PetscMalloc1(lpEnd - lpStart, remoteOffsets));
    PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
  }
  PetscCall(PetscSectionInvalidateMaxDof_Internal(leafSection));
  PetscCall(PetscSectionSetUp(leafSection));
  if (hasc) { /* need to communicate bcIndices */
    PetscSF   bcSF;
    PetscInt *rOffBc;

    /* rOffBc receives the root constraint-offsets, from which a dof-level SF is built
       to move the constraint index arrays themselves */
    PetscCall(PetscMalloc1(lpEnd - lpStart, &rOffBc));
    if (sub[1]) {
      PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
      PetscCall(PetscSFCreateSectionSF(embedSF, rootSection->bc, rOffBc, leafSection->bc, &bcSF));
      PetscCall(PetscSFBcastBegin(bcSF, MPIU_INT, rootSection->bcIndices, leafSection->bcIndices, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(bcSF, MPIU_INT, rootSection->bcIndices, leafSection->bcIndices, MPI_REPLACE));
      PetscCall(PetscSFDestroy(&bcSF));
    }
    for (f = 0; f < numFields; ++f) {
      if (sub[2 + f]) {
        PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
        PetscCall(PetscSFCreateSectionSF(embedSF, rootSection->field[f]->bc, rOffBc, leafSection->field[f]->bc, &bcSF));
        PetscCall(PetscSFBcastBegin(bcSF, MPIU_INT, rootSection->field[f]->bcIndices, leafSection->field[f]->bcIndices, MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(bcSF, MPIU_INT, rootSection->field[f]->bcIndices, leafSection->field[f]->bcIndices, MPI_REPLACE));
        PetscCall(PetscSFDestroy(&bcSF));
      }
    }
    PetscCall(PetscFree(rOffBc));
  }
  PetscCall(PetscSFDestroy(&embedSF));
  PetscCall(PetscFree(sub));
  PetscCall(PetscLogEventEnd(PETSCSF_DistSect, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
367 
368 /*@C
369   PetscSFCreateRemoteOffsets - Create offsets for point data on remote processes
370 
371   Collective
372 
373   Input Parameters:
374 + sf          - The `PetscSF`
375 . rootSection - Data layout of remote points for outgoing data (this is layout for roots)
376 - leafSection - Data layout of local points for incoming data  (this is layout for leaves)
377 
378   Output Parameter:
379 . remoteOffsets - Offsets for point data on remote processes (these are offsets from the root section), or `NULL`
380 
381   Level: developer
382 
383   Note:
384   Caller must `PetscFree()` `remoteOffsets` if it was requested
385 
386   Fortran Note:
387   Use `PetscSFDestroyRemoteOffsets()` when `remoteOffsets` is no longer needed.
388 
389 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`
390 @*/
391 PetscErrorCode PetscSFCreateRemoteOffsets(PetscSF sf, PetscSection rootSection, PetscSection leafSection, PetscInt *remoteOffsets[])
392 {
393   PetscSF         embedSF;
394   const PetscInt *indices;
395   IS              selected;
396   PetscInt        numRoots, rpStart = 0, rpEnd = 0, lpStart = 0, lpEnd = 0;
397 
398   PetscFunctionBegin;
399   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
400   PetscValidHeaderSpecific(rootSection, PETSC_SECTION_CLASSID, 2);
401   PetscValidHeaderSpecific(leafSection, PETSC_SECTION_CLASSID, 3);
402   PetscAssertPointer(remoteOffsets, 4);
403   *remoteOffsets = NULL;
404   PetscCall(PetscSFGetGraph(sf, &numRoots, NULL, NULL, NULL));
405   if (numRoots < 0) PetscFunctionReturn(PETSC_SUCCESS);
406   PetscCall(PetscLogEventBegin(PETSCSF_RemoteOff, sf, 0, 0, 0));
407   PetscCall(PetscSectionGetChart(rootSection, &rpStart, &rpEnd));
408   PetscCall(PetscSectionGetChart(leafSection, &lpStart, &lpEnd));
409   PetscCall(ISCreateStride(PETSC_COMM_SELF, rpEnd - rpStart, rpStart, 1, &selected));
410   PetscCall(ISGetIndices(selected, &indices));
411   PetscCall(PetscSFCreateEmbeddedRootSF(sf, rpEnd - rpStart, indices, &embedSF));
412   PetscCall(ISRestoreIndices(selected, &indices));
413   PetscCall(ISDestroy(&selected));
414   PetscCall(PetscCalloc1(lpEnd - lpStart, remoteOffsets));
415   PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
416   PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
417   PetscCall(PetscSFDestroy(&embedSF));
418   PetscCall(PetscLogEventEnd(PETSCSF_RemoteOff, sf, 0, 0, 0));
419   PetscFunctionReturn(PETSC_SUCCESS);
420 }
421 
/*@
  PetscSFCreateSectionSF - Create an expanded `PetscSF` of dofs, assuming the input `PetscSF` relates points

  Collective

  Input Parameters:
+ sf            - The `PetscSF`
. rootSection   - Data layout of remote points for outgoing data (this is usually the serial section)
. remoteOffsets - Offsets for point data on remote processes (these are offsets from the root section), or `NULL`
- leafSection   - Data layout of local points for incoming data  (this is the distributed section)

  Output Parameter:
. sectionSF - The new `PetscSF`

  Level: advanced

  Notes:
  `remoteOffsets` can be `NULL` if `sf` does not reference any points in leafSection

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFCreateSectionSF(PetscSF sf, PetscSection rootSection, PetscInt remoteOffsets[], PetscSection leafSection, PetscSF *sectionSF)
{
  MPI_Comm           comm;
  const PetscInt    *localPoints;
  const PetscSFNode *remotePoints;
  PetscInt           lpStart, lpEnd;
  PetscInt           numRoots, numSectionRoots, numPoints, numIndices = 0;
  PetscInt          *localIndices;
  PetscSFNode       *remoteIndices;
  PetscInt           i, ind;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscAssertPointer(rootSection, 2);
  /* Cannot check PetscAssertPointer(remoteOffsets,3) because it can be NULL if sf does not reference any points in leafSection */
  PetscAssertPointer(leafSection, 4);
  PetscAssertPointer(sectionSF, 5);
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCall(PetscSFCreate(comm, sectionSF));
  PetscCall(PetscSectionGetChart(leafSection, &lpStart, &lpEnd));
  PetscCall(PetscSectionGetStorageSize(rootSection, &numSectionRoots));
  PetscCall(PetscSFGetGraph(sf, &numRoots, &numPoints, &localPoints, &remotePoints));
  /* If sf has no graph, return the freshly created (graph-less) sectionSF as-is */
  if (numRoots < 0) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscLogEventBegin(PETSCSF_SectSF, sf, 0, 0, 0));
  /* First pass: count the dof-level leaves, i.e. the dofs of every leaf point inside the leaf chart */
  for (i = 0; i < numPoints; ++i) {
    PetscInt localPoint = localPoints ? localPoints[i] : i; /* NULL localPoints means contiguous leaves */
    PetscInt dof;

    if ((localPoint >= lpStart) && (localPoint < lpEnd)) {
      PetscCall(PetscSectionGetDof(leafSection, localPoint, &dof));
      numIndices += dof < 0 ? 0 : dof;
    }
  }
  PetscCall(PetscMalloc1(numIndices, &localIndices));
  PetscCall(PetscMalloc1(numIndices, &remoteIndices));
  /* Create new index graph */
  /* Second pass: expand each point edge of sf into one edge per dof of that point */
  for (i = 0, ind = 0; i < numPoints; ++i) {
    PetscInt localPoint = localPoints ? localPoints[i] : i;
    PetscInt rank       = remotePoints[i].rank;

    if ((localPoint >= lpStart) && (localPoint < lpEnd)) {
      /* NOTE(review): remoteOffsets is dereferenced whenever sf references a point in
         leafSection; it may only be NULL when this branch is never taken (see manpage note) */
      PetscInt remoteOffset = remoteOffsets[localPoint - lpStart];
      PetscInt loff, dof, d;

      PetscCall(PetscSectionGetOffset(leafSection, localPoint, &loff));
      PetscCall(PetscSectionGetDof(leafSection, localPoint, &dof));
      for (d = 0; d < dof; ++d, ++ind) {
        localIndices[ind]        = loff + d;
        remoteIndices[ind].rank  = rank;
        remoteIndices[ind].index = remoteOffset + d;
      }
    }
  }
  PetscCheck(numIndices == ind, comm, PETSC_ERR_PLIB, "Inconsistency in indices, %" PetscInt_FMT " should be %" PetscInt_FMT, ind, numIndices);
  PetscCall(PetscSFSetGraph(*sectionSF, numSectionRoots, numIndices, localIndices, PETSC_OWN_POINTER, remoteIndices, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetUp(*sectionSF));
  PetscCall(PetscLogEventEnd(PETSCSF_SectSF, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
502 
503 /*@C
504   PetscSFCreateFromLayouts - Creates a parallel star forest mapping between two `PetscLayout` objects
505 
506   Collective
507 
508   Input Parameters:
509 + rmap - `PetscLayout` defining the global root space
510 - lmap - `PetscLayout` defining the global leaf space
511 
512   Output Parameter:
513 . sf - The parallel star forest
514 
515   Level: intermediate
516 
517   Notes:
518   If the global length of `lmap` differs from the global length of `rmap` then the excess entries are ignored.
519 
520   The resulting `sf` used with `PetscSFBcastBegin()` and `PetscSFBcastEnd()` merely copies the array entries of `rootdata` to
521   `leafdata`; moving them between MPI processes if needed. For example,
522   if rmap is [0, 3, 5) and lmap is [0, 2, 6) and `rootdata` is (1, 2, 3) on MPI rank 0 and (4, 5) on MPI rank 1 then the
523   `leafdata` would become (1, 2) on MPI rank 0 and (3, 4, 5, x) on MPI rank 1.
524 
525 .seealso: [](sec_petscsf), `PetscSF`, `PetscLayout`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscLayoutCreate()`, `PetscSFSetGraphLayout()`
526 @*/
527 PetscErrorCode PetscSFCreateFromLayouts(PetscLayout rmap, PetscLayout lmap, PetscSF *sf)
528 {
529   PetscInt     i, nroots, nleaves = 0;
530   PetscInt     rN, lst, len;
531   PetscMPIInt  owner = -1;
532   PetscSFNode *remote;
533   MPI_Comm     rcomm = rmap->comm;
534   MPI_Comm     lcomm = lmap->comm;
535   PetscMPIInt  flg;
536 
537   PetscFunctionBegin;
538   PetscAssertPointer(rmap, 1);
539   PetscAssertPointer(lmap, 2);
540   PetscAssertPointer(sf, 3);
541   PetscCheck(rmap->setupcalled, rcomm, PETSC_ERR_ARG_WRONGSTATE, "Root layout not setup");
542   PetscCheck(lmap->setupcalled, lcomm, PETSC_ERR_ARG_WRONGSTATE, "Leaf layout not setup");
543   PetscCallMPI(MPI_Comm_compare(rcomm, lcomm, &flg));
544   PetscCheck(flg == MPI_CONGRUENT || flg == MPI_IDENT, rcomm, PETSC_ERR_SUP, "cannot map two layouts with non-matching communicators");
545   PetscCall(PetscSFCreate(rcomm, sf));
546   PetscCall(PetscLayoutGetLocalSize(rmap, &nroots));
547   PetscCall(PetscLayoutGetSize(rmap, &rN));
548   PetscCall(PetscLayoutGetRange(lmap, &lst, &len));
549   PetscCall(PetscMalloc1(len - lst, &remote));
550   for (i = lst; i < len && i < rN; i++) {
551     if (owner < -1 || i >= rmap->range[owner + 1]) PetscCall(PetscLayoutFindOwner(rmap, i, &owner));
552     remote[nleaves].rank  = owner;
553     remote[nleaves].index = i - rmap->range[owner];
554     nleaves++;
555   }
556   PetscCall(PetscSFSetGraph(*sf, nroots, nleaves, NULL, PETSC_OWN_POINTER, remote, PETSC_COPY_VALUES));
557   PetscCall(PetscFree(remote));
558   PetscFunctionReturn(PETSC_SUCCESS);
559 }
560 
/* TODO: handle nooffprocentries like MatZeroRowsMapLocal_Private, since this code is the same */
/* Map the global indices idxs[0..N) onto their owning processes according to map.
   Outputs (all optional): on = number of locally owned indices referenced by any process,
   oidxs = those owned local indices (caller frees), ogidxs = a new contiguous global
   numbering for the input indices, aligned with oidxs (caller frees). */
PetscErrorCode PetscLayoutMapLocal(PetscLayout map, PetscInt N, const PetscInt idxs[], PetscInt *on, PetscInt *oidxs[], PetscInt *ogidxs[])
{
  PetscInt    *owners = map->range;
  PetscInt     n      = map->n;
  PetscSF      sf;
  PetscInt    *lidxs, *work = NULL, *ilocal;
  PetscSFNode *ridxs;
  PetscMPIInt  rank, p = 0;
  PetscInt     r, len = 0, nleaves = 0;

  PetscFunctionBegin;
  if (on) *on = 0; /* squelch -Wmaybe-uninitialized */
  /* Create SF where leaves are input idxs and roots are owned idxs */
  PetscCallMPI(MPI_Comm_rank(map->comm, &rank));
  /* lidxs starts at -1 ("untargeted"); the MPI_LOR reduction below leaves every root
     referenced by at least one leaf with a nonnegative value */
  PetscCall(PetscMalloc1(n, &lidxs));
  for (r = 0; r < n; ++r) lidxs[r] = -1;
  PetscCall(PetscMalloc1(N, &ridxs));
  PetscCall(PetscMalloc1(N, &ilocal));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = idxs[r];

    if (idx < 0) continue; /* negative entries are ignored */
    PetscCheck(idx < map->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Index %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, map->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this idx too */
      PetscCall(PetscLayoutFindOwner(map, idx, &p));
    }
    ridxs[nleaves].rank  = p;
    ridxs[nleaves].index = idxs[r] - owners[p]; /* owner-local index */
    ilocal[nleaves]      = r;
    nleaves++;
  }
  PetscCall(PetscSFCreate(map->comm, &sf));
  PetscCall(PetscSFSetGraph(sf, n, nleaves, ilocal, PETSC_OWN_POINTER, ridxs, PETSC_OWN_POINTER));
  /* Mark the owned indices that some process references */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)idxs, lidxs, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)idxs, lidxs, MPI_LOR));
  if (ogidxs) { /* communicate global idxs */
    PetscInt cum = 0, start, *work2;

    /* Number the valid (nonnegative) input entries contiguously across processes
       via an exclusive prefix sum, then push that numbering to the owners */
    PetscCall(PetscMalloc1(n, &work));
    PetscCall(PetscCalloc1(N, &work2));
    for (r = 0; r < N; ++r)
      if (idxs[r] >= 0) cum++;
    PetscCallMPI(MPI_Scan(&cum, &start, 1, MPIU_INT, MPI_SUM, map->comm));
    start -= cum; /* convert the inclusive scan into an exclusive one */
    cum = 0;
    for (r = 0; r < N; ++r)
      if (idxs[r] >= 0) work2[r] = start + cum++;
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work2, work, MPI_REPLACE));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work2, work, MPI_REPLACE));
    PetscCall(PetscFree(work2));
  }
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in indices */
  /* Keep only the targeted owned slots; work[] is compressed in lock-step so the
     optional global numbering stays aligned with lidxs */
  for (r = 0; r < n; ++r)
    if (lidxs[r] >= 0) {
      if (work) work[len] = work[r];
      lidxs[len++] = r;
    }
  if (on) *on = len;
  if (oidxs) *oidxs = lidxs;
  if (ogidxs) *ogidxs = work;
  PetscFunctionReturn(PETSC_SUCCESS);
}
625 
626 /*@
627   PetscSFCreateByMatchingIndices - Create `PetscSF` by matching root and leaf indices
628 
629   Collective
630 
631   Input Parameters:
632 + layout           - `PetscLayout` defining the global index space and the MPI rank that brokers each index
633 . numRootIndices   - size of `rootIndices`
634 . rootIndices      - array of global indices of which this process requests ownership
635 . rootLocalIndices - root local index permutation (`NULL` if no permutation)
636 . rootLocalOffset  - offset to be added to `rootLocalIndices`
637 . numLeafIndices   - size of `leafIndices`
638 . leafIndices      - array of global indices with which this process requires data associated
639 . leafLocalIndices - leaf local index permutation (`NULL` if no permutation)
640 - leafLocalOffset  - offset to be added to `leafLocalIndices`
641 
642   Output Parameters:
643 + sfA - star forest representing the communication pattern from the layout space to the leaf space (`NULL` if not needed)
644 - sf  - star forest representing the communication pattern from the root space to the leaf space
645 
646   Level: advanced
647 
648   Example 1:
649 .vb
650   rank             : 0            1            2
651   rootIndices      : [1 0 2]      [3]          [3]
652   rootLocalOffset  : 100          200          300
653   layout           : [0 1]        [2]          [3]
654   leafIndices      : [0]          [2]          [0 3]
655   leafLocalOffset  : 400          500          600
656 
657 would build the following PetscSF
658 
659   [0] 400 <- (0,101)
660   [1] 500 <- (0,102)
661   [2] 600 <- (0,101)
662   [2] 601 <- (2,300)
663 .ve
664 
665   Example 2:
666 .vb
667   rank             : 0               1               2
668   rootIndices      : [1 0 2]         [3]             [3]
669   rootLocalOffset  : 100             200             300
670   layout           : [0 1]           [2]             [3]
671   leafIndices      : rootIndices     rootIndices     rootIndices
672   leafLocalOffset  : rootLocalOffset rootLocalOffset rootLocalOffset
673 
674 would build the following PetscSF
675 
676   [1] 200 <- (2,300)
677 .ve
678 
679   Example 3:
680 .vb
  No process requests ownership of global index 1, but this is valid because no process needs it either.
682 
683   rank             : 0            1            2
684   numRootIndices   : 2            1            1
685   rootIndices      : [0 2]        [3]          [3]
686   rootLocalOffset  : 100          200          300
687   layout           : [0 1]        [2]          [3]
688   numLeafIndices   : 1            1            2
689   leafIndices      : [0]          [2]          [0 3]
690   leafLocalOffset  : 400          500          600
691 
692 would build the following PetscSF
693 
694   [0] 400 <- (0,100)
695   [1] 500 <- (0,101)
696   [2] 600 <- (0,100)
697   [2] 601 <- (2,300)
698 .ve
699 
700   Notes:
701   `layout` represents any partitioning of [0, N), where N is the total number of global indices, and its
702   local size can be set to `PETSC_DECIDE`.
703 
704   If a global index x lies in the partition owned by process i, each process whose `rootIndices` contains x requests
705   ownership of x and sends its own rank and the local index of x to process i.
706   If multiple processes request ownership of x, the one with the highest rank is to own x.
707   Process i then broadcasts the ownership information, so that each process whose `leafIndices` contains x knows the
708   ownership information of x.
709   The output `sf` is constructed by associating each leaf point to a root point in this way.
710 
711   Suppose there is point data ordered according to the global indices and partitioned according to the given layout.
712   The optional output `sfA` can be used to push such data to leaf points.
713 
714   All indices in `rootIndices` and `leafIndices` must lie in the layout range. The union (over all processes) of `rootIndices`
715   must cover that of `leafIndices`, but need not cover the entire layout.
716 
  If (leafIndices, leafLocalIndices, leafLocalOffset) == (rootIndices, rootLocalIndices, rootLocalOffset), the output
  star forest is almost the identity map, so it will only include the non-trivial part of the map.
719 
720   Developer Notes:
721   Current approach of a process of the highest rank gaining the ownership may cause load imbalance; consider using
722   hash(rank, root_local_index) as the bid for the ownership determination.
723 
724 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`
725 @*/
726 PetscErrorCode PetscSFCreateByMatchingIndices(PetscLayout layout, PetscInt numRootIndices, const PetscInt rootIndices[], const PetscInt rootLocalIndices[], PetscInt rootLocalOffset, PetscInt numLeafIndices, const PetscInt leafIndices[], const PetscInt leafLocalIndices[], PetscInt leafLocalOffset, PetscSF *sfA, PetscSF *sf)
727 {
728   MPI_Comm     comm = layout->comm;
729   PetscMPIInt  size, rank;
730   PetscSF      sf1;
731   PetscSFNode *owners, *buffer, *iremote;
732   PetscInt    *ilocal, nleaves, N, n, i;
733 #if defined(PETSC_USE_DEBUG)
734   PetscInt N1;
735 #endif
736   PetscBool flag;
737 
738   PetscFunctionBegin;
739   PetscAssertPointer(layout, 1);
740   if (rootIndices) PetscAssertPointer(rootIndices, 3);
741   if (rootLocalIndices) PetscAssertPointer(rootLocalIndices, 4);
742   if (leafIndices) PetscAssertPointer(leafIndices, 7);
743   if (leafLocalIndices) PetscAssertPointer(leafLocalIndices, 8);
744   if (sfA) PetscAssertPointer(sfA, 10);
745   PetscAssertPointer(sf, 11);
746   PetscCheck(numRootIndices >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "numRootIndices (%" PetscInt_FMT ") must be non-negative", numRootIndices);
747   PetscCheck(numLeafIndices >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "numLeafIndices (%" PetscInt_FMT ") must be non-negative", numLeafIndices);
748   PetscCallMPI(MPI_Comm_size(comm, &size));
749   PetscCallMPI(MPI_Comm_rank(comm, &rank));
750   PetscCall(PetscLayoutSetUp(layout));
751   PetscCall(PetscLayoutGetSize(layout, &N));
752   PetscCall(PetscLayoutGetLocalSize(layout, &n));
753   flag = (PetscBool)(leafIndices == rootIndices);
754   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flag, 1, MPI_C_BOOL, MPI_LAND, comm));
755   PetscCheck(!flag || numLeafIndices == numRootIndices, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "leafIndices == rootIndices, but numLeafIndices (%" PetscInt_FMT ") != numRootIndices(%" PetscInt_FMT ")", numLeafIndices, numRootIndices);
756 #if defined(PETSC_USE_DEBUG)
757   N1 = PETSC_INT_MIN;
758   for (i = 0; i < numRootIndices; i++)
759     if (rootIndices[i] > N1) N1 = rootIndices[i];
760   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &N1, 1, MPIU_INT, MPI_MAX, comm));
761   PetscCheck(N1 < N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Max. root index (%" PetscInt_FMT ") out of layout range [0,%" PetscInt_FMT ")", N1, N);
762   if (!flag) {
763     N1 = PETSC_INT_MIN;
764     for (i = 0; i < numLeafIndices; i++)
765       if (leafIndices[i] > N1) N1 = leafIndices[i];
766     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &N1, 1, MPIU_INT, MPI_MAX, comm));
767     PetscCheck(N1 < N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Max. leaf index (%" PetscInt_FMT ") out of layout range [0,%" PetscInt_FMT ")", N1, N);
768   }
769 #endif
770   /* Reduce: owners -> buffer */
771   PetscCall(PetscMalloc1(n, &buffer));
772   PetscCall(PetscSFCreate(comm, &sf1));
773   PetscCall(PetscSFSetFromOptions(sf1));
774   PetscCall(PetscSFSetGraphLayout(sf1, layout, numRootIndices, NULL, PETSC_OWN_POINTER, rootIndices));
775   PetscCall(PetscMalloc1(numRootIndices, &owners));
776   for (i = 0; i < numRootIndices; ++i) {
777     owners[i].rank  = rank;
778     owners[i].index = rootLocalOffset + (rootLocalIndices ? rootLocalIndices[i] : i);
779   }
780   for (i = 0; i < n; ++i) {
781     buffer[i].index = -1;
782     buffer[i].rank  = -1;
783   }
784   PetscCall(PetscSFReduceBegin(sf1, MPIU_SF_NODE, owners, buffer, MPI_MAXLOC));
785   PetscCall(PetscSFReduceEnd(sf1, MPIU_SF_NODE, owners, buffer, MPI_MAXLOC));
786   /* Bcast: buffer -> owners */
787   if (!flag) {
788     /* leafIndices is different from rootIndices */
789     PetscCall(PetscFree(owners));
790     PetscCall(PetscSFSetGraphLayout(sf1, layout, numLeafIndices, NULL, PETSC_OWN_POINTER, leafIndices));
791     PetscCall(PetscMalloc1(numLeafIndices, &owners));
792   }
793   PetscCall(PetscSFBcastBegin(sf1, MPIU_SF_NODE, buffer, owners, MPI_REPLACE));
794   PetscCall(PetscSFBcastEnd(sf1, MPIU_SF_NODE, buffer, owners, MPI_REPLACE));
795   for (i = 0; i < numLeafIndices; ++i) PetscCheck(owners[i].rank >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Global point %" PetscInt_FMT " was unclaimed", leafIndices[i]);
796   PetscCall(PetscFree(buffer));
797   if (sfA) {
798     *sfA = sf1;
799   } else PetscCall(PetscSFDestroy(&sf1));
800   /* Create sf */
801   if (flag && rootLocalIndices == leafLocalIndices && leafLocalOffset == rootLocalOffset) {
802     /* leaf space == root space */
803     for (i = 0, nleaves = 0; i < numLeafIndices; ++i)
804       if (owners[i].rank != rank) ++nleaves;
805     PetscCall(PetscMalloc1(nleaves, &ilocal));
806     PetscCall(PetscMalloc1(nleaves, &iremote));
807     for (i = 0, nleaves = 0; i < numLeafIndices; ++i) {
808       if (owners[i].rank != rank) {
809         ilocal[nleaves]        = leafLocalOffset + i;
810         iremote[nleaves].rank  = owners[i].rank;
811         iremote[nleaves].index = owners[i].index;
812         ++nleaves;
813       }
814     }
815     PetscCall(PetscFree(owners));
816   } else {
817     nleaves = numLeafIndices;
818     PetscCall(PetscMalloc1(nleaves, &ilocal));
819     for (i = 0; i < nleaves; ++i) ilocal[i] = leafLocalOffset + (leafLocalIndices ? leafLocalIndices[i] : i);
820     iremote = owners;
821   }
822   PetscCall(PetscSFCreate(comm, sf));
823   PetscCall(PetscSFSetFromOptions(*sf));
824   PetscCall(PetscSFSetGraph(*sf, rootLocalOffset + numRootIndices, nleaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
825   PetscFunctionReturn(PETSC_SUCCESS);
826 }
827 
828 /*@
829   PetscSFMerge - append/merge indices of `sfb` into `sfa`, with preference for `sfb`
830 
831   Collective
832 
833   Input Parameters:
834 + sfa - default `PetscSF`
835 - sfb - additional edges to add/replace edges in `sfa`
836 
837   Output Parameter:
838 . merged - new `PetscSF` with combined edges
839 
840   Level: intermediate
841 
842 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCompose()`
843 @*/
844 PetscErrorCode PetscSFMerge(PetscSF sfa, PetscSF sfb, PetscSF *merged)
845 {
846   PetscInt maxleaf;
847 
848   PetscFunctionBegin;
849   PetscValidHeaderSpecific(sfa, PETSCSF_CLASSID, 1);
850   PetscValidHeaderSpecific(sfb, PETSCSF_CLASSID, 2);
851   PetscCheckSameComm(sfa, 1, sfb, 2);
852   PetscAssertPointer(merged, 3);
853   {
854     PetscInt aleaf, bleaf;
855     PetscCall(PetscSFGetLeafRange(sfa, NULL, &aleaf));
856     PetscCall(PetscSFGetLeafRange(sfb, NULL, &bleaf));
857     maxleaf = PetscMax(aleaf, bleaf) + 1; // One more than the last index
858   }
859   PetscInt          *clocal, aroots, aleaves, broots, bleaves;
860   PetscSFNode       *cremote;
861   const PetscInt    *alocal, *blocal;
862   const PetscSFNode *aremote, *bremote;
863   PetscCall(PetscMalloc2(maxleaf, &clocal, maxleaf, &cremote));
864   for (PetscInt i = 0; i < maxleaf; i++) clocal[i] = -1;
865   PetscCall(PetscSFGetGraph(sfa, &aroots, &aleaves, &alocal, &aremote));
866   PetscCall(PetscSFGetGraph(sfb, &broots, &bleaves, &blocal, &bremote));
867   PetscCheck(aroots == broots, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Both sfa and sfb must have the same root space");
868   for (PetscInt i = 0; i < aleaves; i++) {
869     PetscInt a = alocal ? alocal[i] : i;
870     clocal[a]  = a;
871     cremote[a] = aremote[i];
872   }
873   for (PetscInt i = 0; i < bleaves; i++) {
874     PetscInt b = blocal ? blocal[i] : i;
875     clocal[b]  = b;
876     cremote[b] = bremote[i];
877   }
878   PetscInt nleaves = 0;
879   for (PetscInt i = 0; i < maxleaf; i++) {
880     if (clocal[i] < 0) continue;
881     clocal[nleaves]  = clocal[i];
882     cremote[nleaves] = cremote[i];
883     nleaves++;
884   }
885   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfa), merged));
886   PetscCall(PetscSFSetGraph(*merged, aroots, nleaves, clocal, PETSC_COPY_VALUES, cremote, PETSC_COPY_VALUES));
887   PetscCall(PetscFree2(clocal, cremote));
888   PetscFunctionReturn(PETSC_SUCCESS);
889 }
890 
891 /*@
892   PetscSFCreateStridedSF - Create an `PetscSF` to communicate interleaved blocks of data
893 
894   Collective
895 
896   Input Parameters:
897 + sf  - star forest
898 . bs  - stride
899 . ldr - leading dimension of root space
900 - ldl - leading dimension of leaf space
901 
902   Output Parameter:
903 . vsf - the new `PetscSF`
904 
905   Level: intermediate
906 
907   Notes:
  This can be useful to perform communications on multiple right-hand sides stored in a Fortran-style two-dimensional array.
909   For example, the calling sequence
910 .vb
911   c_datatype *roots, *leaves;
912   for i in [0,bs) do
913     PetscSFBcastBegin(sf, mpi_datatype, roots + i*ldr, leaves + i*ldl, op)
914     PetscSFBcastEnd(sf, mpi_datatype, roots + i*ldr, leaves + i*ldl, op)
915 .ve
916   is equivalent to
917 .vb
918   c_datatype *roots, *leaves;
919   PetscSFCreateStridedSF(sf, bs, ldr, ldl, &vsf)
920   PetscSFBcastBegin(vsf, mpi_datatype, roots, leaves, op)
921   PetscSFBcastEnd(vsf, mpi_datatype, roots, leaves, op)
922 .ve
923 
924   Developer Notes:
925   Should this functionality be handled with a new API instead of creating a new object?
926 
927 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`
928 @*/
929 PetscErrorCode PetscSFCreateStridedSF(PetscSF sf, PetscInt bs, PetscInt ldr, PetscInt ldl, PetscSF *vsf)
930 {
931   PetscSF            rankssf;
932   const PetscSFNode *iremote, *sfrremote;
933   PetscSFNode       *viremote;
934   const PetscInt    *ilocal;
935   PetscInt          *vilocal = NULL, *ldrs;
936   PetscInt           nranks, nr, nl, vnr, vnl, maxl;
937   PetscMPIInt        rank;
938   MPI_Comm           comm;
939   PetscSFType        sftype;
940 
941   PetscFunctionBegin;
942   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
943   PetscValidLogicalCollectiveInt(sf, bs, 2);
944   PetscAssertPointer(vsf, 5);
945   if (bs == 1) {
946     PetscCall(PetscObjectReference((PetscObject)sf));
947     *vsf = sf;
948     PetscFunctionReturn(PETSC_SUCCESS);
949   }
950   PetscCall(PetscSFSetUp(sf));
951   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
952   PetscCallMPI(MPI_Comm_rank(comm, &rank));
953   PetscCall(PetscSFGetGraph(sf, &nr, &nl, &ilocal, &iremote));
954   PetscCall(PetscSFGetLeafRange(sf, NULL, &maxl));
955   maxl += 1;
956   if (ldl == PETSC_DECIDE) ldl = maxl;
957   if (ldr == PETSC_DECIDE) ldr = nr;
958   PetscCheck(ldr >= nr, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid leading dimension %" PetscInt_FMT " must be smaller than number of roots %" PetscInt_FMT, ldr, nr);
959   PetscCheck(ldl >= maxl, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid leading dimension %" PetscInt_FMT " must be larger than leaf range %" PetscInt_FMT, ldl, maxl - 1);
960   vnr = nr * bs;
961   vnl = nl * bs;
962   PetscCall(PetscMalloc1(vnl, &viremote));
963   PetscCall(PetscMalloc1(vnl, &vilocal));
964 
965   /* Communicate root leading dimensions to leaf ranks */
966   PetscCall(PetscSFGetRanksSF(sf, &rankssf));
967   PetscCall(PetscSFGetGraph(rankssf, NULL, &nranks, NULL, &sfrremote));
968   PetscCall(PetscMalloc1(nranks, &ldrs));
969   PetscCall(PetscSFBcastBegin(rankssf, MPIU_INT, &ldr, ldrs, MPI_REPLACE));
970   PetscCall(PetscSFBcastEnd(rankssf, MPIU_INT, &ldr, ldrs, MPI_REPLACE));
971 
972   for (PetscInt i = 0, rold = -1, lda = -1; i < nl; i++) {
973     const PetscInt r  = iremote[i].rank;
974     const PetscInt ii = iremote[i].index;
975 
976     if (r == rank) lda = ldr;
977     else if (rold != r) {
978       PetscInt j;
979 
980       for (j = 0; j < nranks; j++)
981         if (sfrremote[j].rank == r) break;
982       PetscCheck(j < nranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to locate neighbor rank %" PetscInt_FMT, r);
983       lda = ldrs[j];
984     }
985     rold = r;
986     for (PetscInt v = 0; v < bs; v++) {
987       viremote[v * nl + i].rank  = r;
988       viremote[v * nl + i].index = v * lda + ii;
989       vilocal[v * nl + i]        = v * ldl + (ilocal ? ilocal[i] : i);
990     }
991   }
992   PetscCall(PetscFree(ldrs));
993   PetscCall(PetscSFCreate(comm, vsf));
994   PetscCall(PetscSFGetType(sf, &sftype));
995   PetscCall(PetscSFSetType(*vsf, sftype));
996   PetscCall(PetscSFSetGraph(*vsf, vnr, vnl, vilocal, PETSC_OWN_POINTER, viremote, PETSC_OWN_POINTER));
997   PetscFunctionReturn(PETSC_SUCCESS);
998 }
999