1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <../src/vec/is/sf/impls/basic/sfbasic.h> /* for VecScatterRemap_Internal */
3 #include <../src/vec/is/sf/impls/basic/sfpack.h>
4 #include <petsc/private/vecimpl.h>
5
6 PETSC_SINGLE_LIBRARY_INTERN PetscErrorCode PetscGatherNumberOfMessages_Private(MPI_Comm, const PetscMPIInt[], const PetscInt[], PetscMPIInt *);
7 PETSC_SINGLE_LIBRARY_INTERN PetscErrorCode PetscGatherMessageLengths_Private(MPI_Comm, PetscMPIInt, PetscMPIInt, const PetscInt[], PetscMPIInt **, PetscInt **);
8
9 typedef enum {
10 IS_INVALID,
11 IS_GENERAL,
12 IS_BLOCK,
13 IS_STRIDE
14 } ISTypeID;
15
ISGetTypeID_Private(IS is,ISTypeID * id)16 static inline PetscErrorCode ISGetTypeID_Private(IS is, ISTypeID *id)
17 {
18 PetscBool same;
19
20 PetscFunctionBegin;
21 *id = IS_INVALID;
22 PetscCall(PetscObjectTypeCompare((PetscObject)is, ISGENERAL, &same));
23 if (same) {
24 *id = IS_GENERAL;
25 goto functionend;
26 }
27 PetscCall(PetscObjectTypeCompare((PetscObject)is, ISBLOCK, &same));
28 if (same) {
29 *id = IS_BLOCK;
30 goto functionend;
31 }
32 PetscCall(PetscObjectTypeCompare((PetscObject)is, ISSTRIDE, &same));
33 if (same) {
34 *id = IS_STRIDE;
35 goto functionend;
36 }
37 functionend:
38 PetscFunctionReturn(PETSC_SUCCESS);
39 }
40
VecScatterBegin_Internal(VecScatter sf,Vec x,Vec y,InsertMode addv,ScatterMode mode)41 static PetscErrorCode VecScatterBegin_Internal(VecScatter sf, Vec x, Vec y, InsertMode addv, ScatterMode mode)
42 {
43 PetscSF wsf = NULL; /* either sf or its local part */
44 MPI_Op mop = MPI_OP_NULL;
45 PetscMPIInt size;
46 PetscMemType xmtype = PETSC_MEMTYPE_HOST, ymtype = PETSC_MEMTYPE_HOST;
47
48 PetscFunctionBegin;
49 if (x != y) PetscCall(VecLockReadPush(x));
50 PetscCall(VecGetArrayReadAndMemType(x, &sf->vscat.xdata, &xmtype));
51 PetscCall(VecGetArrayAndMemType(y, &sf->vscat.ydata, &ymtype));
52 PetscCall(VecLockWriteSet(y, PETSC_TRUE));
53
54 /* SCATTER_FORWARD_LOCAL indicates ignoring inter-process communication */
55 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
56 if ((mode & SCATTER_FORWARD_LOCAL) && size > 1) { /* Lazy creation of sf->vscat.lsf since SCATTER_FORWARD_LOCAL is uncommon */
57 if (!sf->vscat.lsf) PetscCall(PetscSFCreateLocalSF_Private(sf, &sf->vscat.lsf));
58 wsf = sf->vscat.lsf;
59 } else {
60 wsf = sf;
61 }
62
63 /* Note xdata/ydata is always recorded on sf (not lsf) above */
64 if (addv == INSERT_VALUES) mop = MPI_REPLACE;
65 else if (addv == ADD_VALUES) mop = MPIU_SUM; /* PETSc defines its own MPI datatype and SUM operation for __float128 etc. */
66 else if (addv == MAX_VALUES) mop = MPIU_MAX;
67 else if (addv == MIN_VALUES) mop = MPIU_MIN;
68 else SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "Unsupported InsertMode %d in VecScatterBegin/End", addv);
69
70 if (mode & SCATTER_REVERSE) { /* REVERSE indicates leaves to root scatter. Note that x and y are swapped in input */
71 PetscCall(PetscSFReduceWithMemTypeBegin(wsf, sf->vscat.unit, xmtype, sf->vscat.xdata, ymtype, sf->vscat.ydata, mop));
72 } else { /* FORWARD indicates x to y scatter, where x is root and y is leaf */
73 PetscCall(PetscSFBcastWithMemTypeBegin(wsf, sf->vscat.unit, xmtype, sf->vscat.xdata, ymtype, sf->vscat.ydata, mop));
74 }
75 PetscFunctionReturn(PETSC_SUCCESS);
76 }
77
VecScatterEnd_Internal(VecScatter sf,Vec x,Vec y,InsertMode addv,ScatterMode mode)78 static PetscErrorCode VecScatterEnd_Internal(VecScatter sf, Vec x, Vec y, InsertMode addv, ScatterMode mode)
79 {
80 PetscSF wsf = NULL;
81 MPI_Op mop = MPI_OP_NULL;
82 PetscMPIInt size;
83
84 PetscFunctionBegin;
85 /* SCATTER_FORWARD_LOCAL indicates ignoring inter-process communication */
86 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
87 wsf = ((mode & SCATTER_FORWARD_LOCAL) && size > 1) ? sf->vscat.lsf : sf;
88
89 if (addv == INSERT_VALUES) mop = MPI_REPLACE;
90 else if (addv == ADD_VALUES) mop = MPIU_SUM;
91 else if (addv == MAX_VALUES) mop = MPIU_MAX;
92 else if (addv == MIN_VALUES) mop = MPIU_MIN;
93 else SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "Unsupported InsertMode %d in VecScatterBegin/End", addv);
94
95 if (mode & SCATTER_REVERSE) { /* reverse scatter sends leaves to roots. Note that x and y are swapped in input */
96 PetscCall(PetscSFReduceEnd(wsf, sf->vscat.unit, sf->vscat.xdata, sf->vscat.ydata, mop));
97 } else { /* forward scatter sends roots to leaves, i.e., x to y */
98 PetscCall(PetscSFBcastEnd(wsf, sf->vscat.unit, sf->vscat.xdata, sf->vscat.ydata, mop));
99 }
100
101 PetscCall(VecRestoreArrayReadAndMemType(x, &sf->vscat.xdata));
102 if (x != y) PetscCall(VecLockReadPop(x));
103 PetscCall(VecRestoreArrayAndMemType(y, &sf->vscat.ydata));
104 PetscCall(VecLockWriteSet(y, PETSC_FALSE));
105 PetscFunctionReturn(PETSC_SUCCESS);
106 }
107
108 /* VecScatterRemap provides a light way to slightly modify a VecScatter. Suppose the input sf scatters
109 x[i] to y[j], tomap gives a plan to change vscat to scatter x[tomap[i]] to y[j]. Note that in SF,
110 x is roots. That means we need to change incoming stuffs such as bas->irootloc[].
111 */
VecScatterRemap_Internal(VecScatter sf,const PetscInt * tomap,const PetscInt * frommap)112 static PetscErrorCode VecScatterRemap_Internal(VecScatter sf, const PetscInt *tomap, const PetscInt *frommap)
113 {
114 PetscInt i, bs = sf->vscat.bs;
115 PetscMPIInt size;
116 PetscBool ident = PETSC_TRUE, isbasic, isneighbor;
117 PetscSFType type;
118 PetscSF_Basic *bas = NULL;
119
120 PetscFunctionBegin;
121 /* check if it is an identity map. If it is, do nothing */
122 if (tomap) {
123 for (i = 0; i < sf->nroots * bs; i++) {
124 if (i != tomap[i]) {
125 ident = PETSC_FALSE;
126 break;
127 }
128 }
129 if (ident) PetscFunctionReturn(PETSC_SUCCESS);
130 }
131 PetscCheck(!frommap, PETSC_COMM_SELF, PETSC_ERR_SUP, "Unable to remap the FROM in scatters yet");
132 if (!tomap) PetscFunctionReturn(PETSC_SUCCESS);
133
134 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
135
136 /* Since the indices changed, we must also update the local SF. But we do not do it since
137 lsf is rarely used. We just destroy lsf and rebuild it on demand from updated sf.
138 */
139 if (sf->vscat.lsf) PetscCall(PetscSFDestroy(&sf->vscat.lsf));
140
141 PetscCall(PetscSFGetType(sf, &type));
142 PetscCall(PetscObjectTypeCompare((PetscObject)sf, PETSCSFBASIC, &isbasic));
143 PetscCall(PetscObjectTypeCompare((PetscObject)sf, PETSCSFNEIGHBOR, &isneighbor));
144 PetscCheck(isbasic || isneighbor, PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "VecScatterRemap on SF type %s is not supported", type);
145
146 PetscCall(PetscSFSetUp(sf)); /* to build sf->irootloc if SetUp is not yet called */
147
148 /* Root indices are going to be remapped. This is tricky for SF. Root indices are used in sf->rremote,
149 sf->remote and bas->irootloc. The latter one is cheap to remap, but the former two are not.
150 To remap them, we have to do a bcast from roots to leaves, to let leaves know their updated roots.
151 Since VecScatterRemap is supposed to be a cheap routine to adapt a vecscatter by only changing where
152 x[] data is taken, we do not remap sf->rremote, sf->remote. The consequence is that operations
153 accessing them (such as PetscSFCompose) may get stale info. Considering VecScatter does not need
154 that complicated SF operations, we do not remap sf->rremote, sf->remote, instead we destroy them
155 so that code accessing them (if any) will crash (instead of get silent errors). Note that BcastAndOp/Reduce,
156 which are used by VecScatter and only rely on bas->irootloc, are updated and correct.
157 */
158 sf->remote = NULL;
159 PetscCall(PetscFree(sf->remote_alloc));
160 /* Not easy to free sf->rremote since it was allocated with PetscMalloc4(), so just give it crazy values */
161 for (i = 0; i < sf->roffset[sf->nranks]; i++) sf->rremote[i] = PETSC_INT_MIN;
162
163 /* Indices in tomap[] are for each individual vector entry. But indices in sf are for each
164 block in the vector. So before the remapping, we have to expand indices in sf by bs, and
165 after the remapping, we have to shrink them back.
166 */
167 bas = (PetscSF_Basic *)sf->data;
168 for (i = 0; i < bas->ioffset[bas->niranks]; i++) bas->irootloc[i] = tomap[bas->irootloc[i] * bs] / bs;
169 #if defined(PETSC_HAVE_DEVICE)
170 /* Free the irootloc copy on device. We allocate a new copy and get the updated value on demand. See PetscSFLinkGetRootPackOptAndIndices() */
171 for (i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, bas->irootloc_d[i]));
172 #endif
173 /* Destroy and then rebuild root packing optimizations since indices are changed */
174 PetscCall(PetscSFResetPackFields(sf));
175 PetscCall(PetscSFSetUpPackFields(sf));
176 PetscFunctionReturn(PETSC_SUCCESS);
177 }
178
179 /*
180 Given a parallel VecScatter context, return number of procs and vector entries involved in remote (i.e., off-process) communication
181
182 Input Parameters:
183 + sf - the context (must be a parallel vecscatter)
184 - send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)
185
186 Output parameters:
187 + num_procs - number of remote processors
188 - num_entries - number of vector entries to send or recv
189
190 Notes:
191 Sometimes PETSc internally needs to use the matrix-vector-multiply vecscatter context for other purposes. The client code
192 usually only uses MPI_Send/Recv. This group of subroutines provides info needed for such uses.
193
194 .seealso: [](sec_scatter), `VecScatterGetRemote_Private()`, `VecScatterGetRemoteOrdered_Private()`
195 */
VecScatterGetRemoteCount_Private(VecScatter sf,PetscBool send,PetscInt * num_procs,PetscInt * num_entries)196 PetscErrorCode VecScatterGetRemoteCount_Private(VecScatter sf, PetscBool send, PetscInt *num_procs, PetscInt *num_entries)
197 {
198 PetscMPIInt nranks, remote_start;
199 PetscMPIInt rank;
200 const PetscInt *offset;
201 const PetscMPIInt *ranks;
202
203 PetscFunctionBegin;
204 PetscCall(PetscSFSetUp(sf));
205 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
206
207 /* This routine is mainly used for MatMult's Mvctx. In Mvctx, we scatter an MPI vector x to a sequential vector lvec.
208 Remember x is roots and lvec is leaves. 'send' means roots to leaves communication. If 'send' is true, we need to
209 get info about which ranks this processor needs to send to. In other words, we need to call PetscSFGetLeafRanks().
210 If send is false, we do the opposite, calling PetscSFGetRootRanks().
211 */
212 if (send) PetscCall(PetscSFGetLeafRanks(sf, &nranks, &ranks, &offset, NULL));
213 else PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, &offset, NULL, NULL));
214 if (nranks) {
215 remote_start = (rank == ranks[0]) ? 1 : 0;
216 if (num_procs) *num_procs = nranks - remote_start;
217 if (num_entries) *num_entries = offset[nranks] - offset[remote_start];
218 } else {
219 if (num_procs) *num_procs = 0;
220 if (num_entries) *num_entries = 0;
221 }
222 PetscFunctionReturn(PETSC_SUCCESS);
223 }
224
225 /* Given a parallel VecScatter context, return a plan that represents the remote communication.
226 Any output parameter can be NULL.
227
228 Input Parameters:
229 + sf - the context
230 - send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)
231
232 Output parameters:
233 + n - number of remote processors
234 . starts - starting point in indices for each proc. ATTENTION: starts[0] is not necessarily zero.
235 Therefore, expressions like starts[i+1]-starts[i] and indices[starts[i]+j] work as
236 expected for a CSR structure but buf[starts[i]+j] may be out of range if buf was allocated
237 with length starts[n]-starts[0]. One should use buf[starts[i]-starts[0]+j] instead.
238 . indices - indices of entries to send/recv
239 . procs - ranks of remote processors
240 - bs - block size
241
242 .seealso: `VecScatterRestoreRemote_Private()`, `VecScatterGetRemoteOrdered_Private()`
243 */
VecScatterGetRemote_Private(VecScatter sf,PetscBool send,PetscMPIInt * n,const PetscInt ** starts,const PetscInt ** indices,const PetscMPIInt ** procs,PetscInt * bs)244 PetscErrorCode VecScatterGetRemote_Private(VecScatter sf, PetscBool send, PetscMPIInt *n, const PetscInt **starts, const PetscInt **indices, const PetscMPIInt **procs, PetscInt *bs)
245 {
246 PetscMPIInt nranks, remote_start;
247 PetscMPIInt rank;
248 const PetscInt *offset, *location;
249 const PetscMPIInt *ranks;
250
251 PetscFunctionBegin;
252 PetscCall(PetscSFSetUp(sf));
253 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
254
255 if (send) PetscCall(PetscSFGetLeafRanks(sf, &nranks, &ranks, &offset, &location));
256 else PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, &offset, &location, NULL));
257
258 if (nranks) {
259 remote_start = (rank == ranks[0]) ? 1 : 0;
260 if (n) *n = nranks - remote_start;
261 if (starts) *starts = &offset[remote_start];
262 if (indices) *indices = location; /* not &location[offset[remote_start]]. Starts[0] may point to the middle of indices[] */
263 if (procs) *procs = &ranks[remote_start];
264 } else {
265 if (n) *n = 0;
266 if (starts) *starts = NULL;
267 if (indices) *indices = NULL;
268 if (procs) *procs = NULL;
269 }
270
271 if (bs) *bs = 1;
272 PetscFunctionReturn(PETSC_SUCCESS);
273 }
274
275 /* Given a parallel VecScatter context, return a plan that represents the remote communication. Ranks of remote
276 processors returned in procs must be sorted in ascending order. Any output parameter can be NULL.
277
278 Input Parameters:
279 + sf - the context
280 - send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)
281
282 Output parameters:
283 + n - number of remote processors
284 . starts - starting point in indices for each proc. ATTENTION: starts[0] is not necessarily zero.
285 Therefore, expressions like starts[i+1]-starts[i] and indices[starts[i]+j] work as
286 expected for a CSR structure but buf[starts[i]+j] may be out of range if buf was allocated
287 with length starts[n]-starts[0]. One should use buf[starts[i]-starts[0]+j] instead.
288 . indices - indices of entries to send/recv
289 . procs - ranks of remote processors
290 - bs - block size
291
292 Notes:
293 Output parameters like starts, indices must also be adapted according to the sorted ranks.
294
295 .seealso: `VecScatterRestoreRemoteOrdered_Private()`, `VecScatterGetRemote_Private()`
296 */
VecScatterGetRemoteOrdered_Private(VecScatter sf,PetscBool send,PetscMPIInt * n,const PetscInt ** starts,const PetscInt ** indices,const PetscMPIInt ** procs,PetscInt * bs)297 PetscErrorCode VecScatterGetRemoteOrdered_Private(VecScatter sf, PetscBool send, PetscMPIInt *n, const PetscInt **starts, const PetscInt **indices, const PetscMPIInt **procs, PetscInt *bs)
298 {
299 PetscFunctionBegin;
300 PetscCall(VecScatterGetRemote_Private(sf, send, n, starts, indices, procs, bs));
301 if (PetscUnlikelyDebug(n && procs)) {
302 PetscMPIInt i;
303 /* from back to front to also handle cases *n=0 */
304 for (i = *n - 1; i > 0; i--) PetscCheck((*procs)[i - 1] <= (*procs)[i], PETSC_COMM_SELF, PETSC_ERR_PLIB, "procs[] are not ordered");
305 }
306 PetscFunctionReturn(PETSC_SUCCESS);
307 }
308
309 /* Given a parallel VecScatter context, restore the plan returned by VecScatterGetRemote_Private. This gives a chance for
310 an implementation to free memory allocated in the VecScatterGetRemote_Private call.
311
312 Input Parameters:
313 + sf - the context
314 - send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)
315
316 Output parameters:
317 + n - number of remote processors
318 . starts - starting point in indices for each proc
319 . indices - indices of entries to send/recv
320 . procs - ranks of remote processors
321 - bs - block size
322
323 .seealso: `VecScatterGetRemote_Private()`
324 */
VecScatterRestoreRemote_Private(VecScatter sf,PetscBool send,PetscMPIInt * n,const PetscInt ** starts,const PetscInt ** indices,const PetscMPIInt ** procs,PetscInt * bs)325 PetscErrorCode VecScatterRestoreRemote_Private(VecScatter sf, PetscBool send, PetscMPIInt *n, const PetscInt **starts, const PetscInt **indices, const PetscMPIInt **procs, PetscInt *bs)
326 {
327 PetscFunctionBegin;
328 if (starts) *starts = NULL;
329 if (indices) *indices = NULL;
330 if (procs) *procs = NULL;
331 PetscFunctionReturn(PETSC_SUCCESS);
332 }
333
334 /* Given a parallel VecScatter context, restore the plan returned by VecScatterGetRemoteOrdered_Private. This gives a chance for
335 an implementation to free memory allocated in the VecScatterGetRemoteOrdered_Private call.
336
337 Input Parameters:
338 + sf - the context
339 - send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)
340
341 Output parameters:
342 + n - number of remote processors
343 . starts - starting point in indices for each proc
344 . indices - indices of entries to send/recv
345 . procs - ranks of remote processors
346 - bs - block size
347
348 .seealso: `VecScatterGetRemoteOrdered_Private()`
349 */
VecScatterRestoreRemoteOrdered_Private(VecScatter sf,PetscBool send,PetscMPIInt * n,const PetscInt ** starts,const PetscInt ** indices,const PetscMPIInt ** procs,PetscInt * bs)350 PetscErrorCode VecScatterRestoreRemoteOrdered_Private(VecScatter sf, PetscBool send, PetscMPIInt *n, const PetscInt **starts, const PetscInt **indices, const PetscMPIInt **procs, PetscInt *bs)
351 {
352 PetscFunctionBegin;
353 PetscCall(VecScatterRestoreRemote_Private(sf, send, n, starts, indices, procs, bs));
354 PetscFunctionReturn(PETSC_SUCCESS);
355 }
356
357 /*@
358 VecScatterSetUp - Sets up the `VecScatter` to be able to actually scatter information between vectors
359
360 Collective
361
362 Input Parameter:
363 . sf - the scatter context
364
365 Level: intermediate
366
367 .seealso: [](sec_scatter), `VecScatter`, `VecScatterCreate()`, `VecScatterCopy()`
368 @*/
VecScatterSetUp(VecScatter sf)369 PetscErrorCode VecScatterSetUp(VecScatter sf)
370 {
371 PetscFunctionBegin;
372 PetscCall(PetscSFSetUp(sf));
373 PetscFunctionReturn(PETSC_SUCCESS);
374 }
375
376 /*@
377 VecScatterSetType - Builds a vector scatter, for a particular vector scatter implementation.
378
379 Collective
380
381 Input Parameters:
382 + sf - The `VecScatter` object
383 - type - The name of the vector scatter type
384
385 Options Database Key:
386 . -sf_type <type> - Sets the `VecScatterType`
387
388 Level: intermediate
389
390 Note:
391 Use `VecScatterDuplicate()` to form additional vectors scatter of the same type as an existing vector scatter.
392
393 .seealso: [](sec_scatter), `VecScatter`, `VecScatterType`, `VecScatterGetType()`, `VecScatterCreate()`
394 @*/
VecScatterSetType(VecScatter sf,VecScatterType type)395 PetscErrorCode VecScatterSetType(VecScatter sf, VecScatterType type)
396 {
397 PetscFunctionBegin;
398 PetscCall(PetscSFSetType(sf, type));
399 PetscFunctionReturn(PETSC_SUCCESS);
400 }
401
402 /*@
403 VecScatterGetType - Gets the vector scatter type name (as a string) from the `VecScatter`.
404
405 Not Collective
406
407 Input Parameter:
408 . sf - The vector scatter
409
410 Output Parameter:
411 . type - The vector scatter type name
412
413 Level: intermediate
414
415 .seealso: [](sec_scatter), `VecScatter`, `VecScatterType`, `VecScatterSetType()`, `VecScatterCreate()`
416 @*/
VecScatterGetType(VecScatter sf,VecScatterType * type)417 PetscErrorCode VecScatterGetType(VecScatter sf, VecScatterType *type)
418 {
419 PetscFunctionBegin;
420 PetscCall(PetscSFGetType(sf, type));
421 PetscFunctionReturn(PETSC_SUCCESS);
422 }
423
424 /*@C
425 VecScatterRegister - Adds a new vector scatter component implementation
426
427 Not Collective
428
429 Input Parameters:
430 + sname - The name of a new user-defined creation routine
431 - function - The creation routine
432
433 Level: advanced
434
435 .seealso: [](sec_scatter), `VecScatter`, `VecScatterType`, `VecRegister()`
436 @*/
VecScatterRegister(const char sname[],PetscErrorCode (* function)(VecScatter))437 PetscErrorCode VecScatterRegister(const char sname[], PetscErrorCode (*function)(VecScatter))
438 {
439 PetscFunctionBegin;
440 PetscCall(PetscSFRegister(sname, function));
441 PetscFunctionReturn(PETSC_SUCCESS);
442 }
443
444 /*@
445 VecScatterGetMerged - Returns true if the scatter is completed in the `VecScatterBegin()`
446 and the `VecScatterEnd()` does nothing
447
448 Not Collective
449
450 Input Parameter:
451 . sf - scatter context created with `VecScatterCreate()`
452
453 Output Parameter:
454 . flg - `PETSC_TRUE` if the `VecScatterBegin()`/`VecScatterEnd()` are all done during the `VecScatterBegin()`
455
456 Level: developer
457
458 .seealso: [](sec_scatter), `VecScatter`, `VecScatterCreate()`, `VecScatterEnd()`, `VecScatterBegin()`
459 @*/
VecScatterGetMerged(VecScatter sf,PetscBool * flg)460 PetscErrorCode VecScatterGetMerged(VecScatter sf, PetscBool *flg)
461 {
462 PetscFunctionBegin;
463 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
464 if (flg) *flg = sf->vscat.beginandendtogether;
465 PetscFunctionReturn(PETSC_SUCCESS);
466 }
467 /*@
468 VecScatterDestroy - Destroys a scatter context created by `VecScatterCreate()`
469
470 Collective
471
472 Input Parameter:
473 . sf - the scatter context
474
475 Level: intermediate
476
477 .seealso: [](sec_scatter), `VecScatter`, `VecScatterCreate()`, `VecScatterCopy()`
478 @*/
VecScatterDestroy(VecScatter * sf)479 PetscErrorCode VecScatterDestroy(VecScatter *sf)
480 {
481 PetscFunctionBegin;
482 PetscCall(PetscSFDestroy(sf));
483 PetscFunctionReturn(PETSC_SUCCESS);
484 }
485
486 /*@
487 VecScatterCopy - Makes a copy of a scatter context.
488
489 Collective
490
491 Input Parameter:
492 . sf - the scatter context
493
494 Output Parameter:
495 . newsf - the context copy
496
497 Level: advanced
498
499 .seealso: [](sec_scatter), `VecScatter`, `VecScatterType`, `VecScatterCreate()`, `VecScatterDestroy()`
500 @*/
VecScatterCopy(VecScatter sf,VecScatter * newsf)501 PetscErrorCode VecScatterCopy(VecScatter sf, VecScatter *newsf)
502 {
503 PetscFunctionBegin;
504 PetscAssertPointer(newsf, 2);
505 PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_GRAPH, newsf));
506 PetscCall(PetscSFSetUp(*newsf));
507 PetscFunctionReturn(PETSC_SUCCESS);
508 }
509
510 /*@
511 VecScatterViewFromOptions - View a `VecScatter` object based on values in the options database
512
513 Collective
514
515 Input Parameters:
516 + sf - the scatter context
517 . obj - Optional object
518 - name - command line option
519
520 Level: intermediate
521
522 Note:
523 See `PetscObjectViewFromOptions()` for available `PetscViewer` and `PetscViewerFormat` values
524
525 .seealso: [](sec_scatter), `VecScatter`, `VecScatterView()`, `PetscObjectViewFromOptions()`, `VecScatterCreate()`
526 @*/
VecScatterViewFromOptions(VecScatter sf,PetscObject obj,const char name[])527 PetscErrorCode VecScatterViewFromOptions(VecScatter sf, PetscObject obj, const char name[])
528 {
529 PetscFunctionBegin;
530 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
531 PetscCall(PetscObjectViewFromOptions((PetscObject)sf, obj, name));
532 PetscFunctionReturn(PETSC_SUCCESS);
533 }
534
535 /*@
536 VecScatterView - Views a vector scatter context.
537
538 Collective
539
540 Input Parameters:
541 + sf - the scatter context
542 - viewer - the viewer for displaying the context
543
544 Level: intermediate
545
546 .seealso: [](sec_scatter), `VecScatter`, `PetscViewer`, `VecScatterViewFromOptions()`, `PetscObjectViewFromOptions()`, `VecScatterCreate()`
547 @*/
VecScatterView(VecScatter sf,PetscViewer viewer)548 PetscErrorCode VecScatterView(VecScatter sf, PetscViewer viewer)
549 {
550 PetscFunctionBegin;
551 PetscCall(PetscSFView(sf, viewer));
552 PetscFunctionReturn(PETSC_SUCCESS);
553 }
554
555 /*@
556 VecScatterRemap - Remaps the "from" and "to" indices in a
557 vector scatter context.
558
559 Collective
560
561 Input Parameters:
562 + sf - vector scatter context
563 . tomap - remapping plan for "to" indices (may be `NULL`).
564 - frommap - remapping plan for "from" indices (may be `NULL`)
565
566 Level: developer
567
568 Notes:
569 In the parallel case the todata contains indices from where the data is taken
570 (and then sent to others)! The fromdata contains indices from where the received
571 data is finally put locally.
572
573 In the sequential case the todata contains indices from where the data is put
574 and the fromdata contains indices from where the data is taken from.
575 This is backwards from the parallel case!
576
577 .seealso: [](sec_scatter), `VecScatter`, `VecScatterCreate()`
578 @*/
VecScatterRemap(VecScatter sf,PetscInt tomap[],PetscInt frommap[])579 PetscErrorCode VecScatterRemap(VecScatter sf, PetscInt tomap[], PetscInt frommap[])
580 {
581 PetscFunctionBegin;
582 if (tomap) PetscAssertPointer(tomap, 2);
583 if (frommap) PetscAssertPointer(frommap, 3);
584 PetscCall(VecScatterRemap_Internal(sf, tomap, frommap));
585 PetscCheck(!frommap, PETSC_COMM_SELF, PETSC_ERR_SUP, "Unable to remap the FROM in scatters yet");
586 /* Mark then vector lengths as unknown because we do not know the lengths of the remapped vectors */
587 sf->vscat.from_n = -1;
588 sf->vscat.to_n = -1;
589 PetscFunctionReturn(PETSC_SUCCESS);
590 }
591
592 /*@
593 VecScatterSetFromOptions - Configures the vector scatter from values in the options database.
594
595 Collective
596
597 Input Parameter:
598 . sf - The vector scatter
599
600 Notes:
601 To see all options, run your program with the -help option, or consult the users manual.
602
603 Must be called before `VecScatterSetUp()` and before the vector scatter is used.
604
605 Level: beginner
606
607 .seealso: [](sec_scatter), `VecScatter`, `VecScatterCreate()`, `VecScatterDestroy()`, `VecScatterSetUp()`
608 @*/
VecScatterSetFromOptions(VecScatter sf)609 PetscErrorCode VecScatterSetFromOptions(VecScatter sf)
610 {
611 PetscFunctionBegin;
612 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
613 PetscObjectOptionsBegin((PetscObject)sf);
614
615 sf->vscat.beginandendtogether = PETSC_FALSE;
616 PetscCall(PetscOptionsBool("-vecscatter_merge", "Use combined (merged) vector scatter begin and end", "VecScatterCreate", sf->vscat.beginandendtogether, &sf->vscat.beginandendtogether, NULL));
617 if (sf->vscat.beginandendtogether) PetscCall(PetscInfo(sf, "Using combined (merged) vector scatter begin and end\n"));
618 PetscOptionsEnd();
619 PetscFunctionReturn(PETSC_SUCCESS);
620 }
621
622 /*@
623 VecScatterCreate - Creates a vector scatter `VecScatter` context that is used to communicate entries between two vectors `Vec`
624
625 Collective
626
627 Input Parameters:
628 + x - a vector that defines the shape (parallel data layout of the vector) of vectors from which we scatter
629 . y - a vector that defines the shape (parallel data layout of the vector) of vectors to which we scatter
630 . ix - the indices of `x` to scatter (if `NULL` scatters all values)
631 - iy - the indices of `y` to hold results (if `NULL` fills entire vector `yin` in order)
632
633 Output Parameter:
634 . newsf - location to store the new scatter context
635
636 Options Database Keys:
637 + -vecscatter_view - Prints detail of communications
638 . -vecscatter_view ::ascii_info - Print less details about communication
639 - -vecscatter_merge - `VecScatterBegin()` handles all of the communication, `VecScatterEnd()` is a nop
640 eliminates the chance for overlap of computation and communication
641
642 Level: intermediate
643
644 Notes:
645 If both `x` and `y` are parallel, their communicator must be on the same
646 set of processes, but their process order can be different.
647 In calls to the scatter options you can use different vectors than the `x` and
648 `y` you used above; BUT they must have the same parallel data layout, for example,
649 they could be obtained from `VecDuplicate()`.
650 A `VecScatter` context CANNOT be used in two or more simultaneous scatters;
651 that is you cannot call a second `VecScatterBegin()` with the same scatter
652 context until the `VecScatterEnd()` has been called on the first `VecScatterBegin()`.
653 In this case a separate `VecScatter` is needed for each concurrent scatter.
654
655 Both `ix` and `iy` cannot be `NULL` at the same time.
656
657 Use `VecScatterCreateToAll()` to create a `VecScatter` that copies an MPI vector to sequential vectors on all MPI processes.
658 Use `VecScatterCreateToZero()` to create a `VecScatter` that copies an MPI vector to a sequential vector on MPI rank 0.
659 These special `VecScatter` have better performance than general ones.
660
661 Developer Note:
662 The implementations of most the `VecScatter` are done using `PetscSF`.
663
664 .seealso: [](sec_scatter), `VecScatter`, `VecScatterDestroy()`, `VecScatterCreateToAll()`, `VecScatterCreateToZero()`, `PetscSFCreate()`,
665 `VecScatterType`, `InsertMode`, `ScatterMode`, `VecScatterBegin()`, `VecScatterEnd()`
666 @*/
VecScatterCreate(Vec x,IS ix,Vec y,IS iy,VecScatter * newsf)667 PetscErrorCode VecScatterCreate(Vec x, IS ix, Vec y, IS iy, VecScatter *newsf)
668 {
669 MPI_Comm xcomm, ycomm, bigcomm;
670 Vec xx, yy;
671 IS ix_old = ix, iy_old = iy, ixx, iyy;
672 PetscMPIInt xcommsize, ycommsize, rank, result;
673 PetscInt i, n, N, nroots, nleaves, *ilocal, xstart, ystart, ixsize, iysize, xlen, ylen;
674 const PetscInt *xindices, *yindices;
675 PetscSFNode *iremote;
676 PetscLayout xlayout, ylayout;
677 ISTypeID ixid, iyid;
678 PetscInt bs, bsx, bsy, min, max, m[2], mg[2], ixfirst, ixstep, iyfirst, iystep;
679 PetscBool can_do_block_opt = PETSC_FALSE;
680 PetscSF sf;
681
682 PetscFunctionBegin;
683 PetscAssertPointer(newsf, 5);
684 PetscCheck(ix || iy, PetscObjectComm((PetscObject)x), PETSC_ERR_SUP, "Cannot pass default in for both input and output indices");
685
686 /* Get comm from x and y */
687 PetscCall(PetscObjectGetComm((PetscObject)x, &xcomm));
688 PetscCallMPI(MPI_Comm_size(xcomm, &xcommsize));
689 PetscCall(PetscObjectGetComm((PetscObject)y, &ycomm));
690 PetscCallMPI(MPI_Comm_size(ycomm, &ycommsize));
691 if (xcommsize > 1 && ycommsize > 1) {
692 PetscCallMPI(MPI_Comm_compare(xcomm, ycomm, &result));
693 PetscCheck(result != MPI_UNEQUAL, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "VecScatterCreate: parallel vectors x and y must have identical/congruent/similar communicators");
694 }
695 bs = 1; /* default, no blocking */
696
697 /*
698 Let P and S stand for parallel and sequential vectors respectively. There are four combinations of vecscatters: PtoP, PtoS,
699 StoP and StoS. The assumption of VecScatterCreate(Vec x,IS ix,Vec y,IS iy,VecScatter *newctx) is: if x is parallel, then ix
700 contains global indices of x. If x is sequential, ix contains local indices of x. Similarly for y and iy.
701
702 SF builds around concepts of local leaves and remote roots. We treat source vector x as roots and destination vector y as
703 leaves. A PtoS scatter can be naturally mapped to SF. We transform PtoP and StoP to PtoS, and treat StoS as trivial PtoS.
704 */
705
706 /* NULL ix or iy in VecScatterCreate(x,ix,y,iy,newctx) has special meaning. Recover them for these cases */
707 if (!ix) {
708 if (xcommsize > 1 && ycommsize == 1) { /* PtoS: null ix means the whole x will be scattered to each seq y */
709 PetscCall(VecGetSize(x, &N));
710 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &ix));
711 } else { /* PtoP, StoP or StoS: null ix means the whole local part of x will be scattered */
712 PetscCall(VecGetLocalSize(x, &n));
713 PetscCall(VecGetOwnershipRange(x, &xstart, NULL));
714 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, xstart, 1, &ix));
715 }
716 }
717
718 if (!iy) {
719 if (xcommsize == 1 && ycommsize > 1) { /* StoP: null iy means the whole y will be scattered to from each seq x */
720 PetscCall(VecGetSize(y, &N));
721 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iy));
722 } else { /* PtoP, StoP or StoS: null iy means the whole local part of y will be scattered to */
723 PetscCall(VecGetLocalSize(y, &n));
724 PetscCall(VecGetOwnershipRange(y, &ystart, NULL));
725 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, ystart, 1, &iy));
726 }
727 }
728
729 /* Do error checking immediately after we have non-empty ix, iy */
730 PetscCall(ISGetLocalSize(ix, &ixsize));
731 PetscCall(ISGetLocalSize(iy, &iysize));
732 PetscCall(VecGetSize(x, &xlen));
733 PetscCall(VecGetSize(y, &ylen));
734 PetscCheck(ixsize == iysize, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Scatter sizes of ix and iy don't match locally ix=%" PetscInt_FMT " iy=%" PetscInt_FMT, ixsize, iysize);
735 PetscCall(ISGetMinMax(ix, &min, &max));
736 PetscCheck(min >= 0 && max < xlen, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Scatter indices in ix are out of range: found [%" PetscInt_FMT ",%" PetscInt_FMT "), expected in [0,%" PetscInt_FMT ")", min, max, xlen);
737 PetscCall(ISGetMinMax(iy, &min, &max));
738 PetscCheck(min >= 0 && max < ylen, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Scatter indices in iy are out of range: found [%" PetscInt_FMT ",%" PetscInt_FMT "), expected in [0,%" PetscInt_FMT ")", min, max, ylen);
739
740 /* Extract info about ix, iy for further test */
741 PetscCall(ISGetTypeID_Private(ix, &ixid));
742 PetscCall(ISGetTypeID_Private(iy, &iyid));
743 if (ixid == IS_BLOCK) PetscCall(ISGetBlockSize(ix, &bsx));
744 else if (ixid == IS_STRIDE) PetscCall(ISStrideGetInfo(ix, &ixfirst, &ixstep));
745
746 if (iyid == IS_BLOCK) PetscCall(ISGetBlockSize(iy, &bsy));
747 else if (iyid == IS_STRIDE) PetscCall(ISStrideGetInfo(iy, &iyfirst, &iystep));
748
749 /* Check if a PtoS is special ToAll/ToZero scatters, which can be results of VecScatterCreateToAll/Zero.
750 ToAll means a whole MPI vector is copied to a seq vector on every process. ToZero means a whole MPI
751 vector is copied to a seq vector on rank 0 and other processes do nothing(i.e.,they input empty ix,iy).
752
753 We can optimize these scatters with MPI collectives. We can also avoid costly analysis used for general scatters.
754 */
755 if (xcommsize > 1 && ycommsize == 1) { /* Ranks do not diverge at this if-test */
756 PetscInt pattern[2] = {0, 0}; /* A boolean array with pattern[0] for allgather-like (ToAll) and pattern[1] for gather-like (ToZero) */
757 PetscLayout map;
758
759 PetscCallMPI(MPI_Comm_rank(xcomm, &rank));
760 PetscCall(VecGetLayout(x, &map));
761 if (rank == 0) {
762 if (ixid == IS_STRIDE && iyid == IS_STRIDE && ixsize == xlen && ixfirst == 0 && ixstep == 1 && iyfirst == 0 && iystep == 1) {
763 /* Rank 0 scatters the whole mpi x to seq y, so it is either a ToAll or a ToZero candidate in its view */
764 pattern[0] = pattern[1] = 1;
765 }
766 } else {
767 if (ixid == IS_STRIDE && iyid == IS_STRIDE && ixsize == xlen && ixfirst == 0 && ixstep == 1 && iyfirst == 0 && iystep == 1) {
768 /* Other ranks also scatter the whole mpi x to seq y, so it is a ToAll candidate in their view */
769 pattern[0] = 1;
770 } else if (ixsize == 0) {
771 /* Other ranks do nothing, so it is a ToZero candidate */
772 pattern[1] = 1;
773 }
774 }
775
776 /* One stone (the expensive allreduce) two birds: pattern[] tells if it is ToAll or ToZero */
777 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, pattern, 2, MPIU_INT, MPI_LAND, xcomm));
778
779 if (pattern[0] || pattern[1]) {
780 PetscCall(PetscSFCreate(xcomm, &sf));
781 PetscCall(PetscSFSetFromOptions(sf));
782 PetscCall(PetscSFSetGraphWithPattern(sf, map, pattern[0] ? PETSCSF_PATTERN_ALLGATHER : PETSCSF_PATTERN_GATHER));
783 goto functionend; /* No further analysis needed. What a big win! */
784 }
785 }
786
787 /* Continue ...
788 Do block optimization by taking advantage of high level info available in ix, iy.
789 The block optimization is valid when all of the following conditions are met:
790 1) ix, iy are blocked or can be blocked (i.e., strided with step=1);
791 2) ix, iy have the same block size;
792 3) all processors agree on one block size;
793 4) no blocks span more than one process;
794 */
795 bigcomm = (xcommsize == 1) ? ycomm : xcomm;
796
797 /* Processors could go through different path in this if-else test */
798 m[0] = PETSC_INT_MAX;
799 m[1] = PETSC_INT_MIN;
800 if (ixid == IS_BLOCK && iyid == IS_BLOCK) {
801 m[0] = PetscMin(bsx, bsy);
802 m[1] = PetscMax(bsx, bsy);
803 } else if (ixid == IS_BLOCK && iyid == IS_STRIDE && iystep == 1 && iyfirst % bsx == 0) {
804 m[0] = bsx;
805 m[1] = bsx;
806 } else if (ixid == IS_STRIDE && iyid == IS_BLOCK && ixstep == 1 && ixfirst % bsy == 0) {
807 m[0] = bsy;
808 m[1] = bsy;
809 }
810 /* Get max and min of bsx,bsy over all processes in one allreduce */
811 PetscCall(PetscGlobalMinMaxInt(bigcomm, m, mg));
812
813 /* Since we used allreduce above, all ranks will have the same min and max. min==max
814 implies all ranks have the same bs. Do further test to see if local vectors are dividable
815 by bs on ALL ranks. If they are, we are ensured that no blocks span more than one processor.
816 */
817 if (mg[0] == mg[1] && mg[0] > 1) {
818 PetscCall(VecGetLocalSize(x, &xlen));
819 PetscCall(VecGetLocalSize(y, &ylen));
820 m[0] = xlen % mg[0];
821 m[1] = ylen % mg[0];
822 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, m, 2, MPIU_INT, MPI_LOR, bigcomm));
823 if (!m[0] && !m[1]) can_do_block_opt = PETSC_TRUE;
824 }
825
826 /* If can_do_block_opt, then shrink x, y, ix and iy by bs to get xx, yy, ixx and iyy, whose indices
827 and layout are actually used in building SF. Suppose blocked ix representing {0,1,2,6,7,8} has
828 indices {0,2} and bs=3, then ixx = {0,2}; suppose strided iy={3,4,5,6,7,8}, then iyy={1,2}.
829
830 xx is a little special. If x is seq, then xx is the concatenation of seq x's on ycomm. In this way,
831 we can treat PtoP and StoP uniformly as PtoS.
832 */
833 if (can_do_block_opt) {
834 const PetscInt *indices;
835
836 /* Shrink x and ix */
837 bs = mg[0];
838 PetscCall(VecCreateMPIWithArray(bigcomm, 1, xlen / bs, PETSC_DECIDE, NULL, &xx)); /* We only care xx's layout */
839 if (ixid == IS_BLOCK) {
840 PetscCall(ISBlockGetIndices(ix, &indices));
841 PetscCall(ISBlockGetLocalSize(ix, &ixsize));
842 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ixsize, indices, PETSC_COPY_VALUES, &ixx));
843 PetscCall(ISBlockRestoreIndices(ix, &indices));
844 } else { /* ixid == IS_STRIDE */
845 PetscCall(ISGetLocalSize(ix, &ixsize));
846 PetscCall(ISCreateStride(PETSC_COMM_SELF, ixsize / bs, ixfirst / bs, 1, &ixx));
847 }
848
849 /* Shrink y and iy */
850 PetscCall(VecCreateMPIWithArray(ycomm, 1, ylen / bs, PETSC_DECIDE, NULL, &yy));
851 if (iyid == IS_BLOCK) {
852 PetscCall(ISBlockGetIndices(iy, &indices));
853 PetscCall(ISBlockGetLocalSize(iy, &iysize));
854 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, iysize, indices, PETSC_COPY_VALUES, &iyy));
855 PetscCall(ISBlockRestoreIndices(iy, &indices));
856 } else { /* iyid == IS_STRIDE */
857 PetscCall(ISGetLocalSize(iy, &iysize));
858 PetscCall(ISCreateStride(PETSC_COMM_SELF, iysize / bs, iyfirst / bs, 1, &iyy));
859 }
860 } else {
861 ixx = ix;
862 iyy = iy;
863 yy = y;
864 if (xcommsize == 1) PetscCall(VecCreateMPIWithArray(bigcomm, 1, xlen, PETSC_DECIDE, NULL, &xx));
865 else xx = x;
866 }
867
868 /* Now it is ready to build SF with preprocessed (xx, yy) and (ixx, iyy) */
869 PetscCall(ISGetIndices(ixx, &xindices));
870 PetscCall(ISGetIndices(iyy, &yindices));
871 PetscCall(VecGetLayout(xx, &xlayout));
872
873 if (ycommsize > 1) {
874 /* PtoP or StoP */
875
876 /* Below is a piece of complex code with a very simple goal: move global index pairs (xindices[i], yindices[i]),
877 to owner process of yindices[i] according to ylayout, i = 0..n.
878
879 I did it through a temp sf, but later I thought the old design was inefficient and also distorted log view.
880 We want to map one VecScatterCreate() call to one PetscSFCreate() call. The old design mapped to three
881 PetscSFCreate() calls. This code is on critical path of VecScatterSetUp and is used by every VecScatterCreate.
882 So I commented it out and did another optimized implementation. The commented code is left here for reference.
883 */
884 #if 0
885 const PetscInt *degree;
886 PetscSF tmpsf;
887 PetscInt inedges=0,*leafdata,*rootdata;
888
889 PetscCall(VecGetOwnershipRange(xx,&xstart,NULL));
890 PetscCall(VecGetLayout(yy,&ylayout));
891 PetscCall(VecGetOwnershipRange(yy,&ystart,NULL));
892
893 PetscCall(VecGetLocalSize(yy,&nroots));
894 PetscCall(ISGetLocalSize(iyy,&nleaves));
895 PetscCall(PetscMalloc2(nleaves,&iremote,nleaves*2,&leafdata));
896
897 for (i=0; i<nleaves; i++) {
898 PetscCall(PetscLayoutFindOwnerIndex(ylayout,yindices[i],&iremote[i].rank,&iremote[i].index));
899 leafdata[2*i] = yindices[i];
900 leafdata[2*i+1] = (xcommsize > 1)? xindices[i] : xindices[i] + xstart;
901 }
902
903 PetscCall(PetscSFCreate(ycomm,&tmpsf));
904 PetscCall(PetscSFSetGraph(tmpsf,nroots,nleaves,NULL,PETSC_USE_POINTER,iremote,PETSC_USE_POINTER));
905
906 PetscCall(PetscSFComputeDegreeBegin(tmpsf,°ree));
907 PetscCall(PetscSFComputeDegreeEnd(tmpsf,°ree));
908
909 for (i=0; i<nroots; i++) inedges += degree[i];
910 PetscCall(PetscMalloc1(inedges*2,&rootdata));
911 PetscCall(PetscSFGatherBegin(tmpsf,MPIU_2INT,leafdata,rootdata));
912 PetscCall(PetscSFGatherEnd(tmpsf,MPIU_2INT,leafdata,rootdata));
913
914 PetscCall(PetscFree2(iremote,leafdata));
915 PetscCall(PetscSFDestroy(&tmpsf));
916
917 /* rootdata contains global index pairs (i, j). j's are owned by the current process, but i's can point to anywhere.
918 We convert j to local, and convert i to (rank, index). In the end, we get an PtoS suitable for building SF.
919 */
920 nleaves = inedges;
921 PetscCall(VecGetLocalSize(xx,&nroots));
922 PetscCall(PetscMalloc1(nleaves,&ilocal));
923 PetscCall(PetscMalloc1(nleaves,&iremote));
924
925 for (i=0; i<inedges; i++) {
926 ilocal[i] = rootdata[2*i] - ystart; /* convert y's global index to local index */
927 PetscCall(PetscLayoutFindOwnerIndex(xlayout,rootdata[2*i+1],&iremote[i].rank,&iremote[i].index)); /* convert x's global index to (rank, index) */
928 }
929 PetscCall(PetscFree(rootdata));
930 #else
931 PetscInt j, k, n, disp, rlentotal, *sstart, *xindices_sorted, *yindices_sorted;
932 const PetscInt *yrange;
933 PetscMPIInt nsend, nrecv, nreq, yrank, *sendto, *recvfrom, tag1, tag2;
934 PetscInt *slens, *rlens, count;
935 PetscInt *rxindices, *ryindices;
936 MPI_Request *reqs, *sreqs, *rreqs;
937
938 /* Sorting makes code simpler, faster and also helps getting rid of many O(P) arrays, which hurt scalability at large scale
939 yindices_sorted - sorted yindices
940 xindices_sorted - xindices sorted along with yindces
941 */
942 PetscCall(ISGetLocalSize(ixx, &n)); /*ixx, iyy have the same local size */
943 PetscCall(PetscMalloc2(n, &xindices_sorted, n, &yindices_sorted));
944 PetscCall(PetscArraycpy(xindices_sorted, xindices, n));
945 PetscCall(PetscArraycpy(yindices_sorted, yindices, n));
946 PetscCall(PetscSortIntWithArray(n, yindices_sorted, xindices_sorted));
947 PetscCall(VecGetOwnershipRange(xx, &xstart, NULL));
948 if (xcommsize == 1) {
949 for (i = 0; i < n; i++) xindices_sorted[i] += xstart;
950 } /* Convert to global indices */
951
952 /*
953 Calculate info about messages I need to send
954 nsend - number of non-empty messages to send
955 sendto - [nsend] ranks I will send messages to
956 sstart - [nsend+1] sstart[i] is the start index in xsindices_sorted[] I send to rank sendto[i]
957 slens - [ycommsize] I want to send slens[i] entries to rank i.
958 */
959 PetscCall(VecGetLayout(yy, &ylayout));
960 PetscCall(PetscLayoutGetRanges(ylayout, &yrange));
961 PetscCall(PetscCalloc1(ycommsize, &slens)); /* The only O(P) array in this algorithm */
962
963 i = j = nsend = 0;
964 while (i < n) {
965 if (yindices_sorted[i] >= yrange[j + 1]) { /* If i-th index is out of rank j's bound */
966 do {
967 j++;
968 } while (yindices_sorted[i] >= yrange[j + 1] && j < ycommsize); /* Increase j until i-th index falls in rank j's bound */
969 PetscCheck(j != ycommsize, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " not owned by any process, upper bound %" PetscInt_FMT, yindices_sorted[i], yrange[ycommsize]);
970 }
971 i++;
972 if (!slens[j]++) nsend++;
973 }
974
975 PetscCall(PetscMalloc2(nsend + 1, &sstart, nsend, &sendto));
976
977 sstart[0] = 0;
978 for (i = j = 0; i < ycommsize; i++) {
979 if (slens[i]) {
980 PetscCall(PetscMPIIntCast(i, &sendto[j]));
981 sstart[j + 1] = sstart[j] + slens[i];
982 j++;
983 }
984 }
985
986 /*
987 Calculate the reverse info about messages I will recv
988 nrecv - number of messages I will recv
989 recvfrom - [nrecv] ranks I recv from
990 rlens - [nrecv] I will recv rlens[i] entries from rank recvfrom[i]
991 rlentotal - sum of rlens[]
992 rxindices - [rlentotal] recv buffer for xindices_sorted
993 ryindices - [rlentotal] recv buffer for yindices_sorted
994 */
995 PetscCall(PetscGatherNumberOfMessages_Private(ycomm, NULL, slens, &nrecv));
996 PetscCall(PetscGatherMessageLengths_Private(ycomm, nsend, nrecv, slens, &recvfrom, &rlens));
997 PetscCall(PetscFree(slens)); /* Free the O(P) array ASAP */
998 rlentotal = 0;
999 for (i = 0; i < nrecv; i++) rlentotal += rlens[i];
1000
1001 /*
1002 Communicate with processors in recvfrom[] to populate rxindices and ryindices
1003 */
1004 PetscCall(PetscCommGetNewTag(ycomm, &tag1));
1005 PetscCall(PetscCommGetNewTag(ycomm, &tag2));
1006 PetscCall(PetscMalloc2(rlentotal, &rxindices, rlentotal, &ryindices));
1007 PetscCall(PetscMPIIntCast((nsend + nrecv) * 2, &nreq));
1008 PetscCall(PetscMalloc1(nreq, &reqs));
1009 sreqs = reqs;
1010 rreqs = PetscSafePointerPlusOffset(reqs, nsend * 2);
1011
1012 for (i = disp = 0; i < nrecv; i++) {
1013 count = rlens[i];
1014 PetscCallMPI(MPIU_Irecv(rxindices + disp, count, MPIU_INT, recvfrom[i], tag1, ycomm, rreqs + i));
1015 PetscCallMPI(MPIU_Irecv(ryindices + disp, count, MPIU_INT, recvfrom[i], tag2, ycomm, rreqs + nrecv + i));
1016 disp += rlens[i];
1017 }
1018
1019 for (i = 0; i < nsend; i++) {
1020 count = sstart[i + 1] - sstart[i];
1021 PetscCallMPI(MPIU_Isend(xindices_sorted + sstart[i], count, MPIU_INT, sendto[i], tag1, ycomm, sreqs + i));
1022 PetscCallMPI(MPIU_Isend(yindices_sorted + sstart[i], count, MPIU_INT, sendto[i], tag2, ycomm, sreqs + nsend + i));
1023 }
1024 PetscCallMPI(MPI_Waitall(nreq, reqs, MPI_STATUSES_IGNORE));
1025
1026 /* Transform VecScatter into SF */
1027 nleaves = rlentotal;
1028 PetscCall(PetscMalloc1(nleaves, &ilocal));
1029 PetscCall(PetscMalloc1(nleaves, &iremote));
1030 PetscCallMPI(MPI_Comm_rank(ycomm, &yrank));
1031 for (i = disp = 0; i < nrecv; i++) {
1032 for (j = 0; j < rlens[i]; j++) {
1033 k = disp + j; /* k-th index pair */
1034 ilocal[k] = ryindices[k] - yrange[yrank]; /* Convert y's global index to local index */
1035 PetscCall(PetscLayoutFindOwnerIndex(xlayout, rxindices[k], &rank, &iremote[k].index)); /* Convert x's global index to (rank, index) */
1036 iremote[k].rank = rank;
1037 }
1038 disp += rlens[i];
1039 }
1040
1041 PetscCall(PetscFree2(sstart, sendto));
1042 PetscCall(PetscFree(rlens));
1043 PetscCall(PetscFree(recvfrom));
1044 PetscCall(PetscFree(reqs));
1045 PetscCall(PetscFree2(rxindices, ryindices));
1046 PetscCall(PetscFree2(xindices_sorted, yindices_sorted));
1047 #endif
1048 } else {
1049 /* PtoS or StoS */
1050 PetscCall(ISGetLocalSize(iyy, &nleaves));
1051 PetscCall(PetscMalloc1(nleaves, &ilocal));
1052 PetscCall(PetscMalloc1(nleaves, &iremote));
1053 PetscCall(PetscArraycpy(ilocal, yindices, nleaves));
1054 for (i = 0; i < nleaves; i++) {
1055 PetscCall(PetscLayoutFindOwnerIndex(xlayout, xindices[i], &rank, &iremote[i].index));
1056 iremote[i].rank = rank;
1057 }
1058 }
1059
1060 /* MUST build SF on xx's comm, which is not necessarily identical to yy's comm.
1061 In SF's view, xx contains the roots (i.e., the remote) and iremote[].rank are ranks in xx's comm.
1062 yy contains leaves, which are local and can be thought as part of PETSC_COMM_SELF. */
1063 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)xx), &sf));
1064 sf->allow_multi_leaves = PETSC_TRUE;
1065 PetscCall(PetscSFSetFromOptions(sf));
1066 PetscCall(VecGetLocalSize(xx, &nroots));
1067 PetscCall(PetscSFSetGraph(sf, nroots, nleaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); /* Give ilocal/iremote to PETSc and no need to free them here */
1068
1069 /* Free memory no longer needed */
1070 PetscCall(ISRestoreIndices(ixx, &xindices));
1071 PetscCall(ISRestoreIndices(iyy, &yindices));
1072 if (can_do_block_opt) {
1073 PetscCall(VecDestroy(&xx));
1074 PetscCall(VecDestroy(&yy));
1075 PetscCall(ISDestroy(&ixx));
1076 PetscCall(ISDestroy(&iyy));
1077 } else if (xcommsize == 1) {
1078 PetscCall(VecDestroy(&xx));
1079 }
1080
1081 functionend:
1082 sf->vscat.bs = bs;
1083 if (sf->vscat.bs > 1) {
1084 PetscMPIInt ibs;
1085
1086 PetscCall(PetscMPIIntCast(sf->vscat.bs, &ibs));
1087 PetscCallMPI(MPI_Type_contiguous(ibs, MPIU_SCALAR, &sf->vscat.unit));
1088 PetscCallMPI(MPI_Type_commit(&sf->vscat.unit));
1089 } else {
1090 sf->vscat.unit = MPIU_SCALAR;
1091 }
1092 PetscCall(VecGetLocalSize(x, &sf->vscat.from_n));
1093 PetscCall(VecGetLocalSize(y, &sf->vscat.to_n));
1094 if (!ix_old) PetscCall(ISDestroy(&ix)); /* We created helper ix, iy. Free them */
1095 if (!iy_old) PetscCall(ISDestroy(&iy));
1096
1097 /* Set default */
1098 PetscCall(VecScatterSetFromOptions(sf));
1099 PetscCall(PetscSFSetUp(sf));
1100
1101 *newsf = sf;
1102 PetscFunctionReturn(PETSC_SUCCESS);
1103 }
1104
1105 /*@
1106 VecScatterCreateToAll - Creates a vector and a scatter context that copies all
1107 vector values to each processor
1108
1109 Collective
1110
1111 Input Parameter:
1112 . vin - an `MPIVEC`
1113
1114 Output Parameters:
1115 + ctx - scatter context
1116 - vout - output `SEQVEC` that is large enough to scatter into
1117
1118 Level: intermediate
1119
1120 Example Usage:
1121 .vb
1122 VecScatterCreateToAll(vin, &ctx, &vout);
1123
1124 // scatter as many times as you need
1125 VecScatterBegin(ctx, vin, vout, INSERT_VALUES, SCATTER_FORWARD);
1126 VecScatterEnd(ctx, vin, vout, INSERT_VALUES, SCATTER_FORWARD);
1127
1128 // destroy scatter context and local vector when no longer needed
1129 VecScatterDestroy(&ctx);
1130 VecDestroy(&vout);
1131 .ve
1132
1133 Notes:
1134 `vout` may be `NULL` [`PETSC_NULL_VEC` from Fortran] if you do not
1135 need to have it created
1136
1137 Do NOT create a vector and then pass it in as the final argument `vout`! `vout` is created by this routine
1138 automatically (unless you pass `NULL` in for that argument if you do not need it).
1139
1140 .seealso: [](sec_scatter), `VecScatter`, `VecScatterCreate()`, `VecScatterCreateToZero()`, `VecScatterBegin()`, `VecScatterEnd()`
1141 @*/
VecScatterCreateToAll(Vec vin,VecScatter * ctx,Vec * vout)1142 PetscErrorCode VecScatterCreateToAll(Vec vin, VecScatter *ctx, Vec *vout)
1143 {
1144 PetscInt N;
1145 IS is;
1146 Vec tmp;
1147 Vec *tmpv;
1148 PetscBool tmpvout = PETSC_FALSE;
1149 VecType roottype;
1150
1151 PetscFunctionBegin;
1152 PetscValidHeaderSpecific(vin, VEC_CLASSID, 1);
1153 PetscValidType(vin, 1);
1154 PetscAssertPointer(ctx, 2);
1155 if (vout) {
1156 PetscAssertPointer(vout, 3);
1157 tmpv = vout;
1158 } else {
1159 tmpvout = PETSC_TRUE;
1160 tmpv = &tmp;
1161 }
1162
1163 /* Create seq vec on each proc, with the same size of the original vec */
1164 PetscCall(VecGetSize(vin, &N));
1165 PetscCall(VecGetRootType_Private(vin, &roottype));
1166 PetscCall(VecCreate(PETSC_COMM_SELF, tmpv));
1167 PetscCall(VecSetSizes(*tmpv, N, PETSC_DECIDE));
1168 PetscCall(VecSetType(*tmpv, roottype));
1169 /* Create the VecScatter ctx with the communication info */
1170 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &is));
1171 PetscCall(VecScatterCreate(vin, is, *tmpv, is, ctx));
1172 PetscCall(ISDestroy(&is));
1173 if (tmpvout) PetscCall(VecDestroy(tmpv));
1174 PetscFunctionReturn(PETSC_SUCCESS);
1175 }
1176
1177 /*@
1178 VecScatterCreateToZero - Creates an output vector and a scatter context used to
1179 copy all vector values into the output vector on the zeroth processor
1180
1181 Collective
1182
1183 Input Parameter:
1184 . vin - `Vec` of type `MPIVEC`
1185
1186 Output Parameters:
1187 + ctx - scatter context
1188 - vout - output `SEQVEC` that is large enough to scatter into on processor 0 and
1189 of length zero on all other processors
1190
1191 Level: intermediate
1192
1193 Example Usage:
1194 .vb
1195 VecScatterCreateToZero(vin, &ctx, &vout);
1196
1197 // scatter as many times as you need
1198 VecScatterBegin(ctx, vin, vout, INSERT_VALUES, SCATTER_FORWARD);
1199 VecScatterEnd(ctx, vin, vout, INSERT_VALUES, SCATTER_FORWARD);
1200
1201 // destroy scatter context and local vector when no longer needed
1202 VecScatterDestroy(&ctx);
1203 VecDestroy(&vout);
1204 .ve
1205
1206 Notes:
1207 vout may be `NULL` [`PETSC_NULL_VEC` from Fortran] if you do not
1208 need to have it created
1209
1210 Do NOT create a vector and then pass it in as the final argument `vout`! `vout` is created by this routine
1211 automatically (unless you pass `NULL` in for that argument if you do not need it).
1212
1213 .seealso: [](sec_scatter), `VecScatter`, `VecScatterCreate()`, `VecScatterCreateToAll()`, `VecScatterBegin()`, `VecScatterEnd()`
1214 @*/
VecScatterCreateToZero(Vec vin,VecScatter * ctx,Vec * vout)1215 PetscErrorCode VecScatterCreateToZero(Vec vin, VecScatter *ctx, Vec *vout)
1216 {
1217 PetscInt N;
1218 PetscMPIInt rank;
1219 IS is;
1220 Vec tmp;
1221 Vec *tmpv;
1222 PetscBool tmpvout = PETSC_FALSE;
1223 VecType roottype;
1224
1225 PetscFunctionBegin;
1226 PetscValidHeaderSpecific(vin, VEC_CLASSID, 1);
1227 PetscValidType(vin, 1);
1228 PetscAssertPointer(ctx, 2);
1229 if (vout) {
1230 PetscAssertPointer(vout, 3);
1231 tmpv = vout;
1232 } else {
1233 tmpvout = PETSC_TRUE;
1234 tmpv = &tmp;
1235 }
1236
1237 /* Create vec on each proc, with the same size of the original vec all on process 0 */
1238 PetscCall(VecGetSize(vin, &N));
1239 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)vin), &rank));
1240 if (rank) N = 0;
1241 PetscCall(VecGetRootType_Private(vin, &roottype));
1242 PetscCall(VecCreate(PETSC_COMM_SELF, tmpv));
1243 PetscCall(VecSetSizes(*tmpv, N, PETSC_DECIDE));
1244 PetscCall(VecSetType(*tmpv, roottype));
1245 /* Create the VecScatter ctx with the communication info */
1246 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &is));
1247 PetscCall(VecScatterCreate(vin, is, *tmpv, is, ctx));
1248 PetscCall(ISDestroy(&is));
1249 if (tmpvout) PetscCall(VecDestroy(tmpv));
1250 PetscFunctionReturn(PETSC_SUCCESS);
1251 }
1252
1253 /*@
1254 VecScatterBegin - Begins a generalized scatter from one vector to
1255 another. Complete the scattering phase with `VecScatterEnd()`.
1256
1257 Neighbor-wise Collective
1258
1259 Input Parameters:
1260 + sf - scatter context generated by `VecScatterCreate()`
1261 . x - the vector from which we scatter
1262 . y - the vector to which we scatter
1263 . addv - either `ADD_VALUES`, `MAX_VALUES`, `MIN_VALUES` or `INSERT_VALUES`, with `INSERT_VALUES` mode any location
1264 not scattered to retains its old value; i.e. the vector is NOT first zeroed.
1265 - mode - the scattering mode, usually `SCATTER_FORWARD`. The available modes are: `SCATTER_FORWARD` or `SCATTER_REVERSE`
1266
1267 Level: intermediate
1268
1269 Notes:
1270 The vectors `x` and `y` need not be the same vectors used in the call
1271 to `VecScatterCreate()`, but `x` must have the same parallel data layout
1272 as that passed in as the `x` to `VecScatterCreate()`, similarly for the `y`.
1273 Most likely they have been obtained from `VecDuplicate()`.
1274
1275 You cannot change the values in the input vector between the calls to `VecScatterBegin()`
1276 and `VecScatterEnd()`.
1277
1278 If you use `SCATTER_REVERSE` the two arguments `x` and `y` should be reversed, from
1279 the `SCATTER_FORWARD`.
1280
1281 .vb
1282 y[iy[i]] = x[ix[i]], for i=0,...,ni-1
1283 .ve
1284
1285 This scatter is far more general than the conventional
1286 scatter, since it can be a gather or a scatter or a combination,
1287 depending on the indices ix and iy. If x is a parallel vector and y
1288 is sequential, `VecScatterBegin()` can serve to gather values to a
1289 single processor. Similarly, if `y` is parallel and `x` sequential, the
1290 routine can scatter from one processor to many processors.
1291
1292 .seealso: [](sec_scatter), `VecScatter`, `VecScatterCreate()`, `VecScatterEnd()`, `InsertMode`, `ScatterMode`
1293 @*/
VecScatterBegin(VecScatter sf,Vec x,Vec y,InsertMode addv,ScatterMode mode)1294 PetscErrorCode VecScatterBegin(VecScatter sf, Vec x, Vec y, InsertMode addv, ScatterMode mode)
1295 {
1296 PetscInt to_n, from_n;
1297
1298 PetscFunctionBegin;
1299 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1300 PetscValidHeaderSpecific(x, VEC_CLASSID, 2);
1301 PetscValidHeaderSpecific(y, VEC_CLASSID, 3);
1302 if (PetscDefined(USE_DEBUG)) {
1303 /*
1304 Error checking to make sure these vectors match the vectors used
1305 to create the vector scatter context. -1 in the from_n and to_n indicate the
1306 vector lengths are unknown (for example with mapped scatters) and thus
1307 no error checking is performed.
1308 */
1309 if (sf->vscat.from_n >= 0 && sf->vscat.to_n >= 0) {
1310 PetscCall(VecGetLocalSize(x, &from_n));
1311 PetscCall(VecGetLocalSize(y, &to_n));
1312 if (mode & SCATTER_REVERSE) {
1313 PetscCheck(to_n == sf->vscat.from_n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Vector wrong size %" PetscInt_FMT " for scatter %" PetscInt_FMT " (scatter reverse and vector to != sf from size)", to_n, sf->vscat.from_n);
1314 PetscCheck(from_n == sf->vscat.to_n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Vector wrong size %" PetscInt_FMT " for scatter %" PetscInt_FMT " (scatter reverse and vector from != sf to size)", from_n, sf->vscat.to_n);
1315 } else {
1316 PetscCheck(to_n == sf->vscat.to_n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Vector wrong size %" PetscInt_FMT " for scatter %" PetscInt_FMT " (scatter forward and vector to != sf to size)", to_n, sf->vscat.to_n);
1317 PetscCheck(from_n == sf->vscat.from_n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Vector wrong size %" PetscInt_FMT " for scatter %" PetscInt_FMT " (scatter forward and vector from != sf from size)", from_n, sf->vscat.from_n);
1318 }
1319 }
1320 }
1321
1322 sf->vscat.logging = PETSC_TRUE;
1323 PetscCall(PetscLogEventBegin(VEC_ScatterBegin, sf, x, y, 0));
1324 PetscCall(VecScatterBegin_Internal(sf, x, y, addv, mode));
1325 if (sf->vscat.beginandendtogether) PetscCall(VecScatterEnd_Internal(sf, x, y, addv, mode));
1326 PetscCall(PetscLogEventEnd(VEC_ScatterBegin, sf, x, y, 0));
1327 sf->vscat.logging = PETSC_FALSE;
1328 PetscFunctionReturn(PETSC_SUCCESS);
1329 }
1330
1331 /*@
1332 VecScatterEnd - Ends a generalized scatter from one vector to another. Call
1333 after first calling `VecScatterBegin()`.
1334
1335 Neighbor-wise Collective
1336
1337 Input Parameters:
1338 + sf - scatter context generated by `VecScatterCreate()`
1339 . x - the vector from which we scatter
1340 . y - the vector to which we scatter
1341 . addv - one of `ADD_VALUES`, `MAX_VALUES`, `MIN_VALUES` or `INSERT_VALUES`
1342 - mode - the scattering mode, usually `SCATTER_FORWARD`. The available modes are: `SCATTER_FORWARD`, `SCATTER_REVERSE`
1343
1344 Level: intermediate
1345
1346 Notes:
1347 If you use `SCATTER_REVERSE` the arguments `x` and `y` should be reversed, from the `SCATTER_FORWARD`.
1348
1349 y[iy[i]] = x[ix[i]], for i=0,...,ni-1
1350
1351 .seealso: [](sec_scatter), `VecScatter`, `VecScatterBegin()`, `VecScatterCreate()`
1352 @*/
VecScatterEnd(VecScatter sf,Vec x,Vec y,InsertMode addv,ScatterMode mode)1353 PetscErrorCode VecScatterEnd(VecScatter sf, Vec x, Vec y, InsertMode addv, ScatterMode mode)
1354 {
1355 PetscFunctionBegin;
1356 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1357 PetscValidHeaderSpecific(x, VEC_CLASSID, 2);
1358 PetscValidHeaderSpecific(y, VEC_CLASSID, 3);
1359 if (!sf->vscat.beginandendtogether) {
1360 sf->vscat.logging = PETSC_TRUE;
1361 PetscCall(PetscLogEventBegin(VEC_ScatterEnd, sf, x, y, 0));
1362 PetscCall(VecScatterEnd_Internal(sf, x, y, addv, mode));
1363 PetscCall(PetscLogEventEnd(VEC_ScatterEnd, sf, x, y, 0));
1364 sf->vscat.logging = PETSC_FALSE;
1365 }
1366 PetscFunctionReturn(PETSC_SUCCESS);
1367 }
1368