#include <petsc/private/sfimpl.h>                 /*I "petscsf.h" I*/
#include <../src/vec/is/sf/impls/basic/sfbasic.h> /* for VecScatterRemap_Internal */
#include <../src/vec/is/sf/impls/basic/sfpack.h>
#include <petsc/private/vecimpl.h>

PETSC_SINGLE_LIBRARY_INTERN PetscErrorCode PetscGatherNumberOfMessages_Private(MPI_Comm, const PetscMPIInt[], const PetscInt[], PetscMPIInt *);
PETSC_SINGLE_LIBRARY_INTERN PetscErrorCode PetscGatherMessageLengths_Private(MPI_Comm, PetscMPIInt, PetscMPIInt, const PetscInt[], PetscMPIInt **, PetscInt **);

typedef enum {
  IS_INVALID,
  IS_GENERAL,
  IS_BLOCK,
  IS_STRIDE
} ISTypeID;

static inline PetscErrorCode ISGetTypeID_Private(IS is, ISTypeID *id)
{
  PetscBool same;

  PetscFunctionBegin;
  *id = IS_INVALID;
  PetscCall(PetscObjectTypeCompare((PetscObject)is, ISGENERAL, &same));
  if (same) {
    *id = IS_GENERAL;
    goto functionend;
  }
  PetscCall(PetscObjectTypeCompare((PetscObject)is, ISBLOCK, &same));
  if (same) {
    *id = IS_BLOCK;
    goto functionend;
  }
  PetscCall(PetscObjectTypeCompare((PetscObject)is, ISSTRIDE, &same));
  if (same) {
    *id = IS_STRIDE;
    goto functionend;
  }
functionend:
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecScatterBegin_Internal(VecScatter sf, Vec x, Vec y, InsertMode addv, ScatterMode mode)
{
  PetscSF      wsf = NULL; /* either sf or its local part */
  MPI_Op       mop = MPI_OP_NULL;
  PetscMPIInt  size;
  PetscMemType xmtype = PETSC_MEMTYPE_HOST, ymtype = PETSC_MEMTYPE_HOST;

  PetscFunctionBegin;
  if (x != y) PetscCall(VecLockReadPush(x));
  PetscCall(VecGetArrayReadAndMemType(x, &sf->vscat.xdata, &xmtype));
  PetscCall(VecGetArrayAndMemType(y, &sf->vscat.ydata, &ymtype));
  PetscCall(VecLockWriteSet(y, PETSC_TRUE));

  /* SCATTER_FORWARD_LOCAL indicates ignoring inter-process communication */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
  if ((mode & SCATTER_FORWARD_LOCAL) && size > 1) { /* Lazy creation of sf->vscat.lsf since SCATTER_FORWARD_LOCAL is uncommon */
    if (!sf->vscat.lsf) PetscCall(PetscSFCreateLocalSF_Private(sf, &sf->vscat.lsf));
    wsf = sf->vscat.lsf;
  } else {
    wsf = sf;
  }

  /* Note xdata/ydata is always recorded on sf (not lsf) above */
  if (addv == INSERT_VALUES) mop = MPI_REPLACE;
  else if (addv == ADD_VALUES) mop = MPIU_SUM; /* PETSc defines its own MPI datatype and SUM operation for __float128 etc. */
  else if (addv == MAX_VALUES) mop = MPIU_MAX;
  else if (addv == MIN_VALUES) mop = MPIU_MIN;
  else SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "Unsupported InsertMode %d in VecScatterBegin/End", addv);

  if (mode & SCATTER_REVERSE) { /* REVERSE indicates leaves to root scatter. Note that x and y are swapped in input */
    PetscCall(PetscSFReduceWithMemTypeBegin(wsf, sf->vscat.unit, xmtype, sf->vscat.xdata, ymtype, sf->vscat.ydata, mop));
  } else { /* FORWARD indicates x to y scatter, where x is root and y is leaf */
    PetscCall(PetscSFBcastWithMemTypeBegin(wsf, sf->vscat.unit, xmtype, sf->vscat.xdata, ymtype, sf->vscat.ydata, mop));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecScatterEnd_Internal(VecScatter sf, Vec x, Vec y, InsertMode addv, ScatterMode mode)
{
  PetscSF     wsf = NULL;
  MPI_Op      mop = MPI_OP_NULL;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* SCATTER_FORWARD_LOCAL indicates ignoring inter-process communication */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
  wsf = ((mode & SCATTER_FORWARD_LOCAL) && size > 1) ? sf->vscat.lsf : sf;

  if (addv == INSERT_VALUES) mop = MPI_REPLACE;
  else if (addv == ADD_VALUES) mop = MPIU_SUM;
  else if (addv == MAX_VALUES) mop = MPIU_MAX;
  else if (addv == MIN_VALUES) mop = MPIU_MIN;
  else SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "Unsupported InsertMode %d in VecScatterBegin/End", addv);

  if (mode & SCATTER_REVERSE) { /* reverse scatter sends leaves to roots. Note that x and y are swapped in input */
    PetscCall(PetscSFReduceEnd(wsf, sf->vscat.unit, sf->vscat.xdata, sf->vscat.ydata, mop));
  } else { /* forward scatter sends roots to leaves, i.e., x to y */
    PetscCall(PetscSFBcastEnd(wsf, sf->vscat.unit, sf->vscat.xdata, sf->vscat.ydata, mop));
  }

  PetscCall(VecRestoreArrayReadAndMemType(x, &sf->vscat.xdata));
  if (x != y) PetscCall(VecLockReadPop(x));
  PetscCall(VecRestoreArrayAndMemType(y, &sf->vscat.ydata));
  PetscCall(VecLockWriteSet(y, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* VecScatterRemap provides a lightweight way to slightly modify a VecScatter. Suppose the input sf scatters
   x[i] to y[j]; tomap gives a plan to change vscat so that it scatters x[tomap[i]] to y[j]. Note that in SF,
   x is the roots. That means we need to change incoming data such as bas->irootloc[].
 */
static PetscErrorCode VecScatterRemap_Internal(VecScatter sf, const PetscInt *tomap, const PetscInt *frommap)
{
  PetscInt       i, bs = sf->vscat.bs;
  PetscMPIInt    size;
  PetscBool      ident = PETSC_TRUE, isbasic, isneighbor;
  PetscSFType    type;
  PetscSF_Basic *bas = NULL;

  PetscFunctionBegin;
  /* check if it is an identity map. If it is, do nothing */
  if (tomap) {
    for (i = 0; i < sf->nroots * bs; i++) {
      if (i != tomap[i]) {
        ident = PETSC_FALSE;
        break;
      }
    }
    if (ident) PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCheck(!frommap, PETSC_COMM_SELF, PETSC_ERR_SUP, "Unable to remap the FROM in scatters yet");
  if (!tomap) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));

  /* Since the indices changed, we must also update the local SF. But we do not do that here, since
     lsf is rarely used. We just destroy lsf and rebuild it on demand from the updated sf.
  */
  if (sf->vscat.lsf) PetscCall(PetscSFDestroy(&sf->vscat.lsf));

  PetscCall(PetscSFGetType(sf, &type));
  PetscCall(PetscObjectTypeCompare((PetscObject)sf, PETSCSFBASIC, &isbasic));
  PetscCall(PetscObjectTypeCompare((PetscObject)sf, PETSCSFNEIGHBOR, &isneighbor));
  PetscCheck(isbasic || isneighbor, PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "VecScatterRemap on SF type %s is not supported", type);

  PetscCall(PetscSFSetUp(sf)); /* to build sf->irootloc if SetUp is not yet called */

  /* Root indices are going to be remapped. This is tricky for SF. Root indices are used in sf->rremote,
    sf->remote and bas->irootloc. The last one is cheap to remap, but the former two are not.
    To remap them, we would have to do a bcast from roots to leaves, to let leaves know their updated roots.
    Since VecScatterRemap is supposed to be a cheap routine that adapts a vecscatter by only changing where
    x[] data is taken, we do not remap sf->rremote and sf->remote. The consequence is that operations
    accessing them (such as PetscSFCompose) may get stale info. Since VecScatter does not need
    such complicated SF operations, instead of remapping sf->rremote and sf->remote we destroy them,
    so that code accessing them (if any) will crash instead of getting silent errors. Note that BcastAndOp/Reduce,
    which are used by VecScatter and rely only on bas->irootloc, are updated and correct.
  */
  sf->remote = NULL;
  PetscCall(PetscFree(sf->remote_alloc));
  /* Not easy to free sf->rremote since it was allocated with PetscMalloc4(), so just give it crazy values */
  for (i = 0; i < sf->roffset[sf->nranks]; i++) sf->rremote[i] = PETSC_INT_MIN;

  /* Indices in tomap[] are for each individual vector entry. But indices in sf are for each
     block in the vector. So before the remapping, we have to expand indices in sf by bs, and
     after the remapping, we have to shrink them back.
   */
  bas = (PetscSF_Basic *)sf->data;
  for (i = 0; i < bas->ioffset[bas->niranks]; i++) bas->irootloc[i] = tomap[bas->irootloc[i] * bs] / bs;
#if defined(PETSC_HAVE_DEVICE)
  /* Free the irootloc copy on device. We allocate a new copy and get the updated value on demand. See PetscSFLinkGetRootPackOptAndIndices() */
  for (i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, bas->irootloc_d[i]));
#endif
  /* Destroy and then rebuild root packing optimizations since indices are changed */
  PetscCall(PetscSFResetPackFields(sf));
  PetscCall(PetscSFSetUpPackFields(sf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Given a parallel VecScatter context, return the number of processes and vector entries involved in remote (i.e., off-process) communication

  Input Parameters:
+ sf   - the context (must be a parallel vecscatter)
- send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)

  Output parameters:
+ num_procs   - number of remote processors
- num_entries - number of vector entries to send or recv

  Notes:
  Sometimes PETSc internally needs to use the matrix-vector-multiply vecscatter context for other purposes. The client code
  usually only uses MPI_Send/Recv. This group of subroutines provides the info needed for such uses.
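
  Example Usage:
  A minimal sketch of sizing a send buffer with the returned counts; the PetscScalar buffer sbuf is hypothetical:
.vb
  PetscInt     nsend, nentries;
  PetscScalar *sbuf;
  PetscCall(VecScatterGetRemoteCount_Private(sf, PETSC_TRUE, &nsend, &nentries));
  PetscCall(PetscMalloc1(nentries, &sbuf)); // one buffer covering all nsend outgoing messages
.ve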

.seealso: [](sec_scatter), `VecScatterGetRemote_Private()`, `VecScatterGetRemoteOrdered_Private()`
 */
PetscErrorCode VecScatterGetRemoteCount_Private(VecScatter sf, PetscBool send, PetscInt *num_procs, PetscInt *num_entries)
{
  PetscMPIInt        nranks, remote_start;
  PetscMPIInt        rank;
  const PetscInt    *offset;
  const PetscMPIInt *ranks;

  PetscFunctionBegin;
  PetscCall(PetscSFSetUp(sf));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));

  /* This routine is mainly used for MatMult's Mvctx. In Mvctx, we scatter an MPI vector x to a sequential vector lvec.
     Remember x is roots and lvec is leaves. 'send' means roots to leaves communication. If 'send' is true, we need to
     get info about which ranks this processor needs to send to. In other words, we need to call PetscSFGetLeafRanks().
     If send is false, we do the opposite, calling PetscSFGetRootRanks().
  */
  if (send) PetscCall(PetscSFGetLeafRanks(sf, &nranks, &ranks, &offset, NULL));
  else PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, &offset, NULL, NULL));
  if (nranks) {
    remote_start = (rank == ranks[0]) ? 1 : 0;
    if (num_procs) *num_procs = nranks - remote_start;
    if (num_entries) *num_entries = offset[nranks] - offset[remote_start];
  } else {
    if (num_procs) *num_procs = 0;
    if (num_entries) *num_entries = 0;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Given a parallel VecScatter context, return a plan that represents the remote communication.
   Any output parameter can be NULL.

  Input Parameters:
+ sf   - the context
- send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)

  Output parameters:
+ n        - number of remote processors
. starts   - starting point in indices for each proc. ATTENTION: starts[0] is not necessarily zero.
             Therefore, expressions like starts[i+1]-starts[i] and indices[starts[i]+j] work as
             expected for a CSR structure, but buf[starts[i]+j] may be out of range if buf was allocated
             with length starts[n]-starts[0]. One should use buf[starts[i]-starts[0]+j] instead; see the
             packing sketch below.
. indices  - indices of entries to send/recv
. procs    - ranks of remote processors
- bs       - block size
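
  Example Usage:
  A minimal packing sketch illustrating the starts[] offset rule above; the PetscScalar arrays xarray and buf are hypothetical:
.vb
  PetscMPIInt        n;
  const PetscInt    *starts, *indices;
  const PetscMPIInt *procs;
  PetscScalar       *buf;
  PetscCall(VecScatterGetRemote_Private(sf, PETSC_TRUE, &n, &starts, &indices, &procs, NULL));
  PetscCall(PetscMalloc1(n ? starts[n] - starts[0] : 0, &buf));
  for (PetscMPIInt i = 0; i < n; i++) { // pack entries going to rank procs[i]
    for (PetscInt j = 0; j < starts[i + 1] - starts[i]; j++) buf[starts[i] - starts[0] + j] = xarray[indices[starts[i] + j]];
  }
  PetscCall(VecScatterRestoreRemote_Private(sf, PETSC_TRUE, &n, &starts, &indices, &procs, NULL));
.ve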

.seealso: `VecScatterRestoreRemote_Private()`, `VecScatterGetRemoteOrdered_Private()`
 */
PetscErrorCode VecScatterGetRemote_Private(VecScatter sf, PetscBool send, PetscMPIInt *n, const PetscInt **starts, const PetscInt **indices, const PetscMPIInt **procs, PetscInt *bs)
{
  PetscMPIInt        nranks, remote_start;
  PetscMPIInt        rank;
  const PetscInt    *offset, *location;
  const PetscMPIInt *ranks;

  PetscFunctionBegin;
  PetscCall(PetscSFSetUp(sf));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));

  if (send) PetscCall(PetscSFGetLeafRanks(sf, &nranks, &ranks, &offset, &location));
  else PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, &offset, &location, NULL));

  if (nranks) {
    remote_start = (rank == ranks[0]) ? 1 : 0;
    if (n) *n = nranks - remote_start;
    if (starts) *starts = &offset[remote_start];
    if (indices) *indices = location; /* not &location[offset[remote_start]]. starts[0] may point to the middle of indices[] */
    if (procs) *procs = &ranks[remote_start];
  } else {
    if (n) *n = 0;
    if (starts) *starts = NULL;
    if (indices) *indices = NULL;
    if (procs) *procs = NULL;
  }

  if (bs) *bs = 1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Given a parallel VecScatter context, return a plan that represents the remote communication. Ranks of remote
   processors returned in procs must be sorted in ascending order. Any output parameter can be NULL.

  Input Parameters:
+ sf   - the context
- send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)

  Output parameters:
+ n        - number of remote processors
. starts   - starting point in indices for each proc. ATTENTION: starts[0] is not necessarily zero.
             Therefore, expressions like starts[i+1]-starts[i] and indices[starts[i]+j] work as
             expected for a CSR structure, but buf[starts[i]+j] may be out of range if buf was allocated
             with length starts[n]-starts[0]. One should use buf[starts[i]-starts[0]+j] instead.
. indices  - indices of entries to send/recv
. procs    - ranks of remote processors
- bs       - block size

  Notes:
  Output parameters like starts, indices must also be adapted according to the sorted ranks.

.seealso: `VecScatterRestoreRemoteOrdered_Private()`, `VecScatterGetRemote_Private()`
 */
PetscErrorCode VecScatterGetRemoteOrdered_Private(VecScatter sf, PetscBool send, PetscMPIInt *n, const PetscInt **starts, const PetscInt **indices, const PetscMPIInt **procs, PetscInt *bs)
{
  PetscFunctionBegin;
  PetscCall(VecScatterGetRemote_Private(sf, send, n, starts, indices, procs, bs));
  if (PetscUnlikelyDebug(n && procs)) {
    PetscMPIInt i;
    /* from back to front to also handle the case *n = 0 */
    for (i = *n - 1; i > 0; i--) PetscCheck((*procs)[i - 1] <= (*procs)[i], PETSC_COMM_SELF, PETSC_ERR_PLIB, "procs[] are not ordered");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Given a parallel VecScatter context, restore the plan returned by VecScatterGetRemote_Private. This gives a chance for
   an implementation to free memory allocated in the VecScatterGetRemote_Private call.

  Input Parameters:
+ sf   - the context
- send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)

  Output parameters:
+ n        - number of remote processors
. starts   - starting point in indices for each proc
. indices  - indices of entries to send/recv
. procs    - ranks of remote processors
- bs       - block size

.seealso: `VecScatterGetRemote_Private()`
 */
PetscErrorCode VecScatterRestoreRemote_Private(VecScatter sf, PetscBool send, PetscMPIInt *n, const PetscInt **starts, const PetscInt **indices, const PetscMPIInt **procs, PetscInt *bs)
{
  PetscFunctionBegin;
  if (starts) *starts = NULL;
  if (indices) *indices = NULL;
  if (procs) *procs = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Given a parallel VecScatter context, restore the plan returned by VecScatterGetRemoteOrdered_Private. This gives a chance for
   an implementation to free memory allocated in the VecScatterGetRemoteOrdered_Private call.

  Input Parameters:
+ sf   - the context
- send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)

  Output parameters:
+ n        - number of remote processors
. starts   - starting point in indices for each proc
. indices  - indices of entries to send/recv
. procs    - ranks of remote processors
- bs       - block size

.seealso: `VecScatterGetRemoteOrdered_Private()`
 */
PetscErrorCode VecScatterRestoreRemoteOrdered_Private(VecScatter sf, PetscBool send, PetscMPIInt *n, const PetscInt **starts, const PetscInt **indices, const PetscMPIInt **procs, PetscInt *bs)
{
  PetscFunctionBegin;
  PetscCall(VecScatterRestoreRemote_Private(sf, send, n, starts, indices, procs, bs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  VecScatterSetUp - Sets up the `VecScatter` to be able to actually scatter information between vectors

  Collective

  Input Parameter:
. sf - the scatter context

  Level: intermediate

.seealso: [](sec_scatter), `VecScatter`, `VecScatterCreate()`, `VecScatterCopy()`
@*/
PetscErrorCode VecScatterSetUp(VecScatter sf)
{
  PetscFunctionBegin;
  PetscCall(PetscSFSetUp(sf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  VecScatterSetType - Builds a vector scatter for a particular vector scatter implementation.

  Collective

  Input Parameters:
+ sf   - The `VecScatter` object
- type - The name of the vector scatter type

  Options Database Key:
. -sf_type <type> - Sets the `VecScatterType`

  Level: intermediate

  Note:
  Use `VecScatterDuplicate()` to form additional vector scatters of the same type as an existing vector scatter.
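
  Example Usage:
  A minimal sketch; since this routine forwards to `PetscSFSetType()`, an SF type name such as `PETSCSFNEIGHBOR` can be passed:
.vb
  PetscCall(VecScatterSetType(sf, PETSCSFNEIGHBOR)); // e.g., use MPI neighborhood collectives
.ve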

.seealso: [](sec_scatter), `VecScatter`, `VecScatterType`, `VecScatterGetType()`, `VecScatterCreate()`
@*/
PetscErrorCode VecScatterSetType(VecScatter sf, VecScatterType type)
{
  PetscFunctionBegin;
  PetscCall(PetscSFSetType(sf, type));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  VecScatterGetType - Gets the vector scatter type name (as a string) from the `VecScatter`.

  Not Collective

  Input Parameter:
. sf - The vector scatter

  Output Parameter:
. type - The vector scatter type name

  Level: intermediate

.seealso: [](sec_scatter), `VecScatter`, `VecScatterType`, `VecScatterSetType()`, `VecScatterCreate()`
@*/
PetscErrorCode VecScatterGetType(VecScatter sf, VecScatterType *type)
{
  PetscFunctionBegin;
  PetscCall(PetscSFGetType(sf, type));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  VecScatterRegister - Adds a new vector scatter component implementation

  Not Collective

  Input Parameters:
+ sname    - The name of a new user-defined creation routine
- function - The creation routine

  Level: advanced

.seealso: [](sec_scatter), `VecScatter`, `VecScatterType`, `VecRegister()`
@*/
PetscErrorCode VecScatterRegister(const char sname[], PetscErrorCode (*function)(VecScatter))
{
  PetscFunctionBegin;
  PetscCall(PetscSFRegister(sname, function));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  VecScatterGetMerged - Returns true if the scatter is completed in the `VecScatterBegin()`
  so that the `VecScatterEnd()` does nothing

  Not Collective

  Input Parameter:
. sf - scatter context created with `VecScatterCreate()`

  Output Parameter:
. flg - `PETSC_TRUE` if the `VecScatterBegin()`/`VecScatterEnd()` are all done during the `VecScatterBegin()`

  Level: developer
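
  Example Usage:
  A minimal sketch of overlapping local work with communication only when the scatter is not merged; do_local_work() is hypothetical:
.vb
  PetscBool merged;
  PetscCall(VecScatterGetMerged(sf, &merged));
  PetscCall(VecScatterBegin(sf, x, y, INSERT_VALUES, SCATTER_FORWARD));
  if (!merged) do_local_work(); // communication may still be in flight here
  PetscCall(VecScatterEnd(sf, x, y, INSERT_VALUES, SCATTER_FORWARD));
.ve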

.seealso: [](sec_scatter), `VecScatter`, `VecScatterCreate()`, `VecScatterEnd()`, `VecScatterBegin()`
@*/
PetscErrorCode VecScatterGetMerged(VecScatter sf, PetscBool *flg)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (flg) *flg = sf->vscat.beginandendtogether;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  VecScatterDestroy - Destroys a scatter context created by `VecScatterCreate()`

  Collective

  Input Parameter:
. sf - the scatter context

  Level: intermediate

.seealso: [](sec_scatter), `VecScatter`, `VecScatterCreate()`, `VecScatterCopy()`
@*/
PetscErrorCode VecScatterDestroy(VecScatter *sf)
{
  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(sf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  VecScatterCopy - Makes a copy of a scatter context.

  Collective

  Input Parameter:
. sf - the scatter context

  Output Parameter:
. newsf - the context copy

  Level: advanced
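
  Example Usage:
  A minimal sketch; a copy is handy when two scatters with the same pattern must be in flight
  concurrently (see the notes in `VecScatterCreate()`):
.vb
  VecScatter sf2;
  PetscCall(VecScatterCopy(sf, &sf2));
  // ... use sf and sf2 in two concurrent VecScatterBegin()/VecScatterEnd() pairs ...
  PetscCall(VecScatterDestroy(&sf2));
.ve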

.seealso: [](sec_scatter), `VecScatter`, `VecScatterType`, `VecScatterCreate()`, `VecScatterDestroy()`
@*/
PetscErrorCode VecScatterCopy(VecScatter sf, VecScatter *newsf)
{
  PetscFunctionBegin;
  PetscAssertPointer(newsf, 2);
  PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_GRAPH, newsf));
  PetscCall(PetscSFSetUp(*newsf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  VecScatterViewFromOptions - View a `VecScatter` object based on values in the options database

  Collective

  Input Parameters:
+ sf   - the scatter context
. obj  - Optional object
- name - command line option

  Level: intermediate

  Note:
  See `PetscObjectViewFromOptions()` for available `PetscViewer` and `PetscViewerFormat` values

.seealso: [](sec_scatter), `VecScatter`, `VecScatterView()`, `PetscObjectViewFromOptions()`, `VecScatterCreate()`
@*/
PetscErrorCode VecScatterViewFromOptions(VecScatter sf, PetscObject obj, const char name[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscObjectViewFromOptions((PetscObject)sf, obj, name));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  VecScatterView - Views a vector scatter context.

  Collective

  Input Parameters:
+ sf     - the scatter context
- viewer - the viewer for displaying the context

  Level: intermediate

.seealso: [](sec_scatter), `VecScatter`, `PetscViewer`, `VecScatterViewFromOptions()`, `PetscObjectViewFromOptions()`, `VecScatterCreate()`
@*/
PetscErrorCode VecScatterView(VecScatter sf, PetscViewer viewer)
{
  PetscFunctionBegin;
  PetscCall(PetscSFView(sf, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  VecScatterRemap - Remaps the "from" and "to" indices in a
  vector scatter context.

  Collective

  Input Parameters:
+ sf      - vector scatter context
. tomap   - remapping plan for "to" indices (may be `NULL`)
- frommap - remapping plan for "from" indices (may be `NULL`)

  Level: developer

  Notes:
  In the parallel case the todata contains indices from where the data is taken
  (and then sent to others)! The fromdata contains indices where the received
  data is finally put locally.

  In the sequential case the todata contains indices where the data is put
  and the fromdata contains indices from which the data is taken.
  This is backwards from the parallel case!
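
  Example Usage:
  A minimal sketch, remapping a parallel scatter so that each send now reads x[tomap[i]] where it
  used to read x[i]; nlocal is a hypothetical count of locally owned entries of x:
.vb
  PetscInt *tomap;
  PetscCall(PetscMalloc1(nlocal, &tomap));
  for (PetscInt i = 0; i < nlocal; i++) tomap[i] = nlocal - 1 - i; // some local permutation
  PetscCall(VecScatterRemap(sf, tomap, NULL));
  PetscCall(PetscFree(tomap));
.ve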

.seealso: [](sec_scatter), `VecScatter`, `VecScatterCreate()`
@*/
PetscErrorCode VecScatterRemap(VecScatter sf, PetscInt tomap[], PetscInt frommap[])
{
  PetscFunctionBegin;
  if (tomap) PetscAssertPointer(tomap, 2);
  if (frommap) PetscAssertPointer(frommap, 3);
  PetscCall(VecScatterRemap_Internal(sf, tomap, frommap));
  PetscCheck(!frommap, PETSC_COMM_SELF, PETSC_ERR_SUP, "Unable to remap the FROM in scatters yet");
  /* Mark the vector lengths as unknown because we do not know the lengths of the remapped vectors */
  sf->vscat.from_n = -1;
  sf->vscat.to_n   = -1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  VecScatterSetFromOptions - Configures the vector scatter from values in the options database.

  Collective

  Input Parameter:
. sf - The vector scatter

  Notes:
  To see all options, run your program with the -help option, or consult the users manual.

  Must be called before `VecScatterSetUp()` and before the vector scatter is used.

  Level: beginner
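
  Example Usage:
  A minimal sketch:
.vb
  PetscCall(VecScatterSetFromOptions(sf)); // picks up, e.g., -vecscatter_merge
  PetscCall(VecScatterSetUp(sf));
.ve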

.seealso: [](sec_scatter), `VecScatter`, `VecScatterCreate()`, `VecScatterDestroy()`, `VecScatterSetUp()`
@*/
PetscErrorCode VecScatterSetFromOptions(VecScatter sf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscObjectOptionsBegin((PetscObject)sf);

  sf->vscat.beginandendtogether = PETSC_FALSE;
  PetscCall(PetscOptionsBool("-vecscatter_merge", "Use combined (merged) vector scatter begin and end", "VecScatterCreate", sf->vscat.beginandendtogether, &sf->vscat.beginandendtogether, NULL));
  if (sf->vscat.beginandendtogether) PetscCall(PetscInfo(sf, "Using combined (merged) vector scatter begin and end\n"));
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  VecScatterCreate - Creates a vector scatter `VecScatter` context that is used to communicate entries between two vectors `Vec`

  Collective

  Input Parameters:
+ x  - a vector that defines the shape (parallel data layout of the vector) of vectors from which we scatter
. y  - a vector that defines the shape (parallel data layout of the vector) of vectors to which we scatter
. ix - the indices of `x` to scatter (if `NULL` scatters all values)
- iy - the indices of `y` to hold results (if `NULL` fills entire vector `y` in order)

  Output Parameter:
. newsf - location to store the new scatter context

  Options Database Keys:
+ -vecscatter_view              - Prints detail of communications
. -vecscatter_view ::ascii_info - Print less details about communication
- -vecscatter_merge             - `VecScatterBegin()` handles all of the communication, `VecScatterEnd()` is a no-op;
                                  this eliminates the chance to overlap computation and communication

  Level: intermediate

  Notes:
  If both `x` and `y` are parallel, their communicator must be on the same
  set of processes, but their process order can be different.

  In calls to the scatter routines you can use different vectors than the `x` and
  `y` you used above; BUT they must have the same parallel data layout, for example,
  they could be obtained from `VecDuplicate()`.

  A `VecScatter` context CANNOT be used in two or more simultaneous scatters;
  that is, you cannot call a second `VecScatterBegin()` with the same scatter
  context until the `VecScatterEnd()` has been called on the first `VecScatterBegin()`.
  In this case a separate `VecScatter` is needed for each concurrent scatter.

  Both `ix` and `iy` cannot be `NULL` at the same time.

  Use `VecScatterCreateToAll()` to create a `VecScatter` that copies an MPI vector to sequential vectors on all MPI processes.
  Use `VecScatterCreateToZero()` to create a `VecScatter` that copies an MPI vector to a sequential vector on MPI rank 0.
  These special `VecScatter` contexts have better performance than general ones.
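
  Example Usage:
  A minimal sketch gathering two entries of a parallel vector `x` into a small sequential vector;
  the global indices in idx are hypothetical:
.vb
  IS         ix;
  Vec        y;
  VecScatter sf;
  PetscInt   idx[2] = {0, 5}; // global indices of x to fetch
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, 2, &y));
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, 2, idx, PETSC_COPY_VALUES, &ix));
  PetscCall(VecScatterCreate(x, ix, y, NULL, &sf)); // NULL iy: fill all of y in order
  PetscCall(VecScatterBegin(sf, x, y, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(sf, x, y, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(ISDestroy(&ix));
.ve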

  Developer Note:
  Most `VecScatter` implementations are built on top of `PetscSF`.

.seealso: [](sec_scatter), `VecScatter`, `VecScatterDestroy()`, `VecScatterCreateToAll()`, `VecScatterCreateToZero()`, `PetscSFCreate()`,
          `VecScatterType`, `InsertMode`, `ScatterMode`, `VecScatterBegin()`, `VecScatterEnd()`
@*/
PetscErrorCode VecScatterCreate(Vec x, IS ix, Vec y, IS iy, VecScatter *newsf)
{
  MPI_Comm        xcomm, ycomm, bigcomm;
  Vec             xx, yy;
  IS              ix_old = ix, iy_old = iy, ixx, iyy;
  PetscMPIInt     xcommsize, ycommsize, rank, result;
  PetscInt        i, n, N, nroots, nleaves, *ilocal, xstart, ystart, ixsize, iysize, xlen, ylen;
  const PetscInt *xindices, *yindices;
  PetscSFNode    *iremote;
  PetscLayout     xlayout, ylayout;
  ISTypeID        ixid, iyid;
  PetscInt        bs, bsx, bsy, min, max, m[2], mg[2], ixfirst, ixstep, iyfirst, iystep;
  PetscBool       can_do_block_opt = PETSC_FALSE;
  PetscSF         sf;

  PetscFunctionBegin;
  PetscAssertPointer(newsf, 5);
  PetscCheck(ix || iy, PetscObjectComm((PetscObject)x), PETSC_ERR_SUP, "Cannot pass default in for both input and output indices");

  /* Get comm from x and y */
  PetscCall(PetscObjectGetComm((PetscObject)x, &xcomm));
  PetscCallMPI(MPI_Comm_size(xcomm, &xcommsize));
  PetscCall(PetscObjectGetComm((PetscObject)y, &ycomm));
  PetscCallMPI(MPI_Comm_size(ycomm, &ycommsize));
  if (xcommsize > 1 && ycommsize > 1) {
    PetscCallMPI(MPI_Comm_compare(xcomm, ycomm, &result));
    PetscCheck(result != MPI_UNEQUAL, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMECOMM, "VecScatterCreate: parallel vectors x and y must have identical/congruent/similar communicators");
  }
  bs = 1; /* default, no blocking */

  /*
   Let P and S stand for parallel and sequential vectors respectively. There are four combinations of vecscatters: PtoP, PtoS,
   StoP and StoS. The assumption of VecScatterCreate(Vec x,IS ix,Vec y,IS iy,VecScatter *newctx) is: if x is parallel, then ix
   contains global indices of x. If x is sequential, ix contains local indices of x. Similarly for y and iy.

   SF builds around the concepts of local leaves and remote roots. We treat the source vector x as roots and the destination
   vector y as leaves. A PtoS scatter can be naturally mapped to SF. We transform PtoP and StoP to PtoS, and treat StoS as a trivial PtoS.
  */

  /* NULL ix or iy in VecScatterCreate(x,ix,y,iy,newctx) has special meaning. Recover them for these cases */
  if (!ix) {
    if (xcommsize > 1 && ycommsize == 1) { /* PtoS: null ix means the whole x will be scattered to each seq y */
      PetscCall(VecGetSize(x, &N));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &ix));
    } else { /* PtoP, StoP or StoS: null ix means the whole local part of x will be scattered */
      PetscCall(VecGetLocalSize(x, &n));
      PetscCall(VecGetOwnershipRange(x, &xstart, NULL));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, xstart, 1, &ix));
    }
  }

  if (!iy) {
    if (xcommsize == 1 && ycommsize > 1) { /* StoP: null iy means each seq x will be scattered into the whole y */
      PetscCall(VecGetSize(y, &N));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iy));
    } else { /* PtoP, StoP or StoS: null iy means the whole local part of y will be scattered to */
      PetscCall(VecGetLocalSize(y, &n));
      PetscCall(VecGetOwnershipRange(y, &ystart, NULL));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, ystart, 1, &iy));
    }
  }

  /* Do error checking immediately after we have non-null ix, iy */
  PetscCall(ISGetLocalSize(ix, &ixsize));
  PetscCall(ISGetLocalSize(iy, &iysize));
  PetscCall(VecGetSize(x, &xlen));
  PetscCall(VecGetSize(y, &ylen));
  PetscCheck(ixsize == iysize, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Scatter sizes of ix and iy don't match locally ix=%" PetscInt_FMT " iy=%" PetscInt_FMT, ixsize, iysize);
  PetscCall(ISGetMinMax(ix, &min, &max));
  PetscCheck(min >= 0 && max < xlen, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Scatter indices in ix are out of range: found [%" PetscInt_FMT ",%" PetscInt_FMT "), expected in [0,%" PetscInt_FMT ")", min, max, xlen);
  PetscCall(ISGetMinMax(iy, &min, &max));
  PetscCheck(min >= 0 && max < ylen, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Scatter indices in iy are out of range: found [%" PetscInt_FMT ",%" PetscInt_FMT "), expected in [0,%" PetscInt_FMT ")", min, max, ylen);

  /* Extract info about ix, iy for further tests */
  PetscCall(ISGetTypeID_Private(ix, &ixid));
  PetscCall(ISGetTypeID_Private(iy, &iyid));
  if (ixid == IS_BLOCK) PetscCall(ISGetBlockSize(ix, &bsx));
  else if (ixid == IS_STRIDE) PetscCall(ISStrideGetInfo(ix, &ixfirst, &ixstep));

  if (iyid == IS_BLOCK) PetscCall(ISGetBlockSize(iy, &bsy));
  else if (iyid == IS_STRIDE) PetscCall(ISStrideGetInfo(iy, &iyfirst, &iystep));

  /* Check if a PtoS is a special ToAll/ToZero scatter, which can be the result of VecScatterCreateToAll/Zero.
     ToAll means a whole MPI vector is copied to a seq vector on every process. ToZero means a whole MPI
     vector is copied to a seq vector on rank 0 and other processes do nothing (i.e., they input empty ix, iy).

     We can optimize these scatters with MPI collectives. We can also avoid the costly analysis used for general scatters.
  */
  if (xcommsize > 1 && ycommsize == 1) { /* Ranks do not diverge at this if-test */
    PetscInt    pattern[2] = {0, 0};     /* A boolean array with pattern[0] for allgather-like (ToAll) and pattern[1] for gather-like (ToZero) */
    PetscLayout map;

    PetscCallMPI(MPI_Comm_rank(xcomm, &rank));
    PetscCall(VecGetLayout(x, &map));
    if (rank == 0) {
      if (ixid == IS_STRIDE && iyid == IS_STRIDE && ixsize == xlen && ixfirst == 0 && ixstep == 1 && iyfirst == 0 && iystep == 1) {
        /* Rank 0 scatters the whole mpi x to seq y, so it is either a ToAll or a ToZero candidate in its view */
        pattern[0] = pattern[1] = 1;
      }
    } else {
      if (ixid == IS_STRIDE && iyid == IS_STRIDE && ixsize == xlen && ixfirst == 0 && ixstep == 1 && iyfirst == 0 && iystep == 1) {
        /* Other ranks also scatter the whole mpi x to seq y, so it is a ToAll candidate in their view */
        pattern[0] = 1;
      } else if (ixsize == 0) {
        /* Other ranks do nothing, so it is a ToZero candidate */
        pattern[1] = 1;
      }
    }

    /* One stone (the expensive allreduce) two birds: pattern[] tells if it is ToAll or ToZero */
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, pattern, 2, MPIU_INT, MPI_LAND, xcomm));

    if (pattern[0] || pattern[1]) {
      PetscCall(PetscSFCreate(xcomm, &sf));
      PetscCall(PetscSFSetFromOptions(sf));
      PetscCall(PetscSFSetGraphWithPattern(sf, map, pattern[0] ? PETSCSF_PATTERN_ALLGATHER : PETSCSF_PATTERN_GATHER));
      goto functionend; /* No further analysis needed. What a big win! */
    }
  }

786 
787   /* Continue ...
788      Do block optimization by taking advantage of high level info available in ix, iy.
789      The block optimization is valid when all of the following conditions are met:
790      1) ix, iy are blocked or can be blocked (i.e., strided with step=1);
791      2) ix, iy have the same block size;
792      3) all processors agree on one block size;
793      4) no blocks span more than one process;
794    */
795   bigcomm = (xcommsize == 1) ? ycomm : xcomm;
796 
797   /* Processors could go through different path in this if-else test */
798   m[0] = PETSC_INT_MAX;
799   m[1] = PETSC_INT_MIN;
800   if (ixid == IS_BLOCK && iyid == IS_BLOCK) {
801     m[0] = PetscMin(bsx, bsy);
802     m[1] = PetscMax(bsx, bsy);
803   } else if (ixid == IS_BLOCK && iyid == IS_STRIDE && iystep == 1 && iyfirst % bsx == 0) {
804     m[0] = bsx;
805     m[1] = bsx;
806   } else if (ixid == IS_STRIDE && iyid == IS_BLOCK && ixstep == 1 && ixfirst % bsy == 0) {
807     m[0] = bsy;
808     m[1] = bsy;
809   }
810   /* Get max and min of bsx,bsy over all processes in one allreduce */
811   PetscCall(PetscGlobalMinMaxInt(bigcomm, m, mg));
812 
813   /* Since we used allreduce above, all ranks will have the same min and max. min==max
814      implies all ranks have the same bs. Do further test to see if local vectors are dividable
815      by bs on ALL ranks. If they are, we are ensured that no blocks span more than one processor.
816    */
817   if (mg[0] == mg[1] && mg[0] > 1) {
818     PetscCall(VecGetLocalSize(x, &xlen));
819     PetscCall(VecGetLocalSize(y, &ylen));
820     m[0] = xlen % mg[0];
821     m[1] = ylen % mg[0];
822     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, m, 2, MPIU_INT, MPI_LOR, bigcomm));
823     if (!m[0] && !m[1]) can_do_block_opt = PETSC_TRUE;
824   }

  /* If can_do_block_opt, then shrink x, y, ix and iy by bs to get xx, yy, ixx and iyy, whose indices
     and layout are actually used in building SF. Suppose blocked ix representing {0,1,2,6,7,8} has
     indices {0,2} and bs=3, then ixx = {0,2}; suppose strided iy={3,4,5,6,7,8}, then iyy={1,2}.

     xx is a little special. If x is seq, then xx is the concatenation of seq x's on ycomm. In this way,
     we can treat PtoP and StoP uniformly as PtoS.
   */
  if (can_do_block_opt) {
    const PetscInt *indices;

    /* Shrink x and ix */
    bs = mg[0];
    PetscCall(VecCreateMPIWithArray(bigcomm, 1, xlen / bs, PETSC_DECIDE, NULL, &xx)); /* We only care about xx's layout */
    if (ixid == IS_BLOCK) {
      PetscCall(ISBlockGetIndices(ix, &indices));
      PetscCall(ISBlockGetLocalSize(ix, &ixsize));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ixsize, indices, PETSC_COPY_VALUES, &ixx));
      PetscCall(ISBlockRestoreIndices(ix, &indices));
    } else { /* ixid == IS_STRIDE */
      PetscCall(ISGetLocalSize(ix, &ixsize));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, ixsize / bs, ixfirst / bs, 1, &ixx));
    }

    /* Shrink y and iy */
    PetscCall(VecCreateMPIWithArray(ycomm, 1, ylen / bs, PETSC_DECIDE, NULL, &yy));
    if (iyid == IS_BLOCK) {
      PetscCall(ISBlockGetIndices(iy, &indices));
      PetscCall(ISBlockGetLocalSize(iy, &iysize));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, iysize, indices, PETSC_COPY_VALUES, &iyy));
      PetscCall(ISBlockRestoreIndices(iy, &indices));
    } else { /* iyid == IS_STRIDE */
      PetscCall(ISGetLocalSize(iy, &iysize));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, iysize / bs, iyfirst / bs, 1, &iyy));
    }
  } else {
    ixx = ix;
    iyy = iy;
    yy  = y;
    if (xcommsize == 1) PetscCall(VecCreateMPIWithArray(bigcomm, 1, xlen, PETSC_DECIDE, NULL, &xx));
    else xx = x;
  }

  /* Now we are ready to build the SF with the preprocessed (xx, yy) and (ixx, iyy) */
  PetscCall(ISGetIndices(ixx, &xindices));
  PetscCall(ISGetIndices(iyy, &yindices));
  PetscCall(VecGetLayout(xx, &xlayout));

  if (ycommsize > 1) {
    /* PtoP or StoP */

    /* Below is a piece of complex code with a very simple goal: move global index pairs (xindices[i], yindices[i]),
       to the owner process of yindices[i] according to ylayout, i = 0..n.

       I did it through a temp sf, but later I thought the old design was inefficient and also distorted the log view.
       We want to map one VecScatterCreate() call to one PetscSFCreate() call. The old design mapped to three
       PetscSFCreate() calls. This code is on the critical path of VecScatterSetUp and is used by every VecScatterCreate.
       So I commented it out and did another optimized implementation. The commented code is left here for reference.
     */
#if 0
    const PetscInt *degree;
    PetscSF        tmpsf;
    PetscInt       inedges=0,*leafdata,*rootdata;

    PetscCall(VecGetOwnershipRange(xx,&xstart,NULL));
    PetscCall(VecGetLayout(yy,&ylayout));
    PetscCall(VecGetOwnershipRange(yy,&ystart,NULL));

    PetscCall(VecGetLocalSize(yy,&nroots));
    PetscCall(ISGetLocalSize(iyy,&nleaves));
    PetscCall(PetscMalloc2(nleaves,&iremote,nleaves*2,&leafdata));

    for (i=0; i<nleaves; i++) {
      PetscCall(PetscLayoutFindOwnerIndex(ylayout,yindices[i],&iremote[i].rank,&iremote[i].index));
      leafdata[2*i]   = yindices[i];
      leafdata[2*i+1] = (xcommsize > 1)? xindices[i] : xindices[i] + xstart;
    }

    PetscCall(PetscSFCreate(ycomm,&tmpsf));
    PetscCall(PetscSFSetGraph(tmpsf,nroots,nleaves,NULL,PETSC_USE_POINTER,iremote,PETSC_USE_POINTER));

    PetscCall(PetscSFComputeDegreeBegin(tmpsf,&degree));
    PetscCall(PetscSFComputeDegreeEnd(tmpsf,&degree));

    for (i=0; i<nroots; i++) inedges += degree[i];
    PetscCall(PetscMalloc1(inedges*2,&rootdata));
    PetscCall(PetscSFGatherBegin(tmpsf,MPIU_2INT,leafdata,rootdata));
    PetscCall(PetscSFGatherEnd(tmpsf,MPIU_2INT,leafdata,rootdata));

    PetscCall(PetscFree2(iremote,leafdata));
    PetscCall(PetscSFDestroy(&tmpsf));

    /* rootdata contains global index pairs (i, j). j's are owned by the current process, but i's can point to anywhere.
       We convert j to local, and convert i to (rank, index). In the end, we get a PtoS suitable for building SF.
     */
    nleaves = inedges;
    PetscCall(VecGetLocalSize(xx,&nroots));
    PetscCall(PetscMalloc1(nleaves,&ilocal));
    PetscCall(PetscMalloc1(nleaves,&iremote));

    for (i=0; i<inedges; i++) {
      ilocal[i] = rootdata[2*i] - ystart; /* convert y's global index to local index */
      PetscCall(PetscLayoutFindOwnerIndex(xlayout,rootdata[2*i+1],&iremote[i].rank,&iremote[i].index)); /* convert x's global index to (rank, index) */
    }
    PetscCall(PetscFree(rootdata));
#else
    PetscInt        j, k, n, disp, rlentotal, *sstart, *xindices_sorted, *yindices_sorted;
    const PetscInt *yrange;
    PetscMPIInt     nsend, nrecv, nreq, yrank, *sendto, *recvfrom, tag1, tag2;
    PetscInt       *slens, *rlens, count;
    PetscInt       *rxindices, *ryindices;
    MPI_Request    *reqs, *sreqs, *rreqs;

    /* Sorting makes the code simpler and faster, and also helps get rid of many O(P) arrays, which hurt scalability at large scale
       yindices_sorted - sorted yindices
       xindices_sorted - xindices sorted along with yindices
     */
    PetscCall(ISGetLocalSize(ixx, &n)); /* ixx, iyy have the same local size */
    PetscCall(PetscMalloc2(n, &xindices_sorted, n, &yindices_sorted));
    PetscCall(PetscArraycpy(xindices_sorted, xindices, n));
    PetscCall(PetscArraycpy(yindices_sorted, yindices, n));
    PetscCall(PetscSortIntWithArray(n, yindices_sorted, xindices_sorted));
    PetscCall(VecGetOwnershipRange(xx, &xstart, NULL));
    if (xcommsize == 1) {
      for (i = 0; i < n; i++) xindices_sorted[i] += xstart;
    } /* Convert to global indices */

    /*
             Calculate info about messages I need to send
       nsend    - number of non-empty messages to send
       sendto   - [nsend] ranks I will send messages to
       sstart   - [nsend+1] sstart[i] is the start index in xindices_sorted[] I send to rank sendto[i]
       slens    - [ycommsize] I want to send slens[i] entries to rank i.
     */
    PetscCall(VecGetLayout(yy, &ylayout));
    PetscCall(PetscLayoutGetRanges(ylayout, &yrange));
    PetscCall(PetscCalloc1(ycommsize, &slens)); /* The only O(P) array in this algorithm */

    i = j = nsend = 0;
    while (i < n) {
      if (yindices_sorted[i] >= yrange[j + 1]) { /* If i-th index is out of rank j's bound */
        do {
          j++;
        } while (yindices_sorted[i] >= yrange[j + 1] && j < ycommsize); /* Increase j until i-th index falls in rank j's bound */
        PetscCheck(j != ycommsize, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " not owned by any process, upper bound %" PetscInt_FMT, yindices_sorted[i], yrange[ycommsize]);
      }
      i++;
      if (!slens[j]++) nsend++;
    }

    PetscCall(PetscMalloc2(nsend + 1, &sstart, nsend, &sendto));

    sstart[0] = 0;
    for (i = j = 0; i < ycommsize; i++) {
      if (slens[i]) {
        PetscCall(PetscMPIIntCast(i, &sendto[j]));
        sstart[j + 1] = sstart[j] + slens[i];
        j++;
      }
    }

    /*
      Calculate the reverse info about messages I will recv
       nrecv     - number of messages I will recv
       recvfrom  - [nrecv] ranks I recv from
       rlens     - [nrecv] I will recv rlens[i] entries from rank recvfrom[i]
       rlentotal - sum of rlens[]
       rxindices - [rlentotal] recv buffer for xindices_sorted
       ryindices - [rlentotal] recv buffer for yindices_sorted
     */
    PetscCall(PetscGatherNumberOfMessages_Private(ycomm, NULL, slens, &nrecv));
    PetscCall(PetscGatherMessageLengths_Private(ycomm, nsend, nrecv, slens, &recvfrom, &rlens));
    PetscCall(PetscFree(slens)); /* Free the O(P) array ASAP */
    rlentotal = 0;
    for (i = 0; i < nrecv; i++) rlentotal += rlens[i];

    /*
      Communicate with processors in recvfrom[] to populate rxindices and ryindices
    */
    PetscCall(PetscCommGetNewTag(ycomm, &tag1));
    PetscCall(PetscCommGetNewTag(ycomm, &tag2));
    PetscCall(PetscMalloc2(rlentotal, &rxindices, rlentotal, &ryindices));
    PetscCall(PetscMPIIntCast((nsend + nrecv) * 2, &nreq));
    PetscCall(PetscMalloc1(nreq, &reqs));
    sreqs = reqs;
    rreqs = PetscSafePointerPlusOffset(reqs, nsend * 2);

    for (i = disp = 0; i < nrecv; i++) {
      count = rlens[i];
      PetscCallMPI(MPIU_Irecv(rxindices + disp, count, MPIU_INT, recvfrom[i], tag1, ycomm, rreqs + i));
      PetscCallMPI(MPIU_Irecv(ryindices + disp, count, MPIU_INT, recvfrom[i], tag2, ycomm, rreqs + nrecv + i));
      disp += rlens[i];
    }

    for (i = 0; i < nsend; i++) {
      count = sstart[i + 1] - sstart[i];
      PetscCallMPI(MPIU_Isend(xindices_sorted + sstart[i], count, MPIU_INT, sendto[i], tag1, ycomm, sreqs + i));
      PetscCallMPI(MPIU_Isend(yindices_sorted + sstart[i], count, MPIU_INT, sendto[i], tag2, ycomm, sreqs + nsend + i));
    }
    PetscCallMPI(MPI_Waitall(nreq, reqs, MPI_STATUSES_IGNORE));

    /* Transform VecScatter into SF */
    nleaves = rlentotal;
    PetscCall(PetscMalloc1(nleaves, &ilocal));
    PetscCall(PetscMalloc1(nleaves, &iremote));
    PetscCallMPI(MPI_Comm_rank(ycomm, &yrank));
    for (i = disp = 0; i < nrecv; i++) {
      for (j = 0; j < rlens[i]; j++) {
        k         = disp + j;                                                                  /* k-th index pair */
        ilocal[k] = ryindices[k] - yrange[yrank];                                              /* Convert y's global index to local index */
        PetscCall(PetscLayoutFindOwnerIndex(xlayout, rxindices[k], &rank, &iremote[k].index)); /* Convert x's global index to (rank, index) */
        iremote[k].rank = rank;
      }
      disp += rlens[i];
    }

    PetscCall(PetscFree2(sstart, sendto));
    PetscCall(PetscFree(rlens));
    PetscCall(PetscFree(recvfrom));
    PetscCall(PetscFree(reqs));
    PetscCall(PetscFree2(rxindices, ryindices));
    PetscCall(PetscFree2(xindices_sorted, yindices_sorted));
#endif
  } else {
    /* PtoS or StoS */
    PetscCall(ISGetLocalSize(iyy, &nleaves));
    PetscCall(PetscMalloc1(nleaves, &ilocal));
    PetscCall(PetscMalloc1(nleaves, &iremote));
    PetscCall(PetscArraycpy(ilocal, yindices, nleaves));
    for (i = 0; i < nleaves; i++) {
      PetscCall(PetscLayoutFindOwnerIndex(xlayout, xindices[i], &rank, &iremote[i].index));
      iremote[i].rank = rank;
    }
  }

  /* MUST build SF on xx's comm, which is not necessarily identical to yy's comm.
     In SF's view, xx contains the roots (i.e., the remote) and iremote[].rank are ranks in xx's comm.
     yy contains leaves, which are local and can be thought of as part of PETSC_COMM_SELF. */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)xx), &sf));
  sf->allow_multi_leaves = PETSC_TRUE;
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(VecGetLocalSize(xx, &nroots));
  PetscCall(PetscSFSetGraph(sf, nroots, nleaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); /* Give ilocal/iremote to PETSc; no need to free them here */

  /* Free memory no longer needed */
  PetscCall(ISRestoreIndices(ixx, &xindices));
  PetscCall(ISRestoreIndices(iyy, &yindices));
  if (can_do_block_opt) {
    PetscCall(VecDestroy(&xx));
    PetscCall(VecDestroy(&yy));
    PetscCall(ISDestroy(&ixx));
    PetscCall(ISDestroy(&iyy));
  } else if (xcommsize == 1) {
    PetscCall(VecDestroy(&xx));
  }

functionend:
  sf->vscat.bs = bs;
  if (sf->vscat.bs > 1) {
    PetscMPIInt ibs;

    PetscCall(PetscMPIIntCast(sf->vscat.bs, &ibs));
    PetscCallMPI(MPI_Type_contiguous(ibs, MPIU_SCALAR, &sf->vscat.unit));
    PetscCallMPI(MPI_Type_commit(&sf->vscat.unit));
  } else {
    sf->vscat.unit = MPIU_SCALAR;
  }
  PetscCall(VecGetLocalSize(x, &sf->vscat.from_n));
  PetscCall(VecGetLocalSize(y, &sf->vscat.to_n));
  if (!ix_old) PetscCall(ISDestroy(&ix)); /* We created helper ix, iy. Free them */
  if (!iy_old) PetscCall(ISDestroy(&iy));

  /* Set defaults */
  PetscCall(VecScatterSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  *newsf = sf;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  VecScatterCreateToAll - Creates a vector and a scatter context that copies all
  vector values to each processor

  Collective

  Input Parameter:
. vin - an `MPIVEC`

  Output Parameters:
+ ctx  - scatter context
- vout - output `SEQVEC` that is large enough to scatter into

  Level: intermediate

  Example Usage:
.vb
  VecScatterCreateToAll(vin, &ctx, &vout);

  // scatter as many times as you need
  VecScatterBegin(ctx, vin, vout, INSERT_VALUES, SCATTER_FORWARD);
  VecScatterEnd(ctx, vin, vout, INSERT_VALUES, SCATTER_FORWARD);

  // destroy scatter context and local vector when no longer needed
  VecScatterDestroy(&ctx);
  VecDestroy(&vout);
.ve

  Notes:
  `vout` may be `NULL` [`PETSC_NULL_VEC` from Fortran] if you do not
  need to have it created

  Do NOT create a vector and then pass it in as the final argument `vout`! `vout` is created by this routine
  automatically (unless you pass `NULL` in for that argument if you do not need it).

.seealso: [](sec_scatter), `VecScatter`, `VecScatterCreate()`, `VecScatterCreateToZero()`, `VecScatterBegin()`, `VecScatterEnd()`
@*/
PetscErrorCode VecScatterCreateToAll(Vec vin, VecScatter *ctx, Vec *vout)
{
  PetscInt  N;
  IS        is;
  Vec       tmp;
  Vec      *tmpv;
  PetscBool tmpvout = PETSC_FALSE;
  VecType   roottype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(vin, VEC_CLASSID, 1);
  PetscValidType(vin, 1);
  PetscAssertPointer(ctx, 2);
  if (vout) {
    PetscAssertPointer(vout, 3);
    tmpv = vout;
  } else {
    tmpvout = PETSC_TRUE;
    tmpv    = &tmp;
  }

  /* Create a seq vec on each proc, with the same size as the original vec */
  PetscCall(VecGetSize(vin, &N));
  PetscCall(VecGetRootType_Private(vin, &roottype));
  PetscCall(VecCreate(PETSC_COMM_SELF, tmpv));
  PetscCall(VecSetSizes(*tmpv, N, PETSC_DECIDE));
  PetscCall(VecSetType(*tmpv, roottype));
  /* Create the VecScatter ctx with the communication info */
  PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &is));
  PetscCall(VecScatterCreate(vin, is, *tmpv, is, ctx));
  PetscCall(ISDestroy(&is));
  if (tmpvout) PetscCall(VecDestroy(tmpv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  VecScatterCreateToZero - Creates an output vector and a scatter context used to
  copy all vector values into the output vector on the zeroth processor

  Collective

  Input Parameter:
. vin - `Vec` of type `MPIVEC`

  Output Parameters:
+ ctx  - scatter context
- vout - output `SEQVEC` that is large enough to scatter into on processor 0 and
         of length zero on all other processors

  Level: intermediate

  Example Usage:
.vb
  VecScatterCreateToZero(vin, &ctx, &vout);

  // scatter as many times as you need
  VecScatterBegin(ctx, vin, vout, INSERT_VALUES, SCATTER_FORWARD);
  VecScatterEnd(ctx, vin, vout, INSERT_VALUES, SCATTER_FORWARD);

  // destroy scatter context and local vector when no longer needed
  VecScatterDestroy(&ctx);
  VecDestroy(&vout);
.ve

  Notes:
  `vout` may be `NULL` [`PETSC_NULL_VEC` from Fortran] if you do not
  need to have it created

  Do NOT create a vector and then pass it in as the final argument `vout`! `vout` is created by this routine
  automatically (unless you pass `NULL` in for that argument if you do not need it).

.seealso: [](sec_scatter), `VecScatter`, `VecScatterCreate()`, `VecScatterCreateToAll()`, `VecScatterBegin()`, `VecScatterEnd()`
@*/
VecScatterCreateToZero(Vec vin,VecScatter * ctx,Vec * vout)1215 PetscErrorCode VecScatterCreateToZero(Vec vin, VecScatter *ctx, Vec *vout)
1216 {
1217   PetscInt    N;
1218   PetscMPIInt rank;
1219   IS          is;
1220   Vec         tmp;
1221   Vec        *tmpv;
1222   PetscBool   tmpvout = PETSC_FALSE;
1223   VecType     roottype;
1224 
1225   PetscFunctionBegin;
1226   PetscValidHeaderSpecific(vin, VEC_CLASSID, 1);
1227   PetscValidType(vin, 1);
1228   PetscAssertPointer(ctx, 2);
1229   if (vout) {
1230     PetscAssertPointer(vout, 3);
1231     tmpv = vout;
1232   } else {
1233     tmpvout = PETSC_TRUE;
1234     tmpv    = &tmp;
1235   }
1236 
1237   /* Create vec on each proc, with the same size of the original vec all on process 0 */
1238   PetscCall(VecGetSize(vin, &N));
1239   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)vin), &rank));
1240   if (rank) N = 0;
1241   PetscCall(VecGetRootType_Private(vin, &roottype));
1242   PetscCall(VecCreate(PETSC_COMM_SELF, tmpv));
1243   PetscCall(VecSetSizes(*tmpv, N, PETSC_DECIDE));
1244   PetscCall(VecSetType(*tmpv, roottype));
1245   /* Create the VecScatter ctx with the communication info */
1246   PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &is));
1247   PetscCall(VecScatterCreate(vin, is, *tmpv, is, ctx));
1248   PetscCall(ISDestroy(&is));
1249   if (tmpvout) PetscCall(VecDestroy(tmpv));
1250   PetscFunctionReturn(PETSC_SUCCESS);
1251 }

/*@
  VecScatterBegin - Begins a generalized scatter from one vector to
  another. Complete the scattering phase with `VecScatterEnd()`.

  Neighbor-wise Collective

  Input Parameters:
+ sf   - scatter context generated by `VecScatterCreate()`
. x    - the vector from which we scatter
. y    - the vector to which we scatter
. addv - either `ADD_VALUES`, `MAX_VALUES`, `MIN_VALUES` or `INSERT_VALUES`; with `INSERT_VALUES` mode, any location
         not scattered to retains its old value, i.e. the vector is NOT first zeroed
- mode - the scattering mode, usually `SCATTER_FORWARD`. The available modes are `SCATTER_FORWARD` and `SCATTER_REVERSE`

  Level: intermediate

  Notes:
  The vectors `x` and `y` need not be the same vectors used in the call
  to `VecScatterCreate()`, but `x` must have the same parallel data layout
  as the `x` passed to `VecScatterCreate()`, and similarly for `y`.
  Most likely they have been obtained from `VecDuplicate()`.

  You cannot change the values in the input vector between the calls to `VecScatterBegin()`
  and `VecScatterEnd()`.

  If you use `SCATTER_REVERSE`, the two arguments `x` and `y` should be reversed relative to
  the `SCATTER_FORWARD` case.

  With `ix` and `iy` the index sets passed to `VecScatterCreate()`, the forward scatter computes
.vb
  y[iy[i]] = x[ix[i]], for i=0,...,ni-1
.ve

  This scatter is far more general than the conventional
  scatter, since it can be a gather or a scatter or a combination,
  depending on the indices `ix` and `iy`. If `x` is a parallel vector and `y`
  is sequential, `VecScatterBegin()` can serve to gather values to a
  single processor. Similarly, if `y` is parallel and `x` sequential, the
  routine can scatter from one processor to many processors.
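
  A sketch of the typical forward and reverse usage, assuming `sf` was created by
  `VecScatterCreate()` and `x` and `y` have the layouts described above:
.vb
  VecScatterBegin(sf, x, y, INSERT_VALUES, SCATTER_FORWARD);
  VecScatterEnd(sf, x, y, INSERT_VALUES, SCATTER_FORWARD);

  // in reverse mode the roles of x and y are swapped
  VecScatterBegin(sf, y, x, ADD_VALUES, SCATTER_REVERSE);
  VecScatterEnd(sf, y, x, ADD_VALUES, SCATTER_REVERSE);
.ve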

.seealso: [](sec_scatter), `VecScatter`, `VecScatterCreate()`, `VecScatterEnd()`, `InsertMode`, `ScatterMode`
@*/
PetscErrorCode VecScatterBegin(VecScatter sf, Vec x, Vec y, InsertMode addv, ScatterMode mode)
{
  PetscInt to_n, from_n;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidHeaderSpecific(x, VEC_CLASSID, 2);
  PetscValidHeaderSpecific(y, VEC_CLASSID, 3);
  if (PetscDefined(USE_DEBUG)) {
    /*
     Error checking to make sure these vectors match the vectors used
     to create the vector scatter context. -1 in from_n and to_n indicates the
     vector lengths are unknown (for example with mapped scatters) and thus
     no error checking is performed.
     */
    if (sf->vscat.from_n >= 0 && sf->vscat.to_n >= 0) {
      PetscCall(VecGetLocalSize(x, &from_n));
      PetscCall(VecGetLocalSize(y, &to_n));
      if (mode & SCATTER_REVERSE) {
        PetscCheck(to_n == sf->vscat.from_n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Vector wrong size %" PetscInt_FMT " for scatter %" PetscInt_FMT " (scatter reverse and vector to != sf from size)", to_n, sf->vscat.from_n);
        PetscCheck(from_n == sf->vscat.to_n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Vector wrong size %" PetscInt_FMT " for scatter %" PetscInt_FMT " (scatter reverse and vector from != sf to size)", from_n, sf->vscat.to_n);
      } else {
        PetscCheck(to_n == sf->vscat.to_n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Vector wrong size %" PetscInt_FMT " for scatter %" PetscInt_FMT " (scatter forward and vector to != sf to size)", to_n, sf->vscat.to_n);
        PetscCheck(from_n == sf->vscat.from_n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Vector wrong size %" PetscInt_FMT " for scatter %" PetscInt_FMT " (scatter forward and vector from != sf from size)", from_n, sf->vscat.from_n);
      }
    }
  }

  sf->vscat.logging = PETSC_TRUE;
  PetscCall(PetscLogEventBegin(VEC_ScatterBegin, sf, x, y, 0));
  PetscCall(VecScatterBegin_Internal(sf, x, y, addv, mode));
  if (sf->vscat.beginandendtogether) PetscCall(VecScatterEnd_Internal(sf, x, y, addv, mode));
  PetscCall(PetscLogEventEnd(VEC_ScatterBegin, sf, x, y, 0));
  sf->vscat.logging = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  VecScatterEnd - Ends a generalized scatter from one vector to another. Call
  after first calling `VecScatterBegin()`.

  Neighbor-wise Collective

  Input Parameters:
+ sf   - scatter context generated by `VecScatterCreate()`
. x    - the vector from which we scatter
. y    - the vector to which we scatter
. addv - one of `ADD_VALUES`, `MAX_VALUES`, `MIN_VALUES` or `INSERT_VALUES`
- mode - the scattering mode, usually `SCATTER_FORWARD`. The available modes are `SCATTER_FORWARD` and `SCATTER_REVERSE`

  Level: intermediate

  Notes:
  If you use `SCATTER_REVERSE`, the arguments `x` and `y` should be reversed relative to the `SCATTER_FORWARD` case.

  The forward scatter computes
.vb
  y[iy[i]] = x[ix[i]], for i=0,...,ni-1
.ve
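
  The split-phase design allows communication to be overlapped with local computation;
  a sketch, assuming the work between the two calls touches neither `x` nor `y`:
.vb
  VecScatterBegin(sf, x, y, INSERT_VALUES, SCATTER_FORWARD);
  // ... local computation that reads and writes neither x nor y ...
  VecScatterEnd(sf, x, y, INSERT_VALUES, SCATTER_FORWARD);
.ve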

.seealso: [](sec_scatter), `VecScatter`, `VecScatterBegin()`, `VecScatterCreate()`
@*/
PetscErrorCode VecScatterEnd(VecScatter sf, Vec x, Vec y, InsertMode addv, ScatterMode mode)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidHeaderSpecific(x, VEC_CLASSID, 2);
  PetscValidHeaderSpecific(y, VEC_CLASSID, 3);
  if (!sf->vscat.beginandendtogether) {
    sf->vscat.logging = PETSC_TRUE;
    PetscCall(PetscLogEventBegin(VEC_ScatterEnd, sf, x, y, 0));
    PetscCall(VecScatterEnd_Internal(sf, x, y, addv, mode));
    PetscCall(PetscLogEventEnd(VEC_ScatterEnd, sf, x, y, 0));
    sf->vscat.logging = PETSC_FALSE;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1368