1 /*
2 Split phase global vector reductions with support for combining the
3 communication portion of several operations. Using MPI-1.1 support only
4
5 The idea for this and much of the initial code is contributed by
6 Victor Eijkhout.
7
8 Usage:
9 VecDotBegin(Vec,Vec,PetscScalar *);
10 VecNormBegin(Vec,NormType,PetscReal *);
11 ....
12 VecDotEnd(Vec,Vec,PetscScalar *);
13 VecNormEnd(Vec,NormType,PetscReal *);
14
15 Limitations:
16 - The order of the xxxEnd() functions MUST be in the same order
17 as the xxxBegin(). There is extensive error checking to try to
18 insure that the user calls the routines in the correct order
19 */
20
21 #include <petsc/private/vecimpl.h> /*I "petscvec.h" I*/
22
MPIU_Iallreduce(void * sendbuf,void * recvbuf,PetscMPIInt count,MPI_Datatype datatype,MPI_Op op,MPI_Comm comm,MPI_Request * request)23 static PetscMPIInt MPIU_Iallreduce(void *sendbuf, void *recvbuf, PetscMPIInt count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request)
24 {
25 PetscMPIInt err;
26
27 #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
28 err = MPI_Iallreduce(sendbuf, recvbuf, count, datatype, op, comm, request);
29 #else
30 err = MPIU_Allreduce(sendbuf, recvbuf, count, datatype, op, comm);
31 *request = MPI_REQUEST_NULL;
32 #endif
33 return err;
34 }
35
36 static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *);
37
38 /*
39 PetscSplitReductionCreate - Creates a data structure to contain the queued information.
40 */
PetscSplitReductionCreate(MPI_Comm comm,PetscSplitReduction ** sr)41 static PetscErrorCode PetscSplitReductionCreate(MPI_Comm comm, PetscSplitReduction **sr)
42 {
43 PetscFunctionBegin;
44 PetscCall(PetscNew(sr));
45 (*sr)->numopsbegin = 0;
46 (*sr)->numopsend = 0;
47 (*sr)->state = STATE_BEGIN;
48 #define MAXOPS 32
49 (*sr)->maxops = MAXOPS;
50 PetscCall(PetscMalloc6(MAXOPS, &(*sr)->lvalues, MAXOPS, &(*sr)->gvalues, MAXOPS, &(*sr)->invecs, MAXOPS, &(*sr)->reducetype, MAXOPS, &(*sr)->lvalues_mix, MAXOPS, &(*sr)->gvalues_mix));
51 #undef MAXOPS
52 (*sr)->comm = comm;
53 (*sr)->request = MPI_REQUEST_NULL;
54 (*sr)->mix = PETSC_FALSE;
55 (*sr)->async = PetscDefined(HAVE_MPI_NONBLOCKING_COLLECTIVES) ? PETSC_TRUE : PETSC_FALSE; /* Enable by default */
56 /* always check for option; so that tests that run on systems without support don't warn about unhandled options */
57 PetscCall(PetscOptionsGetBool(NULL, NULL, "-splitreduction_async", &(*sr)->async, NULL));
58 PetscFunctionReturn(PETSC_SUCCESS);
59 }
60
61 /*
62 This function is the MPI reduction operation used when there is
63 a combination of sums and max in the reduction. The call below to
64 MPI_Op_create() converts the function PetscSplitReduction_Local() to the
65 MPI operator PetscSplitReduction_Op.
66 */
67 MPI_Op PetscSplitReduction_Op = 0;
68
PetscSplitReduction_Local(void * in,void * out,PetscMPIInt * cnt,MPI_Datatype * datatype)69 PETSC_EXTERN void MPIAPI PetscSplitReduction_Local(void *in, void *out, PetscMPIInt *cnt, MPI_Datatype *datatype)
70 {
71 struct PetscScalarInt {
72 PetscScalar v;
73 PetscInt i;
74 };
75 struct PetscScalarInt *xin = (struct PetscScalarInt *)in;
76 struct PetscScalarInt *xout = (struct PetscScalarInt *)out;
77 PetscInt i, count = *cnt;
78
79 PetscFunctionBegin;
80 if (*datatype != MPIU_SCALAR_INT) {
81 PetscCallAbort(MPI_COMM_SELF, (*PetscErrorPrintf)("Can only handle MPIU_SCALAR_INT data types"));
82 PETSCABORT(MPI_COMM_SELF, PETSC_ERR_ARG_WRONG);
83 }
84 for (i = 0; i < count; i++) {
85 if (xin[i].i == PETSC_SR_REDUCE_SUM) xout[i].v += xin[i].v;
86 else if (xin[i].i == PETSC_SR_REDUCE_MAX) xout[i].v = PetscMax(PetscRealPart(xout[i].v), PetscRealPart(xin[i].v));
87 else if (xin[i].i == PETSC_SR_REDUCE_MIN) xout[i].v = PetscMin(PetscRealPart(xout[i].v), PetscRealPart(xin[i].v));
88 else {
89 PetscCallAbort(MPI_COMM_SELF, (*PetscErrorPrintf)("Reduction type input is not PETSC_SR_REDUCE_SUM, PETSC_SR_REDUCE_MAX, or PETSC_SR_REDUCE_MIN"));
90 PETSCABORT(MPI_COMM_SELF, PETSC_ERR_ARG_WRONG);
91 }
92 }
93 PetscFunctionReturnVoid();
94 }
95
96 /*@
97 PetscCommSplitReductionBegin - Begin an asynchronous split-mode reduction
98
99 Collective but not synchronizing
100
101 Input Parameter:
102 . comm - communicator on which split reduction has been queued
103
104 Level: advanced
105
106 Note:
107 Calling this function is optional when using split-mode reduction. On supporting hardware,
108 calling this after all VecXxxBegin() allows the reduction to make asynchronous progress
109 before the result is needed (in VecXxxEnd()).
110
111 .seealso: `VecNormBegin()`, `VecNormEnd()`, `VecDotBegin()`, `VecDotEnd()`, `VecTDotBegin()`, `VecTDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`
112 @*/
PetscCommSplitReductionBegin(MPI_Comm comm)113 PetscErrorCode PetscCommSplitReductionBegin(MPI_Comm comm)
114 {
115 PetscSplitReduction *sr;
116
117 PetscFunctionBegin;
118 if (PetscDefined(HAVE_THREADSAFETY)) PetscFunctionReturn(PETSC_SUCCESS);
119 PetscCall(PetscSplitReductionGet(comm, &sr));
120 PetscCheck(sr->numopsend <= 0, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot call this after VecxxxEnd() has been called");
121 if (sr->async) { /* Bad reuse, setup code copied from PetscSplitReductionApply(). */
122 PetscMPIInt numops = sr->numopsbegin;
123 PetscSRReductionType *reducetype = sr->reducetype;
124 PetscScalar *lvalues = sr->lvalues, *gvalues = sr->gvalues;
125 PetscInt sum_flg = 0, max_flg = 0, min_flg = 0;
126 MPI_Comm comm = sr->comm;
127 PetscMPIInt size, cmul = sizeof(PetscScalar) / sizeof(PetscReal);
128
129 PetscCall(PetscLogEventBegin(VEC_ReduceBegin, 0, 0, 0, 0));
130 PetscCallMPI(MPI_Comm_size(sr->comm, &size));
131 if (size == 1) {
132 PetscCall(PetscArraycpy(gvalues, lvalues, numops));
133 } else {
134 /* determine if all reductions are sum, max, or min */
135 for (PetscMPIInt i = 0; i < numops; i++) {
136 if (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
137 else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
138 else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
139 else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
140 }
141 PetscCheck(sum_flg + max_flg + min_flg <= 1 || !sr->mix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
142 if (sum_flg + max_flg + min_flg > 1) {
143 sr->mix = PETSC_TRUE;
144 for (PetscMPIInt i = 0; i < numops; i++) {
145 sr->lvalues_mix[i].v = lvalues[i];
146 sr->lvalues_mix[i].i = reducetype[i];
147 }
148 PetscCallMPI(MPIU_Iallreduce(sr->lvalues_mix, sr->gvalues_mix, numops, MPIU_SCALAR_INT, PetscSplitReduction_Op, comm, &sr->request));
149 } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
150 PetscCallMPI(MPIU_Iallreduce(lvalues, gvalues, cmul * numops, MPIU_REAL, MPIU_MAX, comm, &sr->request));
151 } else if (min_flg) {
152 PetscCallMPI(MPIU_Iallreduce(lvalues, gvalues, cmul * numops, MPIU_REAL, MPIU_MIN, comm, &sr->request));
153 } else {
154 PetscCallMPI(MPIU_Iallreduce(lvalues, gvalues, numops, MPIU_SCALAR, MPIU_SUM, comm, &sr->request));
155 }
156 }
157 sr->state = STATE_PENDING;
158 sr->numopsend = 0;
159 PetscCall(PetscLogEventEnd(VEC_ReduceBegin, 0, 0, 0, 0));
160 } else {
161 PetscCall(PetscSplitReductionApply(sr));
162 }
163 PetscFunctionReturn(PETSC_SUCCESS);
164 }
165
PetscSplitReductionEnd(PetscSplitReduction * sr)166 PetscErrorCode PetscSplitReductionEnd(PetscSplitReduction *sr)
167 {
168 PetscFunctionBegin;
169 switch (sr->state) {
170 case STATE_BEGIN: /* We are doing synchronous communication and this is the first call to VecXxxEnd() so do the communication */
171 PetscCall(PetscSplitReductionApply(sr));
172 break;
173 case STATE_PENDING:
174 /* We are doing asynchronous-mode communication and this is the first VecXxxEnd() so wait for comm to complete */
175 PetscCall(PetscLogEventBegin(VEC_ReduceEnd, 0, 0, 0, 0));
176 if (sr->request != MPI_REQUEST_NULL) PetscCallMPI(MPI_Wait(&sr->request, MPI_STATUS_IGNORE));
177 sr->state = STATE_END;
178 if (sr->mix) {
179 for (PetscMPIInt i = 0; i < sr->numopsbegin; i++) sr->gvalues[i] = sr->gvalues_mix[i].v;
180 sr->mix = PETSC_FALSE;
181 }
182 PetscCall(PetscLogEventEnd(VEC_ReduceEnd, 0, 0, 0, 0));
183 break;
184 default:
185 break; /* everything is already done */
186 }
187 PetscFunctionReturn(PETSC_SUCCESS);
188 }
189
190 /*
191 PetscSplitReductionApply - Actually do the communication required for a split phase reduction
192 */
PetscSplitReductionApply(PetscSplitReduction * sr)193 static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *sr)
194 {
195 PetscMPIInt numops = sr->numopsbegin;
196 PetscSRReductionType *reducetype = sr->reducetype;
197 PetscScalar *lvalues = sr->lvalues, *gvalues = sr->gvalues;
198 PetscInt sum_flg = 0, max_flg = 0, min_flg = 0;
199 MPI_Comm comm = sr->comm;
200 PetscMPIInt size, cmul = sizeof(PetscScalar) / sizeof(PetscReal);
201
202 PetscFunctionBegin;
203 PetscCheck(sr->numopsend <= 0, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot call this after VecxxxEnd() has been called");
204 PetscCall(PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0));
205 PetscCallMPI(MPI_Comm_size(sr->comm, &size));
206 if (size == 1) {
207 PetscCall(PetscArraycpy(gvalues, lvalues, numops));
208 } else {
209 /* determine if all reductions are sum, max, or min */
210 for (PetscMPIInt i = 0; i < numops; i++) {
211 if (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
212 else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
213 else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
214 else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
215 }
216 if (sum_flg + max_flg + min_flg > 1) {
217 PetscCheck(!sr->mix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
218 for (PetscMPIInt i = 0; i < numops; i++) {
219 sr->lvalues_mix[i].v = lvalues[i];
220 sr->lvalues_mix[i].i = reducetype[i];
221 }
222 PetscCallMPI(MPIU_Allreduce(sr->lvalues_mix, sr->gvalues_mix, numops, MPIU_SCALAR_INT, PetscSplitReduction_Op, comm));
223 for (PetscMPIInt i = 0; i < numops; i++) sr->gvalues[i] = sr->gvalues_mix[i].v;
224 } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
225 PetscCallMPI(MPIU_Allreduce(lvalues, gvalues, cmul * numops, MPIU_REAL, MPIU_MAX, comm));
226 } else if (min_flg) {
227 PetscCallMPI(MPIU_Allreduce(lvalues, gvalues, cmul * numops, MPIU_REAL, MPIU_MIN, comm));
228 } else {
229 PetscCallMPI(MPIU_Allreduce(lvalues, gvalues, numops, MPIU_SCALAR, MPIU_SUM, comm));
230 }
231 }
232 sr->state = STATE_END;
233 sr->numopsend = 0;
234 PetscCall(PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0));
235 PetscFunctionReturn(PETSC_SUCCESS);
236 }
237
238 /*
239 PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
240 */
PetscSplitReductionExtend(PetscSplitReduction * sr)241 PetscErrorCode PetscSplitReductionExtend(PetscSplitReduction *sr)
242 {
243 struct PetscScalarInt {
244 PetscScalar v;
245 PetscInt i;
246 };
247 PetscMPIInt maxops = sr->maxops;
248 PetscSRReductionType *reducetype = sr->reducetype;
249 PetscScalar *lvalues = sr->lvalues, *gvalues = sr->gvalues;
250 struct PetscScalarInt *lvalues_mix = (struct PetscScalarInt *)sr->lvalues_mix;
251 struct PetscScalarInt *gvalues_mix = (struct PetscScalarInt *)sr->gvalues_mix;
252 void **invecs = sr->invecs;
253
254 PetscFunctionBegin;
255 sr->maxops = 2 * maxops;
256 PetscCall(PetscMalloc6(2 * maxops, &sr->lvalues, 2 * maxops, &sr->gvalues, 2 * maxops, &sr->reducetype, 2 * maxops, &sr->invecs, 2 * maxops, &sr->lvalues_mix, 2 * maxops, &sr->gvalues_mix));
257 PetscCall(PetscArraycpy(sr->lvalues, lvalues, maxops));
258 PetscCall(PetscArraycpy(sr->gvalues, gvalues, maxops));
259 PetscCall(PetscArraycpy(sr->reducetype, reducetype, maxops));
260 PetscCall(PetscArraycpy(sr->invecs, invecs, maxops));
261 PetscCall(PetscArraycpy(sr->lvalues_mix, lvalues_mix, maxops));
262 PetscCall(PetscArraycpy(sr->gvalues_mix, gvalues_mix, maxops));
263 PetscCall(PetscFree6(lvalues, gvalues, reducetype, invecs, lvalues_mix, gvalues_mix));
264 PetscFunctionReturn(PETSC_SUCCESS);
265 }
266
PetscSplitReductionDestroy(PetscSplitReduction * sr)267 static PetscErrorCode PetscSplitReductionDestroy(PetscSplitReduction *sr)
268 {
269 PetscFunctionBegin;
270 PetscCall(PetscFree6(sr->lvalues, sr->gvalues, sr->reducetype, sr->invecs, sr->lvalues_mix, sr->gvalues_mix));
271 PetscCall(PetscFree(sr));
272 PetscFunctionReturn(PETSC_SUCCESS);
273 }
274
275 PetscMPIInt Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;
276
277 /*
278 Private routine to delete internal storage when a communicator is freed.
279 This is called by MPI, not by users.
280
281 The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
282 it was MPI_Comm *comm.
283 */
Petsc_DelReduction(MPI_Comm comm,PETSC_UNUSED PetscMPIInt keyval,void * attr_val,PETSC_UNUSED void * extra_state)284 static PetscMPIInt MPIAPI Petsc_DelReduction(MPI_Comm comm, PETSC_UNUSED PetscMPIInt keyval, void *attr_val, PETSC_UNUSED void *extra_state)
285 {
286 PetscFunctionBegin;
287 PetscCallReturnMPI(PetscInfo(0, "Deleting reduction data in an MPI_Comm %ld\n", (long)comm));
288 PetscCallReturnMPI(PetscSplitReductionDestroy((PetscSplitReduction *)attr_val));
289 PetscFunctionReturn(PETSC_SUCCESS);
290 }
291
292 /*
293 PetscSplitReductionGet - Gets the split reduction object from a
294 PETSc vector, creates if it does not exit.
295
296 */
PetscSplitReductionGet(MPI_Comm comm,PetscSplitReduction ** sr)297 PetscErrorCode PetscSplitReductionGet(MPI_Comm comm, PetscSplitReduction **sr)
298 {
299 PetscMPIInt iflg;
300
301 PetscFunctionBegin;
302 PetscCheck(!PetscDefined(HAVE_THREADSAFETY), comm, PETSC_ERR_SUP, "PetscSplitReductionGet() is not thread-safe");
303 if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
304 /*
305 The calling sequence of the 2nd argument to this function changed
306 between MPI Standard 1.0 and the revisions 1.1 Here we match the
307 new standard, if you are using an MPI implementation that uses
308 the older version you will get a warning message about the next line;
309 it is only a warning message and should do no harm.
310 */
311 PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_DelReduction, &Petsc_Reduction_keyval, NULL));
312 }
313 PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Reduction_keyval, (void **)sr, &iflg));
314 if (!iflg) { /* doesn't exist yet so create it and put it in */
315 PetscCall(PetscSplitReductionCreate(comm, sr));
316 PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_Reduction_keyval, *sr));
317 PetscCall(PetscInfo(0, "Putting reduction data in an MPI_Comm %ld\n", (long)comm));
318 }
319 PetscFunctionReturn(PETSC_SUCCESS);
320 }
321
322 /*@
323 VecDotBegin - Starts a split phase dot product computation.
324
325 Input Parameters:
326 + x - the first vector
327 . y - the second vector
328 - result - where the result will go (can be `NULL`)
329
330 Level: advanced
331
332 Notes:
333 Each call to `VecDotBegin()` should be paired with a call to `VecDotEnd()`.
334
335 .seealso: `VecDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
336 `VecTDotBegin()`, `VecTDotEnd()`, `PetscCommSplitReductionBegin()`
337 @*/
VecDotBegin(Vec x,Vec y,PetscScalar * result)338 PetscErrorCode VecDotBegin(Vec x, Vec y, PetscScalar *result)
339 {
340 PetscSplitReduction *sr;
341 MPI_Comm comm;
342
343 PetscFunctionBegin;
344 PetscValidHeaderSpecific(x, VEC_CLASSID, 1);
345 PetscValidHeaderSpecific(y, VEC_CLASSID, 2);
346 PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
347 if (PetscDefined(HAVE_THREADSAFETY)) {
348 PetscCheck(result, comm, PETSC_ERR_ARG_NULL, "result cannot be NULL when configuring --with-threadsafety");
349 PetscCall(VecDot(x, y, result));
350 PetscFunctionReturn(PETSC_SUCCESS);
351 }
352 PetscCall(PetscSplitReductionGet(comm, &sr));
353 PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
354 if (sr->numopsbegin >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
355 sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
356 sr->invecs[sr->numopsbegin] = (void *)x;
357 PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
358 PetscUseTypeMethod(x, dot_local, y, sr->lvalues + sr->numopsbegin++);
359 PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
360 PetscFunctionReturn(PETSC_SUCCESS);
361 }
362
363 /*@
364 VecDotEnd - Ends a split phase dot product computation.
365
366 Input Parameters:
367 + x - the first vector (can be `NULL`)
368 . y - the second vector (can be `NULL`)
369 - result - where the result will go
370
371 Level: advanced
372
373 Notes:
374 Each call to `VecDotBegin()` should be paired with a call to `VecDotEnd()`.
375
376 .seealso: `VecDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
377 `VecTDotBegin()`, `VecTDotEnd()`, `PetscCommSplitReductionBegin()`
378 @*/
VecDotEnd(Vec x,Vec y,PetscScalar * result)379 PetscErrorCode VecDotEnd(Vec x, Vec y, PetscScalar *result)
380 {
381 PetscSplitReduction *sr;
382 MPI_Comm comm;
383
384 PetscFunctionBegin;
385 if (PetscDefined(HAVE_THREADSAFETY)) PetscFunctionReturn(PETSC_SUCCESS);
386 PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
387 PetscCall(PetscSplitReductionGet(comm, &sr));
388 PetscCall(PetscSplitReductionEnd(sr));
389
390 PetscCheck(sr->numopsend < sr->numopsbegin, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() more times then VecxxxBegin()");
391 PetscCheck(!x || (void *)x == sr->invecs[sr->numopsend], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
392 PetscCheck(sr->reducetype[sr->numopsend] == PETSC_SR_REDUCE_SUM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecDotEnd() on a reduction started with VecNormBegin()");
393 *result = sr->gvalues[sr->numopsend++];
394
395 /*
396 We are finished getting all the results so reset to no outstanding requests
397 */
398 if (sr->numopsend == sr->numopsbegin) {
399 sr->state = STATE_BEGIN;
400 sr->numopsend = 0;
401 sr->numopsbegin = 0;
402 sr->mix = PETSC_FALSE;
403 }
404 PetscFunctionReturn(PETSC_SUCCESS);
405 }
406
407 /*@
408 VecTDotBegin - Starts a split phase transpose dot product computation.
409
410 Input Parameters:
411 + x - the first vector
412 . y - the second vector
413 - result - where the result will go (can be `NULL`)
414
415 Level: advanced
416
417 Notes:
418 Each call to `VecTDotBegin()` should be paired with a call to `VecTDotEnd()`.
419
420 .seealso: `VecTDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
421 `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`
422 @*/
VecTDotBegin(Vec x,Vec y,PetscScalar * result)423 PetscErrorCode VecTDotBegin(Vec x, Vec y, PetscScalar *result)
424 {
425 PetscSplitReduction *sr;
426 MPI_Comm comm;
427
428 PetscFunctionBegin;
429 PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
430 if (PetscDefined(HAVE_THREADSAFETY)) {
431 PetscCheck(result, comm, PETSC_ERR_ARG_NULL, "result cannot be NULL when configuring --with-threadsafety");
432 PetscCall(VecTDot(x, y, result));
433 PetscFunctionReturn(PETSC_SUCCESS);
434 }
435 PetscCall(PetscSplitReductionGet(comm, &sr));
436 PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
437 if (sr->numopsbegin >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
438 sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
439 sr->invecs[sr->numopsbegin] = (void *)x;
440 PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
441 PetscUseTypeMethod(x, tdot_local, y, sr->lvalues + sr->numopsbegin++);
442 PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
443 PetscFunctionReturn(PETSC_SUCCESS);
444 }
445
446 /*@
447 VecTDotEnd - Ends a split phase transpose dot product computation.
448
449 Input Parameters:
450 + x - the first vector (can be `NULL`)
451 . y - the second vector (can be `NULL`)
452 - result - where the result will go
453
454 Level: advanced
455
456 Notes:
457 Each call to `VecTDotBegin()` should be paired with a call to `VecTDotEnd()`.
458
459 .seealso: `VecTDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
460 `VecDotBegin()`, `VecDotEnd()`
461 @*/
VecTDotEnd(Vec x,Vec y,PetscScalar * result)462 PetscErrorCode VecTDotEnd(Vec x, Vec y, PetscScalar *result)
463 {
464 PetscFunctionBegin;
465 /*
466 TDotEnd() is the same as DotEnd() so reuse the code
467 */
468 PetscCall(VecDotEnd(x, y, result));
469 PetscFunctionReturn(PETSC_SUCCESS);
470 }
471
472 /*@
473 VecNormBegin - Starts a split phase norm computation.
474
475 Input Parameters:
476 + x - the first vector
477 . ntype - norm type, one of `NORM_1`, `NORM_2`, `NORM_MAX`, `NORM_1_AND_2`
478 - result - where the result will go (can be `NULL`)
479
480 Level: advanced
481
482 Notes:
483 Each call to `VecNormBegin()` should be paired with a call to `VecNormEnd()`.
484
485 .seealso: `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`, `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`
486 @*/
VecNormBegin(Vec x,NormType ntype,PetscReal * result)487 PetscErrorCode VecNormBegin(Vec x, NormType ntype, PetscReal *result)
488 {
489 PetscSplitReduction *sr;
490 PetscReal lresult[2];
491 MPI_Comm comm;
492
493 PetscFunctionBegin;
494 PetscValidHeaderSpecific(x, VEC_CLASSID, 1);
495 PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
496 if (PetscDefined(HAVE_THREADSAFETY)) {
497 PetscCheck(result, comm, PETSC_ERR_ARG_NULL, "result cannot be NULL when configuring --with-threadsafety");
498 PetscCall(PetscObjectStateIncrease((PetscObject)x)); // increase PetscObjectState to invalidate cached norms
499 PetscCall(VecNorm(x, ntype, result));
500 PetscFunctionReturn(PETSC_SUCCESS);
501 }
502 PetscCall(PetscSplitReductionGet(comm, &sr));
503 PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
504 if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops - 1 && ntype == NORM_1_AND_2)) PetscCall(PetscSplitReductionExtend(sr));
505
506 sr->invecs[sr->numopsbegin] = (void *)x;
507 PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
508 PetscUseTypeMethod(x, norm_local, ntype, lresult);
509 PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
510 if (ntype == NORM_2) lresult[0] = lresult[0] * lresult[0];
511 if (ntype == NORM_1_AND_2) lresult[1] = lresult[1] * lresult[1];
512 if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_MAX;
513 else sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
514 sr->lvalues[sr->numopsbegin++] = lresult[0];
515 if (ntype == NORM_1_AND_2) {
516 sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
517 sr->lvalues[sr->numopsbegin++] = lresult[1];
518 }
519 PetscFunctionReturn(PETSC_SUCCESS);
520 }
521
522 /*@
523 VecNormEnd - Ends a split phase norm computation.
524
525 Input Parameters:
526 + x - the first vector
527 . ntype - norm type, one of `NORM_1`, `NORM_2`, `NORM_MAX`, `NORM_1_AND_2`
528 - result - where the result will go
529
530 Level: advanced
531
532 Notes:
533 Each call to `VecNormBegin()` should be paired with a call to `VecNormEnd()`.
534
535 The `x` vector is not allowed to be `NULL`, otherwise the vector would not have its correctly cached norm value
536
537 .seealso: `VecNormBegin()`, `VecNorm()`, `VecDot()`, `VecMDot()`, `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`
538 @*/
VecNormEnd(Vec x,NormType ntype,PetscReal * result)539 PetscErrorCode VecNormEnd(Vec x, NormType ntype, PetscReal *result)
540 {
541 PetscSplitReduction *sr;
542 MPI_Comm comm;
543
544 PetscFunctionBegin;
545 if (PetscDefined(HAVE_THREADSAFETY)) PetscFunctionReturn(PETSC_SUCCESS);
546 PetscValidHeaderSpecific(x, VEC_CLASSID, 1);
547 PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
548 PetscCall(PetscSplitReductionGet(comm, &sr));
549 PetscCall(PetscSplitReductionEnd(sr));
550
551 PetscCheck(sr->numopsend < sr->numopsbegin, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() more times then VecxxxBegin()");
552 PetscCheck((void *)x == sr->invecs[sr->numopsend], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
553 PetscCheck(sr->reducetype[sr->numopsend] == PETSC_SR_REDUCE_MAX || ntype != NORM_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecNormEnd(,NORM_MAX,) on a reduction started with VecDotBegin() or NORM_1 or NORM_2");
554 result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);
555
556 if (ntype == NORM_2) result[0] = PetscSqrtReal(result[0]);
557 else if (ntype == NORM_1_AND_2) {
558 result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
559 result[1] = PetscSqrtReal(result[1]);
560 }
561 if (ntype != NORM_1_AND_2) PetscCall(PetscObjectComposedDataSetReal((PetscObject)x, NormIds[ntype], result[0]));
562
563 if (sr->numopsend == sr->numopsbegin) {
564 sr->state = STATE_BEGIN;
565 sr->numopsend = 0;
566 sr->numopsbegin = 0;
567 }
568 PetscFunctionReturn(PETSC_SUCCESS);
569 }
570
571 /*
572 Possibly add
573
574 PetscReductionSumBegin/End()
575 PetscReductionMaxBegin/End()
576 PetscReductionMinBegin/End()
577 or have more like MPI with a single function with flag for Op? Like first better
578 */
579
580 /*@
581 VecMDotBegin - Starts a split phase multiple dot product computation.
582
583 Input Parameters:
584 + x - the first vector
585 . nv - number of vectors
586 . y - array of vectors
587 - result - where the result will go (can be `NULL`)
588
589 Level: advanced
590
591 Notes:
592 Each call to `VecMDotBegin()` should be paired with a call to `VecMDotEnd()`.
593
594 .seealso: `VecMDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
595 `VecTDotBegin()`, `VecTDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`, `PetscCommSplitReductionBegin()`
596 @*/
VecMDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])597 PetscErrorCode VecMDotBegin(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
598 {
599 PetscSplitReduction *sr;
600 MPI_Comm comm;
601 PetscInt i;
602
603 PetscFunctionBegin;
604 PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
605 if (PetscDefined(HAVE_THREADSAFETY)) {
606 PetscCheck(result, comm, PETSC_ERR_ARG_NULL, "result cannot be NULL when configuring --with-threadsafety");
607 PetscCall(VecMDot(x, nv, y, result));
608 PetscFunctionReturn(PETSC_SUCCESS);
609 }
610 PetscCall(PetscSplitReductionGet(comm, &sr));
611 PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
612 for (i = 0; i < nv; i++) {
613 if (sr->numopsbegin + i >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
614 sr->reducetype[sr->numopsbegin + i] = PETSC_SR_REDUCE_SUM;
615 sr->invecs[sr->numopsbegin + i] = (void *)x;
616 }
617 PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
618 PetscUseTypeMethod(x, mdot_local, nv, y, sr->lvalues + sr->numopsbegin);
619 PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
620 sr->numopsbegin += nv;
621 PetscFunctionReturn(PETSC_SUCCESS);
622 }
623
624 /*@
625 VecMDotEnd - Ends a split phase multiple dot product computation.
626
627 Input Parameters:
628 + x - the first vector (can be `NULL`)
629 . nv - number of vectors
630 - y - array of vectors (can be `NULL`)
631
632 Output Parameter:
633 . result - where the result will go
634
635 Level: advanced
636
637 Notes:
638 Each call to `VecMDotBegin()` should be paired with a call to `VecMDotEnd()`.
639
640 .seealso: `VecMDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
641 `VecTDotBegin()`, `VecTDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`, `PetscCommSplitReductionBegin()`
642 @*/
VecMDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])643 PetscErrorCode VecMDotEnd(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
644 {
645 PetscSplitReduction *sr;
646 MPI_Comm comm;
647 PetscInt i;
648
649 PetscFunctionBegin;
650 if (PetscDefined(HAVE_THREADSAFETY)) PetscFunctionReturn(PETSC_SUCCESS);
651 PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
652 PetscCall(PetscSplitReductionGet(comm, &sr));
653 PetscCall(PetscSplitReductionEnd(sr));
654
655 PetscCheck(sr->numopsend < sr->numopsbegin, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() more times then VecxxxBegin()");
656 PetscCheck(!x || (void *)x == sr->invecs[sr->numopsend], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
657 PetscCheck(sr->reducetype[sr->numopsend] == PETSC_SR_REDUCE_SUM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecDotEnd() on a reduction started with VecNormBegin()");
658 for (i = 0; i < nv; i++) result[i] = sr->gvalues[sr->numopsend++];
659
660 /*
661 We are finished getting all the results so reset to no outstanding requests
662 */
663 if (sr->numopsend == sr->numopsbegin) {
664 sr->state = STATE_BEGIN;
665 sr->numopsend = 0;
666 sr->numopsbegin = 0;
667 }
668 PetscFunctionReturn(PETSC_SUCCESS);
669 }
670
671 /*@
672 VecMTDotBegin - Starts a split phase transpose multiple dot product computation.
673
674 Input Parameters:
675 + x - the first vector
676 . nv - number of vectors
677 . y - array of vectors
678 - result - where the result will go (can be `NULL`)
679
680 Level: advanced
681
682 Notes:
683 Each call to `VecMTDotBegin()` should be paired with a call to `VecMTDotEnd()`.
684
685 .seealso: `VecMTDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
686 `VecDotBegin()`, `VecDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `PetscCommSplitReductionBegin()`
687 @*/
VecMTDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])688 PetscErrorCode VecMTDotBegin(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
689 {
690 PetscSplitReduction *sr;
691 MPI_Comm comm;
692 PetscInt i;
693
694 PetscFunctionBegin;
695 PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
696 if (PetscDefined(HAVE_THREADSAFETY)) {
697 PetscCheck(result, comm, PETSC_ERR_ARG_NULL, "result cannot be NULL when configuring --with-threadsafety");
698 PetscCall(VecMTDot(x, nv, y, result));
699 PetscFunctionReturn(PETSC_SUCCESS);
700 }
701 PetscCall(PetscSplitReductionGet(comm, &sr));
702 PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
703 for (i = 0; i < nv; i++) {
704 if (sr->numopsbegin + i >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
705 sr->reducetype[sr->numopsbegin + i] = PETSC_SR_REDUCE_SUM;
706 sr->invecs[sr->numopsbegin + i] = (void *)x;
707 }
708 PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
709 PetscUseTypeMethod(x, mtdot_local, nv, y, sr->lvalues + sr->numopsbegin);
710 PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
711 sr->numopsbegin += nv;
712 PetscFunctionReturn(PETSC_SUCCESS);
713 }
714
715 /*@
716 VecMTDotEnd - Ends a split phase transpose multiple dot product computation.
717
718 Input Parameters:
719 + x - the first vector (can be `NULL`)
720 . nv - number of vectors
721 - y - array of vectors (can be `NULL`)
722
723 Output Parameter:
724 . result - where the result will go
725
726 Level: advanced
727
728 Notes:
729 Each call to `VecTDotBegin()` should be paired with a call to `VecTDotEnd()`.
730
731 .seealso: `VecMTDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
732 `VecDotBegin()`, `VecDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `PetscCommSplitReductionBegin()`
733 @*/
VecMTDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])734 PetscErrorCode VecMTDotEnd(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
735 {
736 PetscFunctionBegin;
737 /*
738 MTDotEnd() is the same as MDotEnd() so reuse the code
739 */
740 PetscCall(VecMDotEnd(x, nv, y, result));
741 PetscFunctionReturn(PETSC_SUCCESS);
742 }
743