/* Utilities for setting up two-sided MPI communication: determining how many
   messages a rank will receive, gathering (source, length) pairs for them, and
   posting the matching Irecv buffers. */
#include <petscsys.h> /*I "petscsys.h" I*/
#include <petsc/private/mpiutils.h>

/*@C
  PetscGatherNumberOfMessages - Computes the number of messages an MPI rank expects to receive during a neighbor communication

  Collective

  Input Parameters:
+ comm     - Communicator
. iflags   - an array of integers of length equal to the communicator size. A '1' in `iflags`[i] represents a
             message from the current rank to rank i. Optionally `NULL`
- ilengths - Non zero `ilengths`[i] represents a message to rank i of length `ilengths`[i].
             Optionally `NULL`.

  Output Parameter:
. nrecvs - number of messages the current rank will receive

  Level: developer

  Notes:
  With this info, the correct message lengths can be determined using
  `PetscGatherMessageLengths()`

  Either `iflags` or `ilengths` should be provided. If `iflags` is not
  provided (`NULL`) it can be computed from `ilengths`. If `iflags` is
  provided, `ilengths` is not required.

.seealso: `PetscGatherMessageLengths()`, `PetscGatherMessageLengths2()`, `PetscCommBuildTwoSided()`
@*/
PetscErrorCode PetscGatherNumberOfMessages(MPI_Comm comm, const PetscMPIInt iflags[], const PetscMPIInt ilengths[], PetscMPIInt *nrecvs)
{
  PetscMPIInt size, rank, *recv_buf, i, *iflags_local = NULL, *iflags_localm = NULL;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  PetscCall(PetscMalloc2(size, &recv_buf, size, &iflags_localm));

  /* If iflags not provided, compute iflags from ilengths: flag is 1 exactly when a message of nonzero length is sent to rank i */
  if (!iflags) {
    PetscCheck(ilengths, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Either iflags or ilengths should be provided");
    iflags_local = iflags_localm;
    for (i = 0; i < size; i++) {
      if (ilengths[i]) iflags_local[i] = 1;
      else iflags_local[i] = 0;
    }
  } else iflags_local = (PetscMPIInt *)iflags; /* cast away const; the buffer is only read by the allreduce below */

  /* Post an allreduce to determine the number of messages the current node will receive:
     summing the 0/1 flag arrays over all ranks leaves, in recv_buf[r], the count of ranks sending to r */
  PetscCall(MPIU_Allreduce(iflags_local, recv_buf, size, MPI_INT, MPI_SUM, comm));
  *nrecvs = recv_buf[rank];

  PetscCall(PetscFree2(recv_buf, iflags_localm));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscGatherMessageLengths - Computes information about messages that an MPI rank will receive,
  including (from-id,length) pairs for each message.

  Collective

  Input Parameters:
+ comm     - Communicator
. nsends   - number of messages that are to be sent.
. nrecvs   - number of messages being received
- ilengths - an array of integers of length equal to the communicator size;
             a non zero `ilengths`[i] represents a message to rank i of length `ilengths`[i]

  Output Parameters:
+ onodes   - list of ranks from which messages are expected
- olengths - corresponding message lengths

  Level: developer

  Notes:
  With this info, the correct `MPI_Irecv()` can be posted with the correct
  from-id, with a buffer with the right amount of memory required.

  The calling function deallocates the memory in `onodes` and `olengths`

  To determine `nrecvs`, one can use `PetscGatherNumberOfMessages()`

.seealso: `PetscGatherNumberOfMessages()`, `PetscGatherMessageLengths2()`, `PetscCommBuildTwoSided()`
@*/
PetscErrorCode PetscGatherMessageLengths(MPI_Comm comm, PetscMPIInt nsends, PetscMPIInt nrecvs, const PetscMPIInt ilengths[], PetscMPIInt **onodes, PetscMPIInt **olengths)
{
  PetscMPIInt  size, rank, tag, i, j;
  MPI_Request *s_waits = NULL, *r_waits = NULL;
  MPI_Status  *w_status = NULL;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscCommGetNewTag(comm, &tag)); /* fresh tag so these messages cannot match other traffic on comm */

  /* cannot use PetscMalloc3() here because in the call to MPI_Waitall() they MUST be contiguous */
  PetscCall(PetscMalloc2(nrecvs + nsends, &r_waits, nrecvs + nsends, &w_status));
  s_waits = r_waits + nrecvs; /* sends occupy the tail of the same request array */

  /* Post the Irecv to get the message length-info; sources are unknown, so receive from MPI_ANY_SOURCE */
  PetscCall(PetscMalloc1(nrecvs, olengths));
  for (i = 0; i < nrecvs; i++) PetscCallMPI(MPI_Irecv((*olengths) + i, 1, MPI_INT, MPI_ANY_SOURCE, tag, comm, r_waits + i));

  /* Post the Isends with the message length-info; one send per rank with a nonzero length */
  for (i = 0, j = 0; i < size; ++i) {
    if (ilengths[i]) {
      PetscCallMPI(MPI_Isend((void *)(ilengths + i), 1, MPI_INT, i, tag, comm, s_waits + j));
      j++;
    }
  }

  /* Post waits on sends and receives */
  if (nrecvs + nsends) PetscCallMPI(MPI_Waitall(nrecvs + nsends, r_waits, w_status));

  /* Pack up the received data: the first nrecvs statuses correspond to the receives posted above */
  PetscCall(PetscMalloc1(nrecvs, onodes));
  for (i = 0; i < nrecvs; ++i) {
    (*onodes)[i] = w_status[i].MPI_SOURCE;
#if defined(PETSC_HAVE_OMPI_MAJOR_VERSION)
    /* This line is a workaround for a bug in OpenMPI-2.1.1 distributed by Ubuntu-18.04.2 LTS.
       It happens in self-to-self MPI_Send/Recv using MPI_ANY_SOURCE for message matching. OpenMPI
       does not put correct value in recv buffer. See also
       https://lists.mcs.anl.gov/pipermail/petsc-dev/2019-July/024803.html
       https://www.mail-archive.com/users@lists.open-mpi.org//msg33383.html
    */
    if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank];
#endif
  }
  PetscCall(PetscFree2(r_waits, w_status));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Same as PetscGatherNumberOfMessages(), except using PetscInt for ilengths[] */
PetscErrorCode PetscGatherNumberOfMessages_Private(MPI_Comm comm, const PetscMPIInt iflags[], const PetscInt ilengths[], PetscMPIInt *nrecvs)
{
  PetscMPIInt size, rank, *recv_buf, i, *iflags_local = NULL, *iflags_localm = NULL;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  PetscCall(PetscMalloc2(size, &recv_buf, size, &iflags_localm));

  /* If iflags not provided, compute iflags from ilengths */
  if (!iflags) {
    PetscCheck(ilengths, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Either iflags or ilengths should be provided");
    iflags_local = iflags_localm;
    for (i = 0; i < size; i++) {
      if (ilengths[i]) iflags_local[i] = 1;
      else iflags_local[i] = 0;
    }
  } else iflags_local = (PetscMPIInt *)iflags;

  /* Post an allreduce to determine the number of messages the current node will receive */
  PetscCall(MPIU_Allreduce(iflags_local, recv_buf, size, MPI_INT, MPI_SUM, comm));
  *nrecvs = recv_buf[rank];

  PetscCall(PetscFree2(recv_buf, iflags_localm));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Same as PetscGatherMessageLengths(), except using PetscInt for message lengths
   (hence MPIU_INT datatypes, and the self-message fixup applied unconditionally) */
PetscErrorCode PetscGatherMessageLengths_Private(MPI_Comm comm, PetscMPIInt nsends, PetscMPIInt nrecvs, const PetscInt ilengths[], PetscMPIInt **onodes, PetscInt **olengths)
{
  PetscMPIInt  size, rank, tag, i, j;
  MPI_Request *s_waits = NULL, *r_waits = NULL;
  MPI_Status  *w_status = NULL;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscCommGetNewTag(comm, &tag));

  /* cannot use PetscMalloc3() here because in the call to MPI_Waitall() they MUST be contiguous */
  PetscCall(PetscMalloc2(nrecvs + nsends, &r_waits, nrecvs + nsends, &w_status));
  s_waits = r_waits + nrecvs;

  /* Post the Irecv to get the message length-info */
  PetscCall(PetscMalloc1(nrecvs, olengths));
  for (i = 0; i < nrecvs; i++) PetscCallMPI(MPI_Irecv((*olengths) + i, 1, MPIU_INT, MPI_ANY_SOURCE, tag, comm, r_waits + i));

  /* Post the Isends with the message length-info */
  for (i = 0, j = 0; i < size; ++i) {
    if (ilengths[i]) {
      PetscCallMPI(MPI_Isend((void *)(ilengths + i), 1, MPIU_INT, i, tag, comm, s_waits + j));
      j++;
    }
  }

  /* Post waits on sends and receives */
  if (nrecvs + nsends) PetscCallMPI(MPI_Waitall(nrecvs + nsends, r_waits, w_status));

  /* Pack up the received data */
  PetscCall(PetscMalloc1(nrecvs, onodes));
  for (i = 0; i < nrecvs; ++i) {
    (*onodes)[i] = w_status[i].MPI_SOURCE;
    if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank]; /* See comments in PetscGatherMessageLengths */
  }
  PetscCall(PetscFree2(r_waits, w_status));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscGatherMessageLengths2 - Computes info about messages that an MPI rank will receive,
  including (from-id,length) pairs for each message. Same functionality as `PetscGatherMessageLengths()`
  except it takes TWO ilengths and outputs TWO olengths.

  Collective

  Input Parameters:
+ comm      - Communicator
. nsends    - number of messages that are to be sent.
. nrecvs    - number of messages being received
. ilengths1 - first array of integers of length equal to the communicator size
- ilengths2 - second array of integers of length equal to the communicator size

  Output Parameters:
+ onodes    - list of ranks from which messages are expected
. olengths1 - first corresponding message lengths
- olengths2 - second message lengths

  Level: developer

  Notes:
  With this info, the correct `MPI_Irecv()` can be posted with the correct
  from-id, with a buffer with the right amount of memory required.

  The calling function should `PetscFree()` the memory in `onodes` and `olengths`

  To determine `nrecvs`, one can use `PetscGatherNumberOfMessages()`

.seealso: `PetscGatherMessageLengths()`, `PetscGatherNumberOfMessages()`, `PetscCommBuildTwoSided()`
@*/
PetscErrorCode PetscGatherMessageLengths2(MPI_Comm comm, PetscMPIInt nsends, PetscMPIInt nrecvs, const PetscMPIInt ilengths1[], const PetscMPIInt ilengths2[], PetscMPIInt **onodes, PetscMPIInt **olengths1, PetscMPIInt **olengths2)
{
  PetscMPIInt  size, tag, i, j, *buf_s = NULL, *buf_r = NULL, *buf_j = NULL;
  MPI_Request *s_waits = NULL, *r_waits = NULL;
  MPI_Status  *w_status = NULL;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscCommGetNewTag(comm, &tag));

  /* cannot use PetscMalloc5() because r_waits and s_waits must be contiguous for the call to MPI_Waitall() */
  PetscCall(PetscMalloc4(nrecvs + nsends, &r_waits, 2 * nrecvs, &buf_r, 2 * nsends, &buf_s, nrecvs + nsends, &w_status));
  s_waits = r_waits + nrecvs;

  /* Post the Irecv to get the message length-info; both lengths travel as one 2-int message.
     NOTE(review): the +1 in the allocations presumably keeps the output pointers non-degenerate
     when nrecvs == 0 — confirm against PetscMalloc1() zero-size semantics */
  PetscCall(PetscMalloc1(nrecvs + 1, olengths1));
  PetscCall(PetscMalloc1(nrecvs + 1, olengths2));
  for (i = 0; i < nrecvs; i++) {
    buf_j = buf_r + (2 * i);
    PetscCallMPI(MPI_Irecv(buf_j, 2, MPI_INT, MPI_ANY_SOURCE, tag, comm, r_waits + i));
  }

  /* Post the Isends with the message length-info; a send is posted iff ilengths1[i] is nonzero */
  for (i = 0, j = 0; i < size; ++i) {
    if (ilengths1[i]) {
      buf_j    = buf_s + (2 * j);
      buf_j[0] = *(ilengths1 + i);
      buf_j[1] = *(ilengths2 + i);
      PetscCallMPI(MPI_Isend(buf_j, 2, MPI_INT, i, tag, comm, s_waits + j));
      j++;
    }
  }
  PetscCheck(j == nsends, PETSC_COMM_SELF, PETSC_ERR_PLIB, "j %d not equal to expected number of sends %d", j, nsends);

  /* Post waits on sends and receives */
  if (nrecvs + nsends) PetscCallMPI(MPI_Waitall(nrecvs + nsends, r_waits, w_status));

  /* Pack up the received data */
  PetscCall(PetscMalloc1(nrecvs + 1, onodes));
  for (i = 0; i < nrecvs; ++i) {
    (*onodes)[i]    = w_status[i].MPI_SOURCE;
    buf_j           = buf_r + (2 * i);
    (*olengths1)[i] = buf_j[0];
    (*olengths2)[i] = buf_j[1];
  }

  PetscCall(PetscFree4(r_waits, buf_r, buf_s, w_status));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Allocate a buffer sufficient to hold messages of size specified in olengths.
  And post Irecvs on these buffers using node info from onodes

  The receive buffers are carved out of one contiguous allocation anchored at rbuf[0];
  the caller frees rbuf[0] and then rbuf itself, plus r_waits after waiting on them.
*/
PetscErrorCode PetscPostIrecvInt(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt nrecvs, const PetscMPIInt onodes[], const PetscMPIInt olengths[], PetscInt ***rbuf, MPI_Request **r_waits)
{
  PetscInt   **rbuf_t, i, len = 0;
  MPI_Request *r_waits_t;

  PetscFunctionBegin;
  /* compute memory required for recv buffers */
  for (i = 0; i < nrecvs; i++) len += olengths[i]; /* each message length */

  /* allocate memory for recv buffers; +1 so rbuf_t[0] is writable below even when nrecvs == 0 */
  PetscCall(PetscMalloc1(nrecvs + 1, &rbuf_t));
  PetscCall(PetscMalloc1(len, &rbuf_t[0]));
  for (i = 1; i < nrecvs; ++i) rbuf_t[i] = rbuf_t[i - 1] + olengths[i - 1];

  /* Post the receives */
  PetscCall(PetscMalloc1(nrecvs, &r_waits_t));
  for (i = 0; i < nrecvs; ++i) PetscCallMPI(MPI_Irecv(rbuf_t[i], olengths[i], MPIU_INT, onodes[i], tag, comm, r_waits_t + i));

  *rbuf    = rbuf_t;
  *r_waits = r_waits_t;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Same layout and ownership as PetscPostIrecvInt(), but for PetscScalar data */
PetscErrorCode PetscPostIrecvScalar(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt nrecvs, const PetscMPIInt onodes[], const PetscMPIInt olengths[], PetscScalar ***rbuf, MPI_Request **r_waits)
{
  PetscMPIInt   i;
  PetscScalar **rbuf_t;
  MPI_Request  *r_waits_t;
  PetscInt      len = 0;

  PetscFunctionBegin;
  /* compute memory required for recv buffers */
  for (i = 0; i < nrecvs; i++) len += olengths[i]; /* each message length */

  /* allocate memory for recv buffers; +1 so rbuf_t[0] is writable below even when nrecvs == 0 */
  PetscCall(PetscMalloc1(nrecvs + 1, &rbuf_t));
  PetscCall(PetscMalloc1(len, &rbuf_t[0]));
  for (i = 1; i < nrecvs; ++i) rbuf_t[i] = rbuf_t[i - 1] + olengths[i - 1];

  /* Post the receives */
  PetscCall(PetscMalloc1(nrecvs, &r_waits_t));
  for (i = 0; i < nrecvs; ++i) PetscCallMPI(MPI_Irecv(rbuf_t[i], olengths[i], MPIU_SCALAR, onodes[i], tag, comm, r_waits_t + i));

  *rbuf    = rbuf_t;
  *r_waits = r_waits_t;
  PetscFunctionReturn(PETSC_SUCCESS);
}