1 #include <petscsys.h> /*I "petscsys.h" I*/ 2 #include <petsc/private/mpiutils.h> 3 4 /*@C 5 PetscGatherNumberOfMessages - Computes the number of messages an MPI rank expects to receive during a neighbor communication 6 7 Collective 8 9 Input Parameters: 10 + comm - Communicator 11 . iflags - an array of integers of length sizeof(comm). A '1' in `ilengths`[i] represent a 12 message from current node to ith node. Optionally `NULL` 13 - ilengths - Non zero ilengths[i] represent a message to i of length `ilengths`[i]. 14 Optionally `NULL`. 15 16 Output Parameter: 17 . nrecvs - number of messages received 18 19 Level: developer 20 21 Notes: 22 With this info, the correct message lengths can be determined using 23 `PetscGatherMessageLengths()` 24 25 Either `iflags` or `ilengths` should be provided. If `iflags` is not 26 provided (`NULL`) it can be computed from `ilengths`. If `iflags` is 27 provided, `ilengths` is not required. 28 29 .seealso: `PetscGatherMessageLengths()`, `PetscGatherMessageLengths2()`, `PetscCommBuildTwoSided()` 30 @*/ 31 PetscErrorCode PetscGatherNumberOfMessages(MPI_Comm comm, const PetscMPIInt iflags[], const PetscMPIInt ilengths[], PetscMPIInt *nrecvs) 32 { 33 PetscMPIInt size, rank, *recv_buf, i, *iflags_local = NULL, *iflags_localm = NULL; 34 35 PetscFunctionBegin; 36 PetscCallMPI(MPI_Comm_size(comm, &size)); 37 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 38 39 PetscCall(PetscMalloc2(size, &recv_buf, size, &iflags_localm)); 40 41 /* If iflags not provided, compute iflags from ilengths */ 42 if (!iflags) { 43 PetscCheck(ilengths, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Either iflags or ilengths should be provided"); 44 iflags_local = iflags_localm; 45 for (i = 0; i < size; i++) { 46 if (ilengths[i]) iflags_local[i] = 1; 47 else iflags_local[i] = 0; 48 } 49 } else iflags_local = (PetscMPIInt *)iflags; 50 51 /* Post an allreduce to determine the number of messages the current MPI rank will receive */ 52 PetscCall(MPIU_Allreduce(iflags_local, recv_buf, size, MPI_INT, MPI_SUM, comm)); 53 *nrecvs = recv_buf[rank]; 54 55 PetscCall(PetscFree2(recv_buf, iflags_localm)); 56 PetscFunctionReturn(PETSC_SUCCESS); 57 } 58 59 /*@C 60 PetscGatherMessageLengths - Computes information about messages that an MPI rank will receive, 61 including (from-id,length) pairs for each message. 62 63 Collective 64 65 Input Parameters: 66 + comm - Communicator 67 . nsends - number of messages that are to be sent. 68 . nrecvs - number of messages being received 69 - ilengths - an array of integers of length sizeof(comm) 70 a non zero `ilengths`[i] represent a message to i of length `ilengths`[i] 71 72 Output Parameters: 73 + onodes - list of ranks from which messages are expected 74 - olengths - corresponding message lengths 75 76 Level: developer 77 78 Notes: 79 With this info, the correct `MPI_Irecv()` can be posted with the correct 80 from-id, with a buffer with the right amount of memory required. 81 82 The calling function deallocates the memory in onodes and olengths 83 84 To determine `nrecvs`, one can use `PetscGatherNumberOfMessages()` 85 86 .seealso: `PetscGatherNumberOfMessages()`, `PetscGatherMessageLengths2()`, `PetscCommBuildTwoSided()` 87 @*/ 88 PetscErrorCode PetscGatherMessageLengths(MPI_Comm comm, PetscMPIInt nsends, PetscMPIInt nrecvs, const PetscMPIInt ilengths[], PetscMPIInt **onodes, PetscMPIInt **olengths) 89 { 90 PetscMPIInt size, rank, tag, i, j; 91 MPI_Request *s_waits = NULL, *r_waits = NULL; 92 MPI_Status *w_status = NULL; 93 94 PetscFunctionBegin; 95 PetscCallMPI(MPI_Comm_size(comm, &size)); 96 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 97 PetscCall(PetscCommGetNewTag(comm, &tag)); 98 99 /* cannot use PetscMalloc3() here because in the call to MPI_Waitall() they MUST be contiguous */ 100 PetscCall(PetscMalloc2(nrecvs + nsends, &r_waits, nrecvs + nsends, &w_status)); 101 if (nrecvs + nsends) s_waits = r_waits + nrecvs; 102 103 /* Post the Irecv to get the message length-info */ 104 PetscCall(PetscMalloc1(nrecvs, olengths)); 105 for (i = 0; i < nrecvs; i++) PetscCallMPI(MPI_Irecv((*olengths) + i, 1, MPI_INT, MPI_ANY_SOURCE, tag, comm, r_waits + i)); 106 107 /* Post the Isends with the message length-info */ 108 for (i = 0, j = 0; i < size; ++i) { 109 if (ilengths[i]) { 110 PetscCallMPI(MPI_Isend((void *)(ilengths + i), 1, MPI_INT, i, tag, comm, s_waits + j)); 111 j++; 112 } 113 } 114 115 /* Post waits on sends and receives */ 116 if (nrecvs + nsends) PetscCallMPI(MPI_Waitall(nrecvs + nsends, r_waits, w_status)); 117 118 /* Pack up the received data */ 119 PetscCall(PetscMalloc1(nrecvs, onodes)); 120 for (i = 0; i < nrecvs; ++i) { 121 (*onodes)[i] = w_status[i].MPI_SOURCE; 122 #if defined(PETSC_HAVE_OMPI_MAJOR_VERSION) 123 /* This line is a workaround for a bug in OpenMPI-2.1.1 distributed by Ubuntu-18.04.2 LTS. 124 It happens in self-to-self MPI_Send/Recv using MPI_ANY_SOURCE for message matching. OpenMPI 125 does not put correct value in recv buffer. See also 126 https://lists.mcs.anl.gov/pipermail/petsc-dev/2019-July/024803.html 127 https://www.mail-archive.com/users@lists.open-mpi.org//msg33383.html 128 */ 129 if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank]; 130 #endif 131 } 132 PetscCall(PetscFree2(r_waits, w_status)); 133 PetscFunctionReturn(PETSC_SUCCESS); 134 } 135 136 /* Same as PetscGatherNumberOfMessages(), except using PetscInt for ilengths[] */ 137 PetscErrorCode PetscGatherNumberOfMessages_Private(MPI_Comm comm, const PetscMPIInt iflags[], const PetscInt ilengths[], PetscMPIInt *nrecvs) 138 { 139 PetscMPIInt size, rank, *recv_buf, i, *iflags_local = NULL, *iflags_localm = NULL; 140 141 PetscFunctionBegin; 142 PetscCallMPI(MPI_Comm_size(comm, &size)); 143 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 144 145 PetscCall(PetscMalloc2(size, &recv_buf, size, &iflags_localm)); 146 147 /* If iflags not provided, compute iflags from ilengths */ 148 if (!iflags) { 149 PetscCheck(ilengths, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Either iflags or ilengths should be provided"); 150 iflags_local = iflags_localm; 151 for (i = 0; i < size; i++) { 152 if (ilengths[i]) iflags_local[i] = 1; 153 else iflags_local[i] = 0; 154 } 155 } else iflags_local = (PetscMPIInt *)iflags; 156 157 /* Post an allreduce to determine the number of messages the current MPI rank will receive */ 158 PetscCall(MPIU_Allreduce(iflags_local, recv_buf, size, MPI_INT, MPI_SUM, comm)); 159 *nrecvs = recv_buf[rank]; 160 161 PetscCall(PetscFree2(recv_buf, iflags_localm)); 162 PetscFunctionReturn(PETSC_SUCCESS); 163 } 164 165 /* Same as PetscGatherMessageLengths(), except using PetscInt for message lengths */ 166 PetscErrorCode PetscGatherMessageLengths_Private(MPI_Comm comm, PetscMPIInt nsends, PetscMPIInt nrecvs, const PetscInt ilengths[], PetscMPIInt **onodes, PetscInt **olengths) 167 { 168 PetscMPIInt size, rank, tag, i, j; 169 MPI_Request *s_waits = NULL, *r_waits = NULL; 170 MPI_Status *w_status = NULL; 171 172 PetscFunctionBegin; 173 PetscCallMPI(MPI_Comm_size(comm, &size)); 174 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 175 PetscCall(PetscCommGetNewTag(comm, &tag)); 176 177 /* cannot use PetscMalloc3() here because in the call to MPI_Waitall() they MUST be contiguous */ 178 PetscCall(PetscMalloc2(nrecvs + nsends, &r_waits, nrecvs + nsends, &w_status)); 179 if (r_waits) s_waits = r_waits + nrecvs; 180 181 /* Post the Irecv to get the message length-info */ 182 PetscCall(PetscMalloc1(nrecvs, olengths)); 183 for (i = 0; i < nrecvs; i++) PetscCallMPI(MPI_Irecv((*olengths) + i, 1, MPIU_INT, MPI_ANY_SOURCE, tag, comm, r_waits + i)); 184 185 /* Post the Isends with the message length-info */ 186 for (i = 0, j = 0; i < size; ++i) { 187 if (ilengths[i]) { 188 PetscCallMPI(MPI_Isend((void *)(ilengths + i), 1, MPIU_INT, i, tag, comm, s_waits + j)); 189 j++; 190 } 191 } 192 193 /* Post waits on sends and receives */ 194 if (nrecvs + nsends) PetscCallMPI(MPI_Waitall(nrecvs + nsends, r_waits, w_status)); 195 196 /* Pack up the received data */ 197 PetscCall(PetscMalloc1(nrecvs, onodes)); 198 for (i = 0; i < nrecvs; ++i) { 199 (*onodes)[i] = w_status[i].MPI_SOURCE; 200 if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank]; /* See comments in PetscGatherMessageLengths */ 201 } 202 PetscCall(PetscFree2(r_waits, w_status)); 203 PetscFunctionReturn(PETSC_SUCCESS); 204 } 205 206 /*@C 207 PetscGatherMessageLengths2 - Computes info about messages that a MPI rank will receive, 208 including (from-id,length) pairs for each message. Same functionality as `PetscGatherMessageLengths()` 209 except it takes TWO ilenths and output TWO olengths. 210 211 Collective 212 213 Input Parameters: 214 + comm - Communicator 215 . nsends - number of messages that are to be sent. 216 . nrecvs - number of messages being received 217 . ilengths1 - first array of integers of length sizeof(comm) 218 - ilengths2 - second array of integers of length sizeof(comm) 219 220 Output Parameters: 221 + onodes - list of ranks from which messages are expected 222 . olengths1 - first corresponding message lengths 223 - olengths2 - second message lengths 224 225 Level: developer 226 227 Notes: 228 With this info, the correct `MPI_Irecv()` can be posted with the correct 229 from-id, with a buffer with the right amount of memory required. 230 231 The calling function should `PetscFree()` the memory in `onodes` and `olengths` 232 233 To determine `nrecvs`, one can use `PetscGatherNumberOfMessages()` 234 235 .seealso: `PetscGatherMessageLengths()`, `PetscGatherNumberOfMessages()`, `PetscCommBuildTwoSided()` 236 @*/ 237 PetscErrorCode PetscGatherMessageLengths2(MPI_Comm comm, PetscMPIInt nsends, PetscMPIInt nrecvs, const PetscMPIInt ilengths1[], const PetscMPIInt ilengths2[], PetscMPIInt **onodes, PetscMPIInt **olengths1, PetscMPIInt **olengths2) 238 { 239 PetscMPIInt size, tag, i, j, *buf_s = NULL, *buf_r = NULL, *buf_j = NULL; 240 MPI_Request *s_waits = NULL, *r_waits = NULL; 241 MPI_Status *w_status = NULL; 242 243 PetscFunctionBegin; 244 PetscCallMPI(MPI_Comm_size(comm, &size)); 245 PetscCall(PetscCommGetNewTag(comm, &tag)); 246 247 /* cannot use PetscMalloc5() because r_waits and s_waits must be contiguous for the call to MPI_Waitall() */ 248 PetscCall(PetscMalloc4(nrecvs + nsends, &r_waits, 2 * nrecvs, &buf_r, 2 * nsends, &buf_s, nrecvs + nsends, &w_status)); 249 s_waits = r_waits + nrecvs; 250 251 /* Post the Irecv to get the message length-info */ 252 PetscCall(PetscMalloc1(nrecvs + 1, olengths1)); 253 PetscCall(PetscMalloc1(nrecvs + 1, olengths2)); 254 for (i = 0; i < nrecvs; i++) { 255 buf_j = buf_r + (2 * i); 256 PetscCallMPI(MPI_Irecv(buf_j, 2, MPI_INT, MPI_ANY_SOURCE, tag, comm, r_waits + i)); 257 } 258 259 /* Post the Isends with the message length-info */ 260 for (i = 0, j = 0; i < size; ++i) { 261 if (ilengths1[i]) { 262 buf_j = buf_s + (2 * j); 263 buf_j[0] = *(ilengths1 + i); 264 buf_j[1] = *(ilengths2 + i); 265 PetscCallMPI(MPI_Isend(buf_j, 2, MPI_INT, i, tag, comm, s_waits + j)); 266 j++; 267 } 268 } 269 PetscCheck(j == nsends, PETSC_COMM_SELF, PETSC_ERR_PLIB, "j %d not equal to expected number of sends %d", j, nsends); 270 271 /* Post waits on sends and receives */ 272 if (nrecvs + nsends) PetscCallMPI(MPI_Waitall(nrecvs + nsends, r_waits, w_status)); 273 274 /* Pack up the received data */ 275 PetscCall(PetscMalloc1(nrecvs + 1, onodes)); 276 for (i = 0; i < nrecvs; ++i) { 277 (*onodes)[i] = w_status[i].MPI_SOURCE; 278 buf_j = buf_r + (2 * i); 279 (*olengths1)[i] = buf_j[0]; 280 (*olengths2)[i] = buf_j[1]; 281 } 282 283 PetscCall(PetscFree4(r_waits, buf_r, buf_s, w_status)); 284 PetscFunctionReturn(PETSC_SUCCESS); 285 } 286 287 /* 288 Allocate a buffer sufficient to hold messages of size specified in olengths. 289 And post Irecvs on these buffers using node info from onodes 290 */ 291 PetscErrorCode PetscPostIrecvInt(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt nrecvs, const PetscMPIInt onodes[], const PetscMPIInt olengths[], PetscInt ***rbuf, MPI_Request **r_waits) 292 { 293 PetscInt **rbuf_t, i, len = 0; 294 MPI_Request *r_waits_t; 295 296 PetscFunctionBegin; 297 /* compute memory required for recv buffers */ 298 for (i = 0; i < nrecvs; i++) len += olengths[i]; /* each message length */ 299 300 /* allocate memory for recv buffers */ 301 PetscCall(PetscMalloc1(nrecvs + 1, &rbuf_t)); 302 PetscCall(PetscMalloc1(len, &rbuf_t[0])); 303 for (i = 1; i < nrecvs; ++i) rbuf_t[i] = rbuf_t[i - 1] + olengths[i - 1]; 304 305 /* Post the receives */ 306 PetscCall(PetscMalloc1(nrecvs, &r_waits_t)); 307 for (i = 0; i < nrecvs; ++i) PetscCallMPI(MPI_Irecv(rbuf_t[i], olengths[i], MPIU_INT, onodes[i], tag, comm, r_waits_t + i)); 308 309 *rbuf = rbuf_t; 310 *r_waits = r_waits_t; 311 PetscFunctionReturn(PETSC_SUCCESS); 312 } 313 314 PetscErrorCode PetscPostIrecvScalar(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt nrecvs, const PetscMPIInt onodes[], const PetscMPIInt olengths[], PetscScalar ***rbuf, MPI_Request **r_waits) 315 { 316 PetscMPIInt i; 317 PetscScalar **rbuf_t; 318 MPI_Request *r_waits_t; 319 PetscInt len = 0; 320 321 PetscFunctionBegin; 322 /* compute memory required for recv buffers */ 323 for (i = 0; i < nrecvs; i++) len += olengths[i]; /* each message length */ 324 325 /* allocate memory for recv buffers */ 326 PetscCall(PetscMalloc1(nrecvs + 1, &rbuf_t)); 327 PetscCall(PetscMalloc1(len, &rbuf_t[0])); 328 for (i = 1; i < nrecvs; ++i) rbuf_t[i] = rbuf_t[i - 1] + olengths[i - 1]; 329 330 /* Post the receives */ 331 PetscCall(PetscMalloc1(nrecvs, &r_waits_t)); 332 for (i = 0; i < nrecvs; ++i) PetscCallMPI(MPI_Irecv(rbuf_t[i], olengths[i], MPIU_SCALAR, onodes[i], tag, comm, r_waits_t + i)); 333 334 *rbuf = rbuf_t; 335 *r_waits = r_waits_t; 336 PetscFunctionReturn(PETSC_SUCCESS); 337 } 338