1 2 #include <petscsys.h> /*I "petscsys.h" I*/ 3 #include <petsc/private/mpiutils.h> 4 5 /*@C 6 PetscGatherNumberOfMessages - Computes the number of messages an MPI rank expects to receive during a neighbor communication 7 8 Collective 9 10 Input Parameters: 11 + comm - Communicator 12 . iflags - an array of integers of length sizeof(comm). A '1' in ilengths[i] represent a 13 message from current node to ith node. Optionally NULL 14 - ilengths - Non zero ilengths[i] represent a message to i of length ilengths[i]. 15 Optionally NULL. 16 17 Output Parameters: 18 . nrecvs - number of messages received 19 20 Level: developer 21 22 Notes: 23 With this info, the correct message lengths can be determined using 24 `PetscGatherMessageLengths()` 25 26 Either iflags or ilengths should be provided. If iflags is not 27 provided (NULL) it can be computed from ilengths. If iflags is 28 provided, ilengths is not required. 29 30 .seealso: `PetscGatherMessageLengths()`, `PetscGatherMessageLengths2()`, `PetscCommBuildTwoSided()` 31 @*/ 32 PetscErrorCode PetscGatherNumberOfMessages(MPI_Comm comm, const PetscMPIInt iflags[], const PetscMPIInt ilengths[], PetscMPIInt *nrecvs) 33 { 34 PetscMPIInt size, rank, *recv_buf, i, *iflags_local = NULL, *iflags_localm = NULL; 35 36 PetscFunctionBegin; 37 PetscCallMPI(MPI_Comm_size(comm, &size)); 38 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 39 40 PetscCall(PetscMalloc2(size, &recv_buf, size, &iflags_localm)); 41 42 /* If iflags not provided, compute iflags from ilengths */ 43 if (!iflags) { 44 PetscCheck(ilengths, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Either iflags or ilengths should be provided"); 45 iflags_local = iflags_localm; 46 for (i = 0; i < size; i++) { 47 if (ilengths[i]) iflags_local[i] = 1; 48 else iflags_local[i] = 0; 49 } 50 } else iflags_local = (PetscMPIInt *)iflags; 51 52 /* Post an allreduce to determine the numer of messages the current node will receive */ 53 PetscCall(MPIU_Allreduce(iflags_local, recv_buf, size, MPI_INT, MPI_SUM, comm)); 54 *nrecvs = recv_buf[rank]; 55 56 PetscCall(PetscFree2(recv_buf, iflags_localm)); 57 PetscFunctionReturn(0); 58 } 59 60 /*@C 61 PetscGatherMessageLengths - Computes infomation about messages that an MPI rank will receive, 62 including (from-id,length) pairs for each message. 63 64 Collective 65 66 Input Parameters: 67 + comm - Communicator 68 . nsends - number of messages that are to be sent. 69 . nrecvs - number of messages being received 70 - ilengths - an array of integers of length sizeof(comm) 71 a non zero ilengths[i] represent a message to i of length ilengths[i] 72 73 Output Parameters: 74 + onodes - list of node-ids from which messages are expected 75 - olengths - corresponding message lengths 76 77 Level: developer 78 79 Notes: 80 With this info, the correct `MPI_Irecv()` can be posted with the correct 81 from-id, with a buffer with the right amount of memory required. 82 83 The calling function deallocates the memory in onodes and olengths 84 85 To determine nrecvs, one can use `PetscGatherNumberOfMessages()` 86 87 .seealso: `PetscGatherNumberOfMessages()`, `PetscGatherMessageLengths2()`, `PetscCommBuildTwoSided()` 88 @*/ 89 PetscErrorCode PetscGatherMessageLengths(MPI_Comm comm, PetscMPIInt nsends, PetscMPIInt nrecvs, const PetscMPIInt ilengths[], PetscMPIInt **onodes, PetscMPIInt **olengths) 90 { 91 PetscMPIInt size, rank, tag, i, j; 92 MPI_Request *s_waits = NULL, *r_waits = NULL; 93 MPI_Status *w_status = NULL; 94 95 PetscFunctionBegin; 96 PetscCallMPI(MPI_Comm_size(comm, &size)); 97 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 98 PetscCall(PetscCommGetNewTag(comm, &tag)); 99 100 /* cannot use PetscMalloc3() here because in the call to MPI_Waitall() they MUST be contiguous */ 101 PetscCall(PetscMalloc2(nrecvs + nsends, &r_waits, nrecvs + nsends, &w_status)); 102 s_waits = r_waits + nrecvs; 103 104 /* Post the Irecv to get the message length-info */ 105 PetscCall(PetscMalloc1(nrecvs, olengths)); 106 for (i = 0; i < nrecvs; i++) PetscCallMPI(MPI_Irecv((*olengths) + i, 1, MPI_INT, MPI_ANY_SOURCE, tag, comm, r_waits + i)); 107 108 /* Post the Isends with the message length-info */ 109 for (i = 0, j = 0; i < size; ++i) { 110 if (ilengths[i]) { 111 PetscCallMPI(MPI_Isend((void *)(ilengths + i), 1, MPI_INT, i, tag, comm, s_waits + j)); 112 j++; 113 } 114 } 115 116 /* Post waits on sends and receives */ 117 if (nrecvs + nsends) PetscCallMPI(MPI_Waitall(nrecvs + nsends, r_waits, w_status)); 118 119 /* Pack up the received data */ 120 PetscCall(PetscMalloc1(nrecvs, onodes)); 121 for (i = 0; i < nrecvs; ++i) { 122 (*onodes)[i] = w_status[i].MPI_SOURCE; 123 #if defined(PETSC_HAVE_OMPI_MAJOR_VERSION) 124 /* This line is a workaround for a bug in OpenMPI-2.1.1 distributed by Ubuntu-18.04.2 LTS. 125 It happens in self-to-self MPI_Send/Recv using MPI_ANY_SOURCE for message matching. OpenMPI 126 does not put correct value in recv buffer. See also 127 https://lists.mcs.anl.gov/pipermail/petsc-dev/2019-July/024803.html 128 https://www.mail-archive.com/users@lists.open-mpi.org//msg33383.html 129 */ 130 if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank]; 131 #endif 132 } 133 PetscCall(PetscFree2(r_waits, w_status)); 134 PetscFunctionReturn(0); 135 } 136 137 /* Same as PetscGatherNumberOfMessages(), except using PetscInt for ilengths[] */ 138 PetscErrorCode PetscGatherNumberOfMessages_Private(MPI_Comm comm, const PetscMPIInt iflags[], const PetscInt ilengths[], PetscMPIInt *nrecvs) 139 { 140 PetscMPIInt size, rank, *recv_buf, i, *iflags_local = NULL, *iflags_localm = NULL; 141 142 PetscFunctionBegin; 143 PetscCallMPI(MPI_Comm_size(comm, &size)); 144 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 145 146 PetscCall(PetscMalloc2(size, &recv_buf, size, &iflags_localm)); 147 148 /* If iflags not provided, compute iflags from ilengths */ 149 if (!iflags) { 150 PetscCheck(ilengths, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Either iflags or ilengths should be provided"); 151 iflags_local = iflags_localm; 152 for (i = 0; i < size; i++) { 153 if (ilengths[i]) iflags_local[i] = 1; 154 else iflags_local[i] = 0; 155 } 156 } else iflags_local = (PetscMPIInt *)iflags; 157 158 /* Post an allreduce to determine the numer of messages the current node will receive */ 159 PetscCall(MPIU_Allreduce(iflags_local, recv_buf, size, MPI_INT, MPI_SUM, comm)); 160 *nrecvs = recv_buf[rank]; 161 162 PetscCall(PetscFree2(recv_buf, iflags_localm)); 163 PetscFunctionReturn(0); 164 } 165 166 /* Same as PetscGatherMessageLengths(), except using PetscInt for message lengths */ 167 PetscErrorCode PetscGatherMessageLengths_Private(MPI_Comm comm, PetscMPIInt nsends, PetscMPIInt nrecvs, const PetscInt ilengths[], PetscMPIInt **onodes, PetscInt **olengths) 168 { 169 PetscMPIInt size, rank, tag, i, j; 170 MPI_Request *s_waits = NULL, *r_waits = NULL; 171 MPI_Status *w_status = NULL; 172 173 PetscFunctionBegin; 174 PetscCallMPI(MPI_Comm_size(comm, &size)); 175 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 176 PetscCall(PetscCommGetNewTag(comm, &tag)); 177 178 /* cannot use PetscMalloc3() here because in the call to MPI_Waitall() they MUST be contiguous */ 179 PetscCall(PetscMalloc2(nrecvs + nsends, &r_waits, nrecvs + nsends, &w_status)); 180 s_waits = r_waits + nrecvs; 181 182 /* Post the Irecv to get the message length-info */ 183 PetscCall(PetscMalloc1(nrecvs, olengths)); 184 for (i = 0; i < nrecvs; i++) PetscCallMPI(MPI_Irecv((*olengths) + i, 1, MPIU_INT, MPI_ANY_SOURCE, tag, comm, r_waits + i)); 185 186 /* Post the Isends with the message length-info */ 187 for (i = 0, j = 0; i < size; ++i) { 188 if (ilengths[i]) { 189 PetscCallMPI(MPI_Isend((void *)(ilengths + i), 1, MPIU_INT, i, tag, comm, s_waits + j)); 190 j++; 191 } 192 } 193 194 /* Post waits on sends and receives */ 195 if (nrecvs + nsends) PetscCallMPI(MPI_Waitall(nrecvs + nsends, r_waits, w_status)); 196 197 /* Pack up the received data */ 198 PetscCall(PetscMalloc1(nrecvs, onodes)); 199 for (i = 0; i < nrecvs; ++i) { 200 (*onodes)[i] = w_status[i].MPI_SOURCE; 201 if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank]; /* See comments in PetscGatherMessageLengths */ 202 } 203 PetscCall(PetscFree2(r_waits, w_status)); 204 PetscFunctionReturn(0); 205 } 206 207 /*@C 208 PetscGatherMessageLengths2 - Computes info about messages that a MPI rank will receive, 209 including (from-id,length) pairs for each message. Same functionality as `PetscGatherMessageLengths()` 210 except it takes TWO ilenths and output TWO olengths. 211 212 Collective 213 214 Input Parameters: 215 + comm - Communicator 216 . nsends - number of messages that are to be sent. 217 . nrecvs - number of messages being received 218 . ilengths1 - first array of integers of length sizeof(comm) 219 - ilengths2 - second array of integers of length sizeof(comm) 220 221 Output Parameters: 222 + onodes - list of node-ids from which messages are expected 223 . olengths1 - first corresponding message lengths 224 - olengths2 - second message lengths 225 226 Level: developer 227 228 Notes: 229 With this info, the correct `MPI_Irecv()` can be posted with the correct 230 from-id, with a buffer with the right amount of memory required. 231 232 The calling function deallocates the memory in onodes and olengths 233 234 To determine nrecvs, one can use PetscGatherNumberOfMessages() 235 236 .seealso: `PetscGatherMessageLengths()`, `PetscGatherNumberOfMessages()`, `PetscCommBuildTwoSided()` 237 @*/ 238 PetscErrorCode PetscGatherMessageLengths2(MPI_Comm comm, PetscMPIInt nsends, PetscMPIInt nrecvs, const PetscMPIInt ilengths1[], const PetscMPIInt ilengths2[], PetscMPIInt **onodes, PetscMPIInt **olengths1, PetscMPIInt **olengths2) 239 { 240 PetscMPIInt size, tag, i, j, *buf_s = NULL, *buf_r = NULL, *buf_j = NULL; 241 MPI_Request *s_waits = NULL, *r_waits = NULL; 242 MPI_Status *w_status = NULL; 243 244 PetscFunctionBegin; 245 PetscCallMPI(MPI_Comm_size(comm, &size)); 246 PetscCall(PetscCommGetNewTag(comm, &tag)); 247 248 /* cannot use PetscMalloc5() because r_waits and s_waits must be contiguous for the call to MPI_Waitall() */ 249 PetscCall(PetscMalloc4(nrecvs + nsends, &r_waits, 2 * nrecvs, &buf_r, 2 * nsends, &buf_s, nrecvs + nsends, &w_status)); 250 s_waits = r_waits + nrecvs; 251 252 /* Post the Irecv to get the message length-info */ 253 PetscCall(PetscMalloc1(nrecvs + 1, olengths1)); 254 PetscCall(PetscMalloc1(nrecvs + 1, olengths2)); 255 for (i = 0; i < nrecvs; i++) { 256 buf_j = buf_r + (2 * i); 257 PetscCallMPI(MPI_Irecv(buf_j, 2, MPI_INT, MPI_ANY_SOURCE, tag, comm, r_waits + i)); 258 } 259 260 /* Post the Isends with the message length-info */ 261 for (i = 0, j = 0; i < size; ++i) { 262 if (ilengths1[i]) { 263 buf_j = buf_s + (2 * j); 264 buf_j[0] = *(ilengths1 + i); 265 buf_j[1] = *(ilengths2 + i); 266 PetscCallMPI(MPI_Isend(buf_j, 2, MPI_INT, i, tag, comm, s_waits + j)); 267 j++; 268 } 269 } 270 PetscCheck(j == nsends, PETSC_COMM_SELF, PETSC_ERR_PLIB, "j %d not equal to expected number of sends %d", j, nsends); 271 272 /* Post waits on sends and receives */ 273 if (nrecvs + nsends) PetscCallMPI(MPI_Waitall(nrecvs + nsends, r_waits, w_status)); 274 275 /* Pack up the received data */ 276 PetscCall(PetscMalloc1(nrecvs + 1, onodes)); 277 for (i = 0; i < nrecvs; ++i) { 278 (*onodes)[i] = w_status[i].MPI_SOURCE; 279 buf_j = buf_r + (2 * i); 280 (*olengths1)[i] = buf_j[0]; 281 (*olengths2)[i] = buf_j[1]; 282 } 283 284 PetscCall(PetscFree4(r_waits, buf_r, buf_s, w_status)); 285 PetscFunctionReturn(0); 286 } 287 288 /* 289 Allocate a buffer sufficient to hold messages of size specified in olengths. 290 And post Irecvs on these buffers using node info from onodes 291 */ 292 PetscErrorCode PetscPostIrecvInt(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt nrecvs, const PetscMPIInt onodes[], const PetscMPIInt olengths[], PetscInt ***rbuf, MPI_Request **r_waits) 293 { 294 PetscInt **rbuf_t, i, len = 0; 295 MPI_Request *r_waits_t; 296 297 PetscFunctionBegin; 298 /* compute memory required for recv buffers */ 299 for (i = 0; i < nrecvs; i++) len += olengths[i]; /* each message length */ 300 301 /* allocate memory for recv buffers */ 302 PetscCall(PetscMalloc1(nrecvs + 1, &rbuf_t)); 303 PetscCall(PetscMalloc1(len, &rbuf_t[0])); 304 for (i = 1; i < nrecvs; ++i) rbuf_t[i] = rbuf_t[i - 1] + olengths[i - 1]; 305 306 /* Post the receives */ 307 PetscCall(PetscMalloc1(nrecvs, &r_waits_t)); 308 for (i = 0; i < nrecvs; ++i) PetscCallMPI(MPI_Irecv(rbuf_t[i], olengths[i], MPIU_INT, onodes[i], tag, comm, r_waits_t + i)); 309 310 *rbuf = rbuf_t; 311 *r_waits = r_waits_t; 312 PetscFunctionReturn(0); 313 } 314 315 PetscErrorCode PetscPostIrecvScalar(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt nrecvs, const PetscMPIInt onodes[], const PetscMPIInt olengths[], PetscScalar ***rbuf, MPI_Request **r_waits) 316 { 317 PetscMPIInt i; 318 PetscScalar **rbuf_t; 319 MPI_Request *r_waits_t; 320 PetscInt len = 0; 321 322 PetscFunctionBegin; 323 /* compute memory required for recv buffers */ 324 for (i = 0; i < nrecvs; i++) len += olengths[i]; /* each message length */ 325 326 /* allocate memory for recv buffers */ 327 PetscCall(PetscMalloc1(nrecvs + 1, &rbuf_t)); 328 PetscCall(PetscMalloc1(len, &rbuf_t[0])); 329 for (i = 1; i < nrecvs; ++i) rbuf_t[i] = rbuf_t[i - 1] + olengths[i - 1]; 330 331 /* Post the receives */ 332 PetscCall(PetscMalloc1(nrecvs, &r_waits_t)); 333 for (i = 0; i < nrecvs; ++i) PetscCallMPI(MPI_Irecv(rbuf_t[i], olengths[i], MPIU_SCALAR, onodes[i], tag, comm, r_waits_t + i)); 334 335 *rbuf = rbuf_t; 336 *r_waits = r_waits_t; 337 PetscFunctionReturn(0); 338 } 339