1 #include <petscsys.h> /*I "petscsys.h" I*/ 2 3 PETSC_SINGLE_LIBRARY_INTERN PetscErrorCode PetscGatherNumberOfMessages_Private(MPI_Comm, const PetscMPIInt[], const PetscInt[], PetscMPIInt *); 4 PETSC_SINGLE_LIBRARY_INTERN PetscErrorCode PetscGatherMessageLengths_Private(MPI_Comm, PetscMPIInt, PetscMPIInt, const PetscInt[], PetscMPIInt **, PetscInt **); 5 6 /*@C 7 PetscGatherNumberOfMessages - Computes the number of messages an MPI rank expects to receive during a neighbor communication 8 9 Collective, No Fortran Support 10 11 Input Parameters: 12 + comm - Communicator 13 . iflags - an array of integers of length sizeof(comm). A '1' in `ilengths`[i] represent a 14 message from current node to ith node. Optionally `NULL` 15 - ilengths - Non zero ilengths[i] represent a message to i of length `ilengths`[i]. 16 Optionally `NULL`. 17 18 Output Parameter: 19 . nrecvs - number of messages received 20 21 Level: developer 22 23 Notes: 24 With this info, the correct message lengths can be determined using 25 `PetscGatherMessageLengths()` 26 27 Either `iflags` or `ilengths` should be provided. If `iflags` is not 28 provided (`NULL`) it can be computed from `ilengths`. If `iflags` is 29 provided, `ilengths` is not required. 30 31 .seealso: `PetscGatherMessageLengths()`, `PetscGatherMessageLengths2()`, `PetscCommBuildTwoSided()` 32 @*/ 33 PetscErrorCode PetscGatherNumberOfMessages(MPI_Comm comm, const PetscMPIInt iflags[], const PetscMPIInt ilengths[], PetscMPIInt *nrecvs) 34 { 35 PetscMPIInt size, rank, *recv_buf, i, *iflags_local = NULL, *iflags_localm; 36 37 PetscFunctionBegin; 38 PetscCallMPI(MPI_Comm_size(comm, &size)); 39 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 40 41 PetscCall(PetscMalloc2(size, &recv_buf, size, &iflags_localm)); 42 43 /* If iflags not provided, compute iflags from ilengths */ 44 if (!iflags) { 45 PetscCheck(ilengths, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Either iflags or ilengths should be provided"); 46 iflags_local = iflags_localm; 47 for (i = 0; i < size; i++) { 48 if (ilengths[i]) iflags_local[i] = 1; 49 else iflags_local[i] = 0; 50 } 51 } else iflags_local = (PetscMPIInt *)iflags; 52 53 /* Post an allreduce to determine the number of messages the current MPI rank will receive */ 54 PetscCallMPI(MPIU_Allreduce(iflags_local, recv_buf, size, MPI_INT, MPI_SUM, comm)); 55 *nrecvs = recv_buf[rank]; 56 57 PetscCall(PetscFree2(recv_buf, iflags_localm)); 58 PetscFunctionReturn(PETSC_SUCCESS); 59 } 60 61 /*@C 62 PetscGatherMessageLengths - Computes information about messages that an MPI rank will receive, 63 including (from-id,length) pairs for each message. 64 65 Collective, No Fortran Support 66 67 Input Parameters: 68 + comm - Communicator 69 . nsends - number of messages that are to be sent. 70 . nrecvs - number of messages being received 71 - ilengths - an array of integers of length sizeof(comm) 72 a non zero `ilengths`[i] represent a message to i of length `ilengths`[i] 73 74 Output Parameters: 75 + onodes - list of ranks from which messages are expected 76 - olengths - corresponding message lengths 77 78 Level: developer 79 80 Notes: 81 With this info, the correct `MPIU_Irecv()` can be posted with the correct 82 from-id, with a buffer with the right amount of memory required. 83 84 The calling function deallocates the memory in onodes and olengths 85 86 To determine `nrecvs`, one can use `PetscGatherNumberOfMessages()` 87 88 .seealso: `PetscGatherNumberOfMessages()`, `PetscGatherMessageLengths2()`, `PetscCommBuildTwoSided()` 89 @*/ 90 PetscErrorCode PetscGatherMessageLengths(MPI_Comm comm, PetscMPIInt nsends, PetscMPIInt nrecvs, const PetscMPIInt ilengths[], PetscMPIInt **onodes, PetscMPIInt **olengths) 91 { 92 PetscMPIInt size, rank, tag, i, j; 93 MPI_Request *s_waits, *r_waits; 94 MPI_Status *w_status; 95 96 PetscFunctionBegin; 97 PetscCallMPI(MPI_Comm_size(comm, &size)); 98 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 99 PetscCall(PetscCommGetNewTag(comm, &tag)); 100 101 /* cannot use PetscMalloc3() here because in the call to MPI_Waitall() they MUST be contiguous */ 102 PetscCall(PetscMalloc2(nrecvs + nsends, &r_waits, nrecvs + nsends, &w_status)); 103 s_waits = PetscSafePointerPlusOffset(r_waits, nrecvs); 104 105 /* Post the Irecv to get the message length-info */ 106 PetscCall(PetscMalloc1(nrecvs, olengths)); 107 for (i = 0; i < nrecvs; i++) PetscCallMPI(MPIU_Irecv((*olengths) + i, 1, MPI_INT, MPI_ANY_SOURCE, tag, comm, r_waits + i)); 108 109 /* Post the Isends with the message length-info */ 110 for (i = 0, j = 0; i < size; ++i) { 111 if (ilengths[i]) { 112 PetscCallMPI(MPIU_Isend((void *)(ilengths + i), 1, MPI_INT, i, tag, comm, s_waits + j)); 113 j++; 114 } 115 } 116 117 /* Post waits on sends and receives */ 118 if (nrecvs + nsends) PetscCallMPI(MPI_Waitall(nrecvs + nsends, r_waits, w_status)); 119 120 /* Pack up the received data */ 121 PetscCall(PetscMalloc1(nrecvs, onodes)); 122 for (i = 0; i < nrecvs; ++i) { 123 (*onodes)[i] = w_status[i].MPI_SOURCE; 124 #if defined(PETSC_HAVE_OPENMPI) 125 /* This line is a workaround for a bug in Open MPI 2.1.1 distributed by Ubuntu-18.04.2 LTS. 126 It happens in self-to-self MPI_Send/Recv using MPI_ANY_SOURCE for message matching. Open MPI 127 does not put correct value in recv buffer. See also 128 https://lists.mcs.anl.gov/pipermail/petsc-dev/2019-July/024803.html 129 https://www.mail-archive.com/users@lists.open-mpi.org//msg33383.html 130 */ 131 if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank]; 132 #endif 133 } 134 PetscCall(PetscFree2(r_waits, w_status)); 135 PetscFunctionReturn(PETSC_SUCCESS); 136 } 137 138 /* Same as PetscGatherNumberOfMessages(), except using PetscInt for ilengths[] */ 139 PetscErrorCode PetscGatherNumberOfMessages_Private(MPI_Comm comm, const PetscMPIInt iflags[], const PetscInt ilengths[], PetscMPIInt *nrecvs) 140 { 141 PetscMPIInt size, rank, *recv_buf, i, *iflags_local = NULL, *iflags_localm; 142 143 PetscFunctionBegin; 144 PetscCallMPI(MPI_Comm_size(comm, &size)); 145 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 146 147 PetscCall(PetscMalloc2(size, &recv_buf, size, &iflags_localm)); 148 149 /* If iflags not provided, compute iflags from ilengths */ 150 if (!iflags) { 151 PetscCheck(ilengths, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Either iflags or ilengths should be provided"); 152 iflags_local = iflags_localm; 153 for (i = 0; i < size; i++) { 154 if (ilengths[i]) iflags_local[i] = 1; 155 else iflags_local[i] = 0; 156 } 157 } else iflags_local = (PetscMPIInt *)iflags; 158 159 /* Post an allreduce to determine the number of messages the current MPI rank will receive */ 160 PetscCallMPI(MPIU_Allreduce(iflags_local, recv_buf, size, MPI_INT, MPI_SUM, comm)); 161 *nrecvs = recv_buf[rank]; 162 163 PetscCall(PetscFree2(recv_buf, iflags_localm)); 164 PetscFunctionReturn(PETSC_SUCCESS); 165 } 166 167 /* Same as PetscGatherMessageLengths(), except using PetscInt for message lengths */ 168 PetscErrorCode PetscGatherMessageLengths_Private(MPI_Comm comm, PetscMPIInt nsends, PetscMPIInt nrecvs, const PetscInt ilengths[], PetscMPIInt **onodes, PetscInt **olengths) 169 { 170 PetscMPIInt size, rank, tag, i, j; 171 MPI_Request *s_waits, *r_waits; 172 MPI_Status *w_status; 173 174 PetscFunctionBegin; 175 PetscCallMPI(MPI_Comm_size(comm, &size)); 176 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 177 PetscCall(PetscCommGetNewTag(comm, &tag)); 178 179 /* cannot use PetscMalloc3() here because in the call to MPI_Waitall() they MUST be contiguous */ 180 PetscCall(PetscMalloc2(nrecvs + nsends, &r_waits, nrecvs + nsends, &w_status)); 181 s_waits = PetscSafePointerPlusOffset(r_waits, nrecvs); 182 183 /* Post the Irecv to get the message length-info */ 184 PetscCall(PetscMalloc1(nrecvs, olengths)); 185 for (i = 0; i < nrecvs; i++) PetscCallMPI(MPIU_Irecv((*olengths) + i, 1, MPIU_INT, MPI_ANY_SOURCE, tag, comm, r_waits + i)); 186 187 /* Post the Isends with the message length-info */ 188 for (i = 0, j = 0; i < size; ++i) { 189 if (ilengths[i]) { 190 PetscCallMPI(MPIU_Isend((void *)(ilengths + i), 1, MPIU_INT, i, tag, comm, s_waits + j)); 191 j++; 192 } 193 } 194 195 /* Post waits on sends and receives */ 196 if (nrecvs + nsends) PetscCallMPI(MPI_Waitall(nrecvs + nsends, r_waits, w_status)); 197 198 /* Pack up the received data */ 199 PetscCall(PetscMalloc1(nrecvs, onodes)); 200 for (i = 0; i < nrecvs; ++i) { 201 (*onodes)[i] = w_status[i].MPI_SOURCE; 202 if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank]; /* See comments in PetscGatherMessageLengths */ 203 } 204 PetscCall(PetscFree2(r_waits, w_status)); 205 PetscFunctionReturn(PETSC_SUCCESS); 206 } 207 208 /*@C 209 PetscGatherMessageLengths2 - Computes info about messages that a MPI rank will receive, 210 including (from-id,length) pairs for each message. Same functionality as `PetscGatherMessageLengths()` 211 except it takes TWO ilenths and output TWO olengths. 212 213 Collective, No Fortran Support 214 215 Input Parameters: 216 + comm - Communicator 217 . nsends - number of messages that are to be sent. 218 . nrecvs - number of messages being received 219 . ilengths1 - first array of integers of length sizeof(comm) 220 - ilengths2 - second array of integers of length sizeof(comm) 221 222 Output Parameters: 223 + onodes - list of ranks from which messages are expected 224 . olengths1 - first corresponding message lengths 225 - olengths2 - second message lengths 226 227 Level: developer 228 229 Notes: 230 With this info, the correct `MPIU_Irecv()` can be posted with the correct 231 from-id, with a buffer with the right amount of memory required. 232 233 The calling function should `PetscFree()` the memory in `onodes` and `olengths` 234 235 To determine `nrecvs`, one can use `PetscGatherNumberOfMessages()` 236 237 .seealso: `PetscGatherMessageLengths()`, `PetscGatherNumberOfMessages()`, `PetscCommBuildTwoSided()` 238 @*/ 239 PetscErrorCode PetscGatherMessageLengths2(MPI_Comm comm, PetscMPIInt nsends, PetscMPIInt nrecvs, const PetscMPIInt ilengths1[], const PetscMPIInt ilengths2[], PetscMPIInt **onodes, PetscMPIInt **olengths1, PetscMPIInt **olengths2) 240 { 241 PetscMPIInt size, tag, i, j, *buf_s, *buf_r, *buf_j = NULL; 242 MPI_Request *s_waits, *r_waits; 243 MPI_Status *w_status; 244 245 PetscFunctionBegin; 246 PetscCallMPI(MPI_Comm_size(comm, &size)); 247 PetscCall(PetscCommGetNewTag(comm, &tag)); 248 249 /* cannot use PetscMalloc5() because r_waits and s_waits must be contiguous for the call to MPI_Waitall() */ 250 PetscCall(PetscMalloc4(nrecvs + nsends, &r_waits, 2 * nrecvs, &buf_r, 2 * nsends, &buf_s, nrecvs + nsends, &w_status)); 251 s_waits = PetscSafePointerPlusOffset(r_waits, nrecvs); 252 253 /* Post the Irecv to get the message length-info */ 254 PetscCall(PetscMalloc1(nrecvs + 1, olengths1)); 255 PetscCall(PetscMalloc1(nrecvs + 1, olengths2)); 256 for (i = 0; i < nrecvs; i++) { 257 buf_j = buf_r + (2 * i); 258 PetscCallMPI(MPIU_Irecv(buf_j, 2, MPI_INT, MPI_ANY_SOURCE, tag, comm, r_waits + i)); 259 } 260 261 /* Post the Isends with the message length-info */ 262 for (i = 0, j = 0; i < size; ++i) { 263 if (ilengths1[i]) { 264 buf_j = buf_s + (2 * j); 265 buf_j[0] = *(ilengths1 + i); 266 buf_j[1] = *(ilengths2 + i); 267 PetscCallMPI(MPIU_Isend(buf_j, 2, MPI_INT, i, tag, comm, s_waits + j)); 268 j++; 269 } 270 } 271 PetscCheck(j == nsends, PETSC_COMM_SELF, PETSC_ERR_PLIB, "j %d not equal to expected number of sends %d", j, nsends); 272 273 /* Post waits on sends and receives */ 274 if (nrecvs + nsends) PetscCallMPI(MPI_Waitall(nrecvs + nsends, r_waits, w_status)); 275 276 /* Pack up the received data */ 277 PetscCall(PetscMalloc1(nrecvs + 1, onodes)); 278 for (i = 0; i < nrecvs; ++i) { 279 (*onodes)[i] = w_status[i].MPI_SOURCE; 280 buf_j = buf_r + (2 * i); 281 (*olengths1)[i] = buf_j[0]; 282 (*olengths2)[i] = buf_j[1]; 283 } 284 285 PetscCall(PetscFree4(r_waits, buf_r, buf_s, w_status)); 286 PetscFunctionReturn(PETSC_SUCCESS); 287 } 288 289 /* 290 Allocate a buffer sufficient to hold messages of size specified in olengths. 291 And post Irecvs on these buffers using node info from onodes 292 */ 293 PetscErrorCode PetscPostIrecvInt(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt nrecvs, const PetscMPIInt onodes[], const PetscMPIInt olengths[], PetscInt ***rbuf, MPI_Request **r_waits) 294 { 295 PetscInt **rbuf_t, i, len = 0; 296 MPI_Request *r_waits_t; 297 298 PetscFunctionBegin; 299 /* compute memory required for recv buffers */ 300 for (i = 0; i < nrecvs; i++) len += olengths[i]; /* each message length */ 301 302 /* allocate memory for recv buffers */ 303 PetscCall(PetscMalloc1(nrecvs + 1, &rbuf_t)); 304 PetscCall(PetscMalloc1(len, &rbuf_t[0])); 305 for (i = 1; i < nrecvs; ++i) rbuf_t[i] = rbuf_t[i - 1] + olengths[i - 1]; 306 307 /* Post the receives */ 308 PetscCall(PetscMalloc1(nrecvs, &r_waits_t)); 309 for (i = 0; i < nrecvs; ++i) PetscCallMPI(MPIU_Irecv(rbuf_t[i], olengths[i], MPIU_INT, onodes[i], tag, comm, r_waits_t + i)); 310 311 *rbuf = rbuf_t; 312 *r_waits = r_waits_t; 313 PetscFunctionReturn(PETSC_SUCCESS); 314 } 315 316 PetscErrorCode PetscPostIrecvScalar(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt nrecvs, const PetscMPIInt onodes[], const PetscMPIInt olengths[], PetscScalar ***rbuf, MPI_Request **r_waits) 317 { 318 PetscMPIInt i; 319 PetscScalar **rbuf_t; 320 MPI_Request *r_waits_t; 321 PetscInt len = 0; 322 323 PetscFunctionBegin; 324 /* compute memory required for recv buffers */ 325 for (i = 0; i < nrecvs; i++) len += olengths[i]; /* each message length */ 326 327 /* allocate memory for recv buffers */ 328 PetscCall(PetscMalloc1(nrecvs + 1, &rbuf_t)); 329 PetscCall(PetscMalloc1(len, &rbuf_t[0])); 330 for (i = 1; i < nrecvs; ++i) rbuf_t[i] = rbuf_t[i - 1] + olengths[i - 1]; 331 332 /* Post the receives */ 333 PetscCall(PetscMalloc1(nrecvs, &r_waits_t)); 334 for (i = 0; i < nrecvs; ++i) PetscCallMPI(MPIU_Irecv(rbuf_t[i], olengths[i], MPIU_SCALAR, onodes[i], tag, comm, r_waits_t + i)); 335 336 *rbuf = rbuf_t; 337 *r_waits = r_waits_t; 338 PetscFunctionReturn(PETSC_SUCCESS); 339 } 340