1 2 #include <petscsys.h> /*I "petscsys.h" I*/ 3 #include <petsc/private/mpiutils.h> 4 5 /*@C 6 PetscGatherNumberOfMessages - Computes the number of messages a node expects to receive 7 8 Collective 9 10 Input Parameters: 11 + comm - Communicator 12 . iflags - an array of integers of length sizeof(comm). A '1' in ilengths[i] represent a 13 message from current node to ith node. Optionally NULL 14 - ilengths - Non zero ilengths[i] represent a message to i of length ilengths[i]. 15 Optionally NULL. 16 17 Output Parameters: 18 . nrecvs - number of messages received 19 20 Level: developer 21 22 Notes: 23 With this info, the correct message lengths can be determined using 24 PetscGatherMessageLengths() 25 26 Either iflags or ilengths should be provided. If iflags is not 27 provided (NULL) it can be computed from ilengths. If iflags is 28 provided, ilengths is not required. 29 30 .seealso: `PetscGatherMessageLengths()` 31 @*/ 32 PetscErrorCode PetscGatherNumberOfMessages(MPI_Comm comm, const PetscMPIInt iflags[], const PetscMPIInt ilengths[], PetscMPIInt *nrecvs) { 33 PetscMPIInt size, rank, *recv_buf, i, *iflags_local = NULL, *iflags_localm = NULL; 34 35 PetscFunctionBegin; 36 PetscCallMPI(MPI_Comm_size(comm, &size)); 37 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 38 39 PetscCall(PetscMalloc2(size, &recv_buf, size, &iflags_localm)); 40 41 /* If iflags not provided, compute iflags from ilengths */ 42 if (!iflags) { 43 PetscCheck(ilengths, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Either iflags or ilengths should be provided"); 44 iflags_local = iflags_localm; 45 for (i = 0; i < size; i++) { 46 if (ilengths[i]) iflags_local[i] = 1; 47 else iflags_local[i] = 0; 48 } 49 } else iflags_local = (PetscMPIInt *)iflags; 50 51 /* Post an allreduce to determine the numer of messages the current node will receive */ 52 PetscCall(MPIU_Allreduce(iflags_local, recv_buf, size, MPI_INT, MPI_SUM, comm)); 53 *nrecvs = recv_buf[rank]; 54 55 PetscCall(PetscFree2(recv_buf, iflags_localm)); 56 PetscFunctionReturn(0); 57 } 58 59 /*@C 60 PetscGatherMessageLengths - Computes info about messages that a MPI-node will receive, 61 including (from-id,length) pairs for each message. 62 63 Collective 64 65 Input Parameters: 66 + comm - Communicator 67 . nsends - number of messages that are to be sent. 68 . nrecvs - number of messages being received 69 - ilengths - an array of integers of length sizeof(comm) 70 a non zero ilengths[i] represent a message to i of length ilengths[i] 71 72 Output Parameters: 73 + onodes - list of node-ids from which messages are expected 74 - olengths - corresponding message lengths 75 76 Level: developer 77 78 Notes: 79 With this info, the correct MPI_Irecv() can be posted with the correct 80 from-id, with a buffer with the right amount of memory required. 81 82 The calling function deallocates the memory in onodes and olengths 83 84 To determine nrecvs, one can use PetscGatherNumberOfMessages() 85 86 .seealso: `PetscGatherNumberOfMessages()` 87 @*/ 88 PetscErrorCode PetscGatherMessageLengths(MPI_Comm comm, PetscMPIInt nsends, PetscMPIInt nrecvs, const PetscMPIInt ilengths[], PetscMPIInt **onodes, PetscMPIInt **olengths) { 89 PetscMPIInt size, rank, tag, i, j; 90 MPI_Request *s_waits = NULL, *r_waits = NULL; 91 MPI_Status *w_status = NULL; 92 93 PetscFunctionBegin; 94 PetscCallMPI(MPI_Comm_size(comm, &size)); 95 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 96 PetscCall(PetscCommGetNewTag(comm, &tag)); 97 98 /* cannot use PetscMalloc3() here because in the call to MPI_Waitall() they MUST be contiguous */ 99 PetscCall(PetscMalloc2(nrecvs + nsends, &r_waits, nrecvs + nsends, &w_status)); 100 s_waits = r_waits + nrecvs; 101 102 /* Post the Irecv to get the message length-info */ 103 PetscCall(PetscMalloc1(nrecvs, olengths)); 104 for (i = 0; i < nrecvs; i++) PetscCallMPI(MPI_Irecv((*olengths) + i, 1, MPI_INT, MPI_ANY_SOURCE, tag, comm, r_waits + i)); 105 106 /* Post the Isends with the message length-info */ 107 for (i = 0, j = 0; i < size; ++i) { 108 if (ilengths[i]) { 109 PetscCallMPI(MPI_Isend((void *)(ilengths + i), 1, MPI_INT, i, tag, comm, s_waits + j)); 110 j++; 111 } 112 } 113 114 /* Post waits on sends and receives */ 115 if (nrecvs + nsends) PetscCallMPI(MPI_Waitall(nrecvs + nsends, r_waits, w_status)); 116 117 /* Pack up the received data */ 118 PetscCall(PetscMalloc1(nrecvs, onodes)); 119 for (i = 0; i < nrecvs; ++i) { 120 (*onodes)[i] = w_status[i].MPI_SOURCE; 121 #if defined(PETSC_HAVE_OMPI_MAJOR_VERSION) 122 /* This line is a workaround for a bug in OpenMPI-2.1.1 distributed by Ubuntu-18.04.2 LTS. 123 It happens in self-to-self MPI_Send/Recv using MPI_ANY_SOURCE for message matching. OpenMPI 124 does not put correct value in recv buffer. See also 125 https://lists.mcs.anl.gov/pipermail/petsc-dev/2019-July/024803.html 126 https://www.mail-archive.com/users@lists.open-mpi.org//msg33383.html 127 */ 128 if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank]; 129 #endif 130 } 131 PetscCall(PetscFree2(r_waits, w_status)); 132 PetscFunctionReturn(0); 133 } 134 135 /* Same as PetscGatherNumberOfMessages(), except using PetscInt for ilengths[] */ 136 PetscErrorCode PetscGatherNumberOfMessages_Private(MPI_Comm comm, const PetscMPIInt iflags[], const PetscInt ilengths[], PetscMPIInt *nrecvs) { 137 PetscMPIInt size, rank, *recv_buf, i, *iflags_local = NULL, *iflags_localm = NULL; 138 139 PetscFunctionBegin; 140 PetscCallMPI(MPI_Comm_size(comm, &size)); 141 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 142 143 PetscCall(PetscMalloc2(size, &recv_buf, size, &iflags_localm)); 144 145 /* If iflags not provided, compute iflags from ilengths */ 146 if (!iflags) { 147 PetscCheck(ilengths, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Either iflags or ilengths should be provided"); 148 iflags_local = iflags_localm; 149 for (i = 0; i < size; i++) { 150 if (ilengths[i]) iflags_local[i] = 1; 151 else iflags_local[i] = 0; 152 } 153 } else iflags_local = (PetscMPIInt *)iflags; 154 155 /* Post an allreduce to determine the numer of messages the current node will receive */ 156 PetscCall(MPIU_Allreduce(iflags_local, recv_buf, size, MPI_INT, MPI_SUM, comm)); 157 *nrecvs = recv_buf[rank]; 158 159 PetscCall(PetscFree2(recv_buf, iflags_localm)); 160 PetscFunctionReturn(0); 161 } 162 163 /* Same as PetscGatherMessageLengths(), except using PetscInt for message lengths */ 164 PetscErrorCode PetscGatherMessageLengths_Private(MPI_Comm comm, PetscMPIInt nsends, PetscMPIInt nrecvs, const PetscInt ilengths[], PetscMPIInt **onodes, PetscInt **olengths) { 165 PetscMPIInt size, rank, tag, i, j; 166 MPI_Request *s_waits = NULL, *r_waits = NULL; 167 MPI_Status *w_status = NULL; 168 169 PetscFunctionBegin; 170 PetscCallMPI(MPI_Comm_size(comm, &size)); 171 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 172 PetscCall(PetscCommGetNewTag(comm, &tag)); 173 174 /* cannot use PetscMalloc3() here because in the call to MPI_Waitall() they MUST be contiguous */ 175 PetscCall(PetscMalloc2(nrecvs + nsends, &r_waits, nrecvs + nsends, &w_status)); 176 s_waits = r_waits + nrecvs; 177 178 /* Post the Irecv to get the message length-info */ 179 PetscCall(PetscMalloc1(nrecvs, olengths)); 180 for (i = 0; i < nrecvs; i++) PetscCallMPI(MPI_Irecv((*olengths) + i, 1, MPIU_INT, MPI_ANY_SOURCE, tag, comm, r_waits + i)); 181 182 /* Post the Isends with the message length-info */ 183 for (i = 0, j = 0; i < size; ++i) { 184 if (ilengths[i]) { 185 PetscCallMPI(MPI_Isend((void *)(ilengths + i), 1, MPIU_INT, i, tag, comm, s_waits + j)); 186 j++; 187 } 188 } 189 190 /* Post waits on sends and receives */ 191 if (nrecvs + nsends) PetscCallMPI(MPI_Waitall(nrecvs + nsends, r_waits, w_status)); 192 193 /* Pack up the received data */ 194 PetscCall(PetscMalloc1(nrecvs, onodes)); 195 for (i = 0; i < nrecvs; ++i) { 196 (*onodes)[i] = w_status[i].MPI_SOURCE; 197 if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank]; /* See comments in PetscGatherMessageLengths */ 198 } 199 PetscCall(PetscFree2(r_waits, w_status)); 200 PetscFunctionReturn(0); 201 } 202 203 /*@C 204 PetscGatherMessageLengths2 - Computes info about messages that a MPI-node will receive, 205 including (from-id,length) pairs for each message. Same functionality as PetscGatherMessageLengths() 206 except it takes TWO ilenths and output TWO olengths. 207 208 Collective 209 210 Input Parameters: 211 + comm - Communicator 212 . nsends - number of messages that are to be sent. 213 . nrecvs - number of messages being received 214 . ilengths1 - first array of integers of length sizeof(comm) 215 - ilengths2 - second array of integers of length sizeof(comm) 216 217 Output Parameters: 218 + onodes - list of node-ids from which messages are expected 219 . olengths1 - first corresponding message lengths 220 - olengths2 - second message lengths 221 222 Level: developer 223 224 Notes: 225 With this info, the correct MPI_Irecv() can be posted with the correct 226 from-id, with a buffer with the right amount of memory required. 227 228 The calling function deallocates the memory in onodes and olengths 229 230 To determine nrecvs, one can use PetscGatherNumberOfMessages() 231 232 .seealso: `PetscGatherMessageLengths()` `and` `PetscGatherNumberOfMessages()` 233 @*/ 234 PetscErrorCode PetscGatherMessageLengths2(MPI_Comm comm, PetscMPIInt nsends, PetscMPIInt nrecvs, const PetscMPIInt ilengths1[], const PetscMPIInt ilengths2[], PetscMPIInt **onodes, PetscMPIInt **olengths1, PetscMPIInt **olengths2) { 235 PetscMPIInt size, tag, i, j, *buf_s = NULL, *buf_r = NULL, *buf_j = NULL; 236 MPI_Request *s_waits = NULL, *r_waits = NULL; 237 MPI_Status *w_status = NULL; 238 239 PetscFunctionBegin; 240 PetscCallMPI(MPI_Comm_size(comm, &size)); 241 PetscCall(PetscCommGetNewTag(comm, &tag)); 242 243 /* cannot use PetscMalloc5() because r_waits and s_waits must be contiguous for the call to MPI_Waitall() */ 244 PetscCall(PetscMalloc4(nrecvs + nsends, &r_waits, 2 * nrecvs, &buf_r, 2 * nsends, &buf_s, nrecvs + nsends, &w_status)); 245 s_waits = r_waits + nrecvs; 246 247 /* Post the Irecv to get the message length-info */ 248 PetscCall(PetscMalloc1(nrecvs + 1, olengths1)); 249 PetscCall(PetscMalloc1(nrecvs + 1, olengths2)); 250 for (i = 0; i < nrecvs; i++) { 251 buf_j = buf_r + (2 * i); 252 PetscCallMPI(MPI_Irecv(buf_j, 2, MPI_INT, MPI_ANY_SOURCE, tag, comm, r_waits + i)); 253 } 254 255 /* Post the Isends with the message length-info */ 256 for (i = 0, j = 0; i < size; ++i) { 257 if (ilengths1[i]) { 258 buf_j = buf_s + (2 * j); 259 buf_j[0] = *(ilengths1 + i); 260 buf_j[1] = *(ilengths2 + i); 261 PetscCallMPI(MPI_Isend(buf_j, 2, MPI_INT, i, tag, comm, s_waits + j)); 262 j++; 263 } 264 } 265 PetscCheck(j == nsends, PETSC_COMM_SELF, PETSC_ERR_PLIB, "j %d not equal to expected number of sends %d", j, nsends); 266 267 /* Post waits on sends and receives */ 268 if (nrecvs + nsends) PetscCallMPI(MPI_Waitall(nrecvs + nsends, r_waits, w_status)); 269 270 /* Pack up the received data */ 271 PetscCall(PetscMalloc1(nrecvs + 1, onodes)); 272 for (i = 0; i < nrecvs; ++i) { 273 (*onodes)[i] = w_status[i].MPI_SOURCE; 274 buf_j = buf_r + (2 * i); 275 (*olengths1)[i] = buf_j[0]; 276 (*olengths2)[i] = buf_j[1]; 277 } 278 279 PetscCall(PetscFree4(r_waits, buf_r, buf_s, w_status)); 280 PetscFunctionReturn(0); 281 } 282 283 /* 284 285 Allocate a buffer sufficient to hold messages of size specified in olengths. 286 And post Irecvs on these buffers using node info from onodes 287 288 */ 289 PetscErrorCode PetscPostIrecvInt(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt nrecvs, const PetscMPIInt onodes[], const PetscMPIInt olengths[], PetscInt ***rbuf, MPI_Request **r_waits) { 290 PetscInt **rbuf_t, i, len = 0; 291 MPI_Request *r_waits_t; 292 293 PetscFunctionBegin; 294 /* compute memory required for recv buffers */ 295 for (i = 0; i < nrecvs; i++) len += olengths[i]; /* each message length */ 296 297 /* allocate memory for recv buffers */ 298 PetscCall(PetscMalloc1(nrecvs + 1, &rbuf_t)); 299 PetscCall(PetscMalloc1(len, &rbuf_t[0])); 300 for (i = 1; i < nrecvs; ++i) rbuf_t[i] = rbuf_t[i - 1] + olengths[i - 1]; 301 302 /* Post the receives */ 303 PetscCall(PetscMalloc1(nrecvs, &r_waits_t)); 304 for (i = 0; i < nrecvs; ++i) PetscCallMPI(MPI_Irecv(rbuf_t[i], olengths[i], MPIU_INT, onodes[i], tag, comm, r_waits_t + i)); 305 306 *rbuf = rbuf_t; 307 *r_waits = r_waits_t; 308 PetscFunctionReturn(0); 309 } 310 311 PetscErrorCode PetscPostIrecvScalar(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt nrecvs, const PetscMPIInt onodes[], const PetscMPIInt olengths[], PetscScalar ***rbuf, MPI_Request **r_waits) { 312 PetscMPIInt i; 313 PetscScalar **rbuf_t; 314 MPI_Request *r_waits_t; 315 PetscInt len = 0; 316 317 PetscFunctionBegin; 318 /* compute memory required for recv buffers */ 319 for (i = 0; i < nrecvs; i++) len += olengths[i]; /* each message length */ 320 321 /* allocate memory for recv buffers */ 322 PetscCall(PetscMalloc1(nrecvs + 1, &rbuf_t)); 323 PetscCall(PetscMalloc1(len, &rbuf_t[0])); 324 for (i = 1; i < nrecvs; ++i) rbuf_t[i] = rbuf_t[i - 1] + olengths[i - 1]; 325 326 /* Post the receives */ 327 PetscCall(PetscMalloc1(nrecvs, &r_waits_t)); 328 for (i = 0; i < nrecvs; ++i) PetscCallMPI(MPI_Irecv(rbuf_t[i], olengths[i], MPIU_SCALAR, onodes[i], tag, comm, r_waits_t + i)); 329 330 *rbuf = rbuf_t; 331 *r_waits = r_waits_t; 332 PetscFunctionReturn(0); 333 } 334