1 2 #include <petscsys.h> /*I "petscsys.h" I*/ 3 #include <petsc/private/mpiutils.h> 4 5 /*@C 6 PetscGatherNumberOfMessages - Computes the number of messages a node expects to receive 7 8 Collective 9 10 Input Parameters: 11 + comm - Communicator 12 . iflags - an array of integers of length sizeof(comm). A '1' in ilengths[i] represent a 13 message from current node to ith node. Optionally NULL 14 - ilengths - Non zero ilengths[i] represent a message to i of length ilengths[i]. 15 Optionally NULL. 16 17 Output Parameters: 18 . nrecvs - number of messages received 19 20 Level: developer 21 22 Notes: 23 With this info, the correct message lengths can be determined using 24 PetscGatherMessageLengths() 25 26 Either iflags or ilengths should be provided. If iflags is not 27 provided (NULL) it can be computed from ilengths. If iflags is 28 provided, ilengths is not required. 29 30 .seealso: `PetscGatherMessageLengths()` 31 @*/ 32 PetscErrorCode PetscGatherNumberOfMessages(MPI_Comm comm,const PetscMPIInt iflags[],const PetscMPIInt ilengths[],PetscMPIInt *nrecvs) 33 { 34 PetscMPIInt size,rank,*recv_buf,i,*iflags_local = NULL,*iflags_localm = NULL; 35 36 PetscFunctionBegin; 37 PetscCallMPI(MPI_Comm_size(comm,&size)); 38 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 39 40 PetscCall(PetscMalloc2(size,&recv_buf,size,&iflags_localm)); 41 42 /* If iflags not provided, compute iflags from ilengths */ 43 if (!iflags) { 44 PetscCheck(ilengths,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Either iflags or ilengths should be provided"); 45 iflags_local = iflags_localm; 46 for (i=0; i<size; i++) { 47 if (ilengths[i]) iflags_local[i] = 1; 48 else iflags_local[i] = 0; 49 } 50 } else iflags_local = (PetscMPIInt*) iflags; 51 52 /* Post an allreduce to determine the numer of messages the current node will receive */ 53 PetscCall(MPIU_Allreduce(iflags_local,recv_buf,size,MPI_INT,MPI_SUM,comm)); 54 *nrecvs = recv_buf[rank]; 55 56 PetscCall(PetscFree2(recv_buf,iflags_localm)); 57 PetscFunctionReturn(0); 58 } 59 60 /*@C 61 PetscGatherMessageLengths - Computes info about messages that a MPI-node will receive, 62 including (from-id,length) pairs for each message. 63 64 Collective 65 66 Input Parameters: 67 + comm - Communicator 68 . nsends - number of messages that are to be sent. 69 . nrecvs - number of messages being received 70 - ilengths - an array of integers of length sizeof(comm) 71 a non zero ilengths[i] represent a message to i of length ilengths[i] 72 73 Output Parameters: 74 + onodes - list of node-ids from which messages are expected 75 - olengths - corresponding message lengths 76 77 Level: developer 78 79 Notes: 80 With this info, the correct MPI_Irecv() can be posted with the correct 81 from-id, with a buffer with the right amount of memory required. 82 83 The calling function deallocates the memory in onodes and olengths 84 85 To determine nrecvs, one can use PetscGatherNumberOfMessages() 86 87 .seealso: `PetscGatherNumberOfMessages()` 88 @*/ 89 PetscErrorCode PetscGatherMessageLengths(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths[],PetscMPIInt **onodes,PetscMPIInt **olengths) 90 { 91 PetscMPIInt size,rank,tag,i,j; 92 MPI_Request *s_waits = NULL,*r_waits = NULL; 93 MPI_Status *w_status = NULL; 94 95 PetscFunctionBegin; 96 PetscCallMPI(MPI_Comm_size(comm,&size)); 97 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 98 PetscCall(PetscCommGetNewTag(comm,&tag)); 99 100 /* cannot use PetscMalloc3() here because in the call to MPI_Waitall() they MUST be contiguous */ 101 PetscCall(PetscMalloc2(nrecvs+nsends,&r_waits,nrecvs+nsends,&w_status)); 102 s_waits = r_waits+nrecvs; 103 104 /* Post the Irecv to get the message length-info */ 105 PetscCall(PetscMalloc1(nrecvs,olengths)); 106 for (i=0; i<nrecvs; i++) { 107 PetscCallMPI(MPI_Irecv((*olengths)+i,1,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i)); 108 } 109 110 /* Post the Isends with the message length-info */ 111 for (i=0,j=0; i<size; ++i) { 112 if (ilengths[i]) { 113 PetscCallMPI(MPI_Isend((void*)(ilengths+i),1,MPI_INT,i,tag,comm,s_waits+j)); 114 j++; 115 } 116 } 117 118 /* Post waits on sends and receivs */ 119 if (nrecvs+nsends) PetscCallMPI(MPI_Waitall(nrecvs+nsends,r_waits,w_status)); 120 121 /* Pack up the received data */ 122 PetscCall(PetscMalloc1(nrecvs,onodes)); 123 for (i=0; i<nrecvs; ++i) { 124 (*onodes)[i] = w_status[i].MPI_SOURCE; 125 #if defined(PETSC_HAVE_OMPI_MAJOR_VERSION) 126 /* This line is a workaround for a bug in OpenMPI-2.1.1 distributed by Ubuntu-18.04.2 LTS. 127 It happens in self-to-self MPI_Send/Recv using MPI_ANY_SOURCE for message matching. OpenMPI 128 does not put correct value in recv buffer. See also 129 https://lists.mcs.anl.gov/pipermail/petsc-dev/2019-July/024803.html 130 https://www.mail-archive.com/users@lists.open-mpi.org//msg33383.html 131 */ 132 if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank]; 133 #endif 134 } 135 PetscCall(PetscFree2(r_waits,w_status)); 136 PetscFunctionReturn(0); 137 } 138 139 /* Same as PetscGatherNumberOfMessages(), except using PetscInt for ilengths[] */ 140 PetscErrorCode PetscGatherNumberOfMessages_Private(MPI_Comm comm,const PetscMPIInt iflags[],const PetscInt ilengths[],PetscMPIInt *nrecvs) 141 { 142 PetscMPIInt size,rank,*recv_buf,i,*iflags_local = NULL,*iflags_localm = NULL; 143 144 PetscFunctionBegin; 145 PetscCallMPI(MPI_Comm_size(comm,&size)); 146 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 147 148 PetscCall(PetscMalloc2(size,&recv_buf,size,&iflags_localm)); 149 150 /* If iflags not provided, compute iflags from ilengths */ 151 if (!iflags) { 152 PetscCheck(ilengths,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Either iflags or ilengths should be provided"); 153 iflags_local = iflags_localm; 154 for (i=0; i<size; i++) { 155 if (ilengths[i]) iflags_local[i] = 1; 156 else iflags_local[i] = 0; 157 } 158 } else iflags_local = (PetscMPIInt*) iflags; 159 160 /* Post an allreduce to determine the numer of messages the current node will receive */ 161 PetscCall(MPIU_Allreduce(iflags_local,recv_buf,size,MPI_INT,MPI_SUM,comm)); 162 *nrecvs = recv_buf[rank]; 163 164 PetscCall(PetscFree2(recv_buf,iflags_localm)); 165 PetscFunctionReturn(0); 166 } 167 168 /* Same as PetscGatherMessageLengths(), except using PetscInt for message lengths */ 169 PetscErrorCode PetscGatherMessageLengths_Private(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscInt ilengths[],PetscMPIInt **onodes,PetscInt **olengths) 170 { 171 PetscMPIInt size,rank,tag,i,j; 172 MPI_Request *s_waits = NULL,*r_waits = NULL; 173 MPI_Status *w_status = NULL; 174 175 PetscFunctionBegin; 176 PetscCallMPI(MPI_Comm_size(comm,&size)); 177 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 178 PetscCall(PetscCommGetNewTag(comm,&tag)); 179 180 /* cannot use PetscMalloc3() here because in the call to MPI_Waitall() they MUST be contiguous */ 181 PetscCall(PetscMalloc2(nrecvs+nsends,&r_waits,nrecvs+nsends,&w_status)); 182 s_waits = r_waits+nrecvs; 183 184 /* Post the Irecv to get the message length-info */ 185 PetscCall(PetscMalloc1(nrecvs,olengths)); 186 for (i=0; i<nrecvs; i++) { 187 PetscCallMPI(MPI_Irecv((*olengths)+i,1,MPIU_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i)); 188 } 189 190 /* Post the Isends with the message length-info */ 191 for (i=0,j=0; i<size; ++i) { 192 if (ilengths[i]) { 193 PetscCallMPI(MPI_Isend((void*)(ilengths+i),1,MPIU_INT,i,tag,comm,s_waits+j)); 194 j++; 195 } 196 } 197 198 /* Post waits on sends and receivs */ 199 if (nrecvs+nsends) PetscCallMPI(MPI_Waitall(nrecvs+nsends,r_waits,w_status)); 200 201 /* Pack up the received data */ 202 PetscCall(PetscMalloc1(nrecvs,onodes)); 203 for (i=0; i<nrecvs; ++i) { 204 (*onodes)[i] = w_status[i].MPI_SOURCE; 205 if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank]; /* See comments in PetscGatherMessageLengths */ 206 } 207 PetscCall(PetscFree2(r_waits,w_status)); 208 PetscFunctionReturn(0); 209 } 210 211 /*@C 212 PetscGatherMessageLengths2 - Computes info about messages that a MPI-node will receive, 213 including (from-id,length) pairs for each message. Same functionality as PetscGatherMessageLengths() 214 except it takes TWO ilenths and output TWO olengths. 215 216 Collective 217 218 Input Parameters: 219 + comm - Communicator 220 . nsends - number of messages that are to be sent. 221 . nrecvs - number of messages being received 222 . ilengths1 - first array of integers of length sizeof(comm) 223 - ilengths2 - second array of integers of length sizeof(comm) 224 225 Output Parameters: 226 + onodes - list of node-ids from which messages are expected 227 . olengths1 - first corresponding message lengths 228 - olengths2 - second message lengths 229 230 Level: developer 231 232 Notes: 233 With this info, the correct MPI_Irecv() can be posted with the correct 234 from-id, with a buffer with the right amount of memory required. 235 236 The calling function deallocates the memory in onodes and olengths 237 238 To determine nrecvs, one can use PetscGatherNumberOfMessages() 239 240 .seealso: `PetscGatherMessageLengths()` `and` `PetscGatherNumberOfMessages()` 241 @*/ 242 PetscErrorCode PetscGatherMessageLengths2(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths1[],const PetscMPIInt ilengths2[],PetscMPIInt **onodes,PetscMPIInt **olengths1,PetscMPIInt **olengths2) 243 { 244 PetscMPIInt size,tag,i,j,*buf_s = NULL,*buf_r = NULL,*buf_j = NULL; 245 MPI_Request *s_waits = NULL,*r_waits = NULL; 246 MPI_Status *w_status = NULL; 247 248 PetscFunctionBegin; 249 PetscCallMPI(MPI_Comm_size(comm,&size)); 250 PetscCall(PetscCommGetNewTag(comm,&tag)); 251 252 /* cannot use PetscMalloc5() because r_waits and s_waits must be contiguous for the call to MPI_Waitall() */ 253 PetscCall(PetscMalloc4(nrecvs+nsends,&r_waits,2*nrecvs,&buf_r,2*nsends,&buf_s,nrecvs+nsends,&w_status)); 254 s_waits = r_waits + nrecvs; 255 256 /* Post the Irecv to get the message length-info */ 257 PetscCall(PetscMalloc1(nrecvs+1,olengths1)); 258 PetscCall(PetscMalloc1(nrecvs+1,olengths2)); 259 for (i=0; i<nrecvs; i++) { 260 buf_j = buf_r + (2*i); 261 PetscCallMPI(MPI_Irecv(buf_j,2,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i)); 262 } 263 264 /* Post the Isends with the message length-info */ 265 for (i=0,j=0; i<size; ++i) { 266 if (ilengths1[i]) { 267 buf_j = buf_s + (2*j); 268 buf_j[0] = *(ilengths1+i); 269 buf_j[1] = *(ilengths2+i); 270 PetscCallMPI(MPI_Isend(buf_j,2,MPI_INT,i,tag,comm,s_waits+j)); 271 j++; 272 } 273 } 274 PetscCheck(j == nsends,PETSC_COMM_SELF,PETSC_ERR_PLIB,"j %d not equal to expected number of sends %d",j,nsends); 275 276 /* Post waits on sends and receivs */ 277 if (nrecvs+nsends) PetscCallMPI(MPI_Waitall(nrecvs+nsends,r_waits,w_status)); 278 279 /* Pack up the received data */ 280 PetscCall(PetscMalloc1(nrecvs+1,onodes)); 281 for (i=0; i<nrecvs; ++i) { 282 (*onodes)[i] = w_status[i].MPI_SOURCE; 283 buf_j = buf_r + (2*i); 284 (*olengths1)[i] = buf_j[0]; 285 (*olengths2)[i] = buf_j[1]; 286 } 287 288 PetscCall(PetscFree4(r_waits,buf_r,buf_s,w_status)); 289 PetscFunctionReturn(0); 290 } 291 292 /* 293 294 Allocate a buffer sufficient to hold messages of size specified in olengths. 295 And post Irecvs on these buffers using node info from onodes 296 297 */ 298 PetscErrorCode PetscPostIrecvInt(MPI_Comm comm,PetscMPIInt tag,PetscMPIInt nrecvs,const PetscMPIInt onodes[],const PetscMPIInt olengths[],PetscInt ***rbuf,MPI_Request **r_waits) 299 { 300 PetscInt **rbuf_t,i,len = 0; 301 MPI_Request *r_waits_t; 302 303 PetscFunctionBegin; 304 /* compute memory required for recv buffers */ 305 for (i=0; i<nrecvs; i++) len += olengths[i]; /* each message length */ 306 307 /* allocate memory for recv buffers */ 308 PetscCall(PetscMalloc1(nrecvs+1,&rbuf_t)); 309 PetscCall(PetscMalloc1(len,&rbuf_t[0])); 310 for (i=1; i<nrecvs; ++i) rbuf_t[i] = rbuf_t[i-1] + olengths[i-1]; 311 312 /* Post the receives */ 313 PetscCall(PetscMalloc1(nrecvs,&r_waits_t)); 314 for (i=0; i<nrecvs; ++i) { 315 PetscCallMPI(MPI_Irecv(rbuf_t[i],olengths[i],MPIU_INT,onodes[i],tag,comm,r_waits_t+i)); 316 } 317 318 *rbuf = rbuf_t; 319 *r_waits = r_waits_t; 320 PetscFunctionReturn(0); 321 } 322 323 PetscErrorCode PetscPostIrecvScalar(MPI_Comm comm,PetscMPIInt tag,PetscMPIInt nrecvs,const PetscMPIInt onodes[],const PetscMPIInt olengths[],PetscScalar ***rbuf,MPI_Request **r_waits) 324 { 325 PetscMPIInt i; 326 PetscScalar **rbuf_t; 327 MPI_Request *r_waits_t; 328 PetscInt len = 0; 329 330 PetscFunctionBegin; 331 /* compute memory required for recv buffers */ 332 for (i=0; i<nrecvs; i++) len += olengths[i]; /* each message length */ 333 334 /* allocate memory for recv buffers */ 335 PetscCall(PetscMalloc1(nrecvs+1,&rbuf_t)); 336 PetscCall(PetscMalloc1(len,&rbuf_t[0])); 337 for (i=1; i<nrecvs; ++i) rbuf_t[i] = rbuf_t[i-1] + olengths[i-1]; 338 339 /* Post the receives */ 340 PetscCall(PetscMalloc1(nrecvs,&r_waits_t)); 341 for (i=0; i<nrecvs; ++i) { 342 PetscCallMPI(MPI_Irecv(rbuf_t[i],olengths[i],MPIU_SCALAR,onodes[i],tag,comm,r_waits_t+i)); 343 } 344 345 *rbuf = rbuf_t; 346 *r_waits = r_waits_t; 347 PetscFunctionReturn(0); 348 } 349