#include <petscsys.h> /*I "petscsys.h" I*/
#include <petsc/private/mpiutils.h>

/*@C
  PetscGatherNumberOfMessages - Computes the number of messages a node expects to receive

  Collective

  Input Parameters:
+ comm     - Communicator
. iflags   - an array of integers of length equal to the communicator size. A '1' in iflags[i] represents a
             message from current node to ith node. Optionally NULL
- ilengths - Non-zero ilengths[i] represents a message to i of length ilengths[i].
             Optionally NULL.

  Output Parameters:
. nrecvs - number of messages received

  Level: developer

  Notes:
  With this info, the correct message lengths can be determined using
  PetscGatherMessageLengths()

  Either iflags or ilengths should be provided. If iflags is not
  provided (NULL) it can be computed from ilengths. If iflags is
  provided, ilengths is not required.

.seealso: PetscGatherMessageLengths()
@*/
PetscErrorCode PetscGatherNumberOfMessages(MPI_Comm comm,const PetscMPIInt iflags[],const PetscMPIInt ilengths[],PetscMPIInt *nrecvs)
{
  PetscMPIInt    size,rank,*recv_buf,i,*iflags_local = NULL,*iflags_localm = NULL;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);

  ierr = PetscMalloc2(size,&recv_buf,size,&iflags_localm);CHKERRQ(ierr);

  /* If iflags not provided, compute iflags from ilengths (1 where a message is sent, 0 elsewhere) */
  if (!iflags) {
    PetscCheckFalse(!ilengths,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Either iflags or ilengths should be provided");
    iflags_local = iflags_localm;
    for (i=0; i<size; i++) {
      if (ilengths[i]) iflags_local[i] = 1;
      else iflags_local[i] = 0;
    }
  } else iflags_local = (PetscMPIInt*) iflags; /* const cast: buffer is only read by the allreduce below */

  /* Post an allreduce to determine the number of messages the current node will receive.
     recv_buf[j] = sum over all ranks of iflags[j], i.e. the number of senders targeting rank j. */
  ierr    = MPIU_Allreduce(iflags_local,recv_buf,size,MPI_INT,MPI_SUM,comm);CHKERRMPI(ierr);
  *nrecvs = recv_buf[rank];

  ierr = PetscFree2(recv_buf,iflags_localm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
  PetscGatherMessageLengths - Computes info about messages that a MPI-node will receive,
  including (from-id,length) pairs for each message.

  Collective

  Input Parameters:
+ comm     - Communicator
. nsends   - number of messages that are to be sent.
. nrecvs   - number of messages being received
- ilengths - an array of integers of length equal to the communicator size;
             a non-zero ilengths[i] represents a message to i of length ilengths[i]

  Output Parameters:
+ onodes   - list of node-ids from which messages are expected
- olengths - corresponding message lengths

  Level: developer

  Notes:
  With this info, the correct MPI_Irecv() can be posted with the correct
  from-id, with a buffer with the right amount of memory required.

  The calling function deallocates the memory in onodes and olengths

  To determine nrecvs, one can use PetscGatherNumberOfMessages()

.seealso: PetscGatherNumberOfMessages()
@*/
PetscErrorCode PetscGatherMessageLengths(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths[],PetscMPIInt **onodes,PetscMPIInt **olengths)
{
  PetscErrorCode ierr;
  PetscMPIInt    size,rank,tag,i,j;
  MPI_Request    *s_waits = NULL,*r_waits = NULL;
  MPI_Status     *w_status = NULL;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);

  /* cannot use PetscMalloc3() here because in the call to MPI_Waitall() they MUST be contiguous */
  ierr    = PetscMalloc2(nrecvs+nsends,&r_waits,nrecvs+nsends,&w_status);CHKERRQ(ierr);
  s_waits = r_waits+nrecvs; /* send requests live in the tail of the same array, so one Waitall covers both */

  /* Post the Irecv to get the message length-info; MPI_ANY_SOURCE since senders are unknown here */
  ierr = PetscMalloc1(nrecvs,olengths);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv((*olengths)+i,1,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i);CHKERRMPI(ierr);
  }

  /* Post the Isends with the message length-info */
  for (i=0,j=0; i<size; ++i) {
    if (ilengths[i]) {
      ierr = MPI_Isend((void*)(ilengths+i),1,MPI_INT,i,tag,comm,s_waits+j);CHKERRMPI(ierr);
      j++;
    }
  }

  /* Post waits on sends and receives */
  if (nrecvs+nsends) {ierr = MPI_Waitall(nrecvs+nsends,r_waits,w_status);CHKERRMPI(ierr);}

  /* Pack up the received data: the source rank of each receive comes from its MPI_Status */
  ierr = PetscMalloc1(nrecvs,onodes);CHKERRQ(ierr);
  for (i=0; i<nrecvs; ++i) {
    (*onodes)[i] = w_status[i].MPI_SOURCE;
#if defined(PETSC_HAVE_OMPI_MAJOR_VERSION)
    /* This line is a workaround for a bug in OpenMPI-2.1.1 distributed by Ubuntu-18.04.2 LTS.
       It happens in self-to-self MPI_Send/Recv using MPI_ANY_SOURCE for message matching. OpenMPI
       does not put correct value in recv buffer. See also
       https://lists.mcs.anl.gov/pipermail/petsc-dev/2019-July/024803.html
       https://www.mail-archive.com/users@lists.open-mpi.org//msg33383.html
    */
    if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank];
#endif
  }
  ierr = PetscFree2(r_waits,w_status);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Same as PetscGatherNumberOfMessages(), except using PetscInt for ilengths[] */
PetscErrorCode PetscGatherNumberOfMessages_Private(MPI_Comm comm,const PetscMPIInt iflags[],const PetscInt ilengths[],PetscMPIInt *nrecvs)
{
  PetscMPIInt    size,rank,*recv_buf,i,*iflags_local = NULL,*iflags_localm = NULL;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);

  ierr = PetscMalloc2(size,&recv_buf,size,&iflags_localm);CHKERRQ(ierr);

  /* If iflags not provided, compute iflags from ilengths (1 where a message is sent, 0 elsewhere) */
  if (!iflags) {
    PetscCheckFalse(!ilengths,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Either iflags or ilengths should be provided");
    iflags_local = iflags_localm;
    for (i=0; i<size; i++) {
      if (ilengths[i]) iflags_local[i] = 1;
      else iflags_local[i] = 0;
    }
  } else iflags_local = (PetscMPIInt*) iflags; /* const cast: buffer is only read by the allreduce below */

  /* Post an allreduce to determine the number of messages the current node will receive */
  ierr    = MPIU_Allreduce(iflags_local,recv_buf,size,MPI_INT,MPI_SUM,comm);CHKERRMPI(ierr);
  *nrecvs = recv_buf[rank];

  ierr = PetscFree2(recv_buf,iflags_localm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Same as PetscGatherMessageLengths(), except using PetscInt for message lengths
   (lengths travel as MPIU_INT instead of MPI_INT) */
PetscErrorCode PetscGatherMessageLengths_Private(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscInt ilengths[],PetscMPIInt **onodes,PetscInt **olengths)
{
  PetscErrorCode ierr;
  PetscMPIInt    size,rank,tag,i,j;
  MPI_Request    *s_waits = NULL,*r_waits = NULL;
  MPI_Status     *w_status = NULL;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);

  /* cannot use PetscMalloc3() here because in the call to MPI_Waitall() they MUST be contiguous */
  ierr    = PetscMalloc2(nrecvs+nsends,&r_waits,nrecvs+nsends,&w_status);CHKERRQ(ierr);
  s_waits = r_waits+nrecvs; /* send requests live in the tail of the same array, so one Waitall covers both */

  /* Post the Irecv to get the message length-info */
  ierr = PetscMalloc1(nrecvs,olengths);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv((*olengths)+i,1,MPIU_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i);CHKERRMPI(ierr);
  }

  /* Post the Isends with the message length-info */
  for (i=0,j=0; i<size; ++i) {
    if (ilengths[i]) {
      ierr = MPI_Isend((void*)(ilengths+i),1,MPIU_INT,i,tag,comm,s_waits+j);CHKERRMPI(ierr);
      j++;
    }
  }

  /* Post waits on sends and receives */
  if (nrecvs+nsends) {ierr = MPI_Waitall(nrecvs+nsends,r_waits,w_status);CHKERRMPI(ierr);}

  /* Pack up the received data */
  ierr = PetscMalloc1(nrecvs,onodes);CHKERRQ(ierr);
  for (i=0; i<nrecvs; ++i) {
    (*onodes)[i] = w_status[i].MPI_SOURCE;
    if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank]; /* See comments in PetscGatherMessageLengths */
  }
  ierr = PetscFree2(r_waits,w_status);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
  PetscGatherMessageLengths2 - Computes info about messages that a MPI-node will receive,
  including (from-id,length) pairs for each message. Same functionality as PetscGatherMessageLengths()
  except it takes TWO ilengths and output TWO olengths.

  Collective

  Input Parameters:
+ comm      - Communicator
. nsends    - number of messages that are to be sent.
. nrecvs    - number of messages being received
. ilengths1 - first array of integers of length equal to the communicator size
- ilengths2 - second array of integers of length equal to the communicator size

  Output Parameters:
+ onodes    - list of node-ids from which messages are expected
. olengths1 - first corresponding message lengths
- olengths2 - second message lengths

  Level: developer

  Notes:
  With this info, the correct MPI_Irecv() can be posted with the correct
  from-id, with a buffer with the right amount of memory required.

  The calling function deallocates the memory in onodes and olengths

  To determine nrecvs, one can use PetscGatherNumberOfMessages()

.seealso: PetscGatherMessageLengths() and PetscGatherNumberOfMessages()
@*/
PetscErrorCode PetscGatherMessageLengths2(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths1[],const PetscMPIInt ilengths2[],PetscMPIInt **onodes,PetscMPIInt **olengths1,PetscMPIInt **olengths2)
{
  PetscErrorCode ierr;
  PetscMPIInt    size,tag,i,j,*buf_s = NULL,*buf_r = NULL,*buf_j = NULL;
  MPI_Request    *s_waits = NULL,*r_waits = NULL;
  MPI_Status     *w_status = NULL;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);

  /* cannot use PetscMalloc5() because r_waits and s_waits must be contiguous for the call to MPI_Waitall() */
  ierr    = PetscMalloc4(nrecvs+nsends,&r_waits,2*nrecvs,&buf_r,2*nsends,&buf_s,nrecvs+nsends,&w_status);CHKERRQ(ierr);
  s_waits = r_waits + nrecvs;

  /* Post the Irecv to get the message length-info; each message carries a (length1,length2) pair */
  ierr = PetscMalloc1(nrecvs+1,olengths1);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,olengths2);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    buf_j = buf_r + (2*i);
    ierr  = MPI_Irecv(buf_j,2,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i);CHKERRMPI(ierr);
  }

  /* Post the Isends with the message length-info; ilengths1[i] alone decides whether a message goes to i */
  for (i=0,j=0; i<size; ++i) {
    if (ilengths1[i]) {
      buf_j    = buf_s + (2*j);
      buf_j[0] = *(ilengths1+i);
      buf_j[1] = *(ilengths2+i);
      ierr     = MPI_Isend(buf_j,2,MPI_INT,i,tag,comm,s_waits+j);CHKERRMPI(ierr);
      j++;
    }
  }
  PetscCheckFalse(j != nsends,PETSC_COMM_SELF,PETSC_ERR_PLIB,"j %d not equal to expected number of sends %d",j,nsends);

  /* Post waits on sends and receives */
  if (nrecvs+nsends) {ierr = MPI_Waitall(nrecvs+nsends,r_waits,w_status);CHKERRMPI(ierr);}

  /* Pack up the received data: split each (length1,length2) pair into the two output arrays */
  ierr = PetscMalloc1(nrecvs+1,onodes);CHKERRQ(ierr);
  for (i=0; i<nrecvs; ++i) {
    (*onodes)[i]    = w_status[i].MPI_SOURCE;
    buf_j           = buf_r + (2*i);
    (*olengths1)[i] = buf_j[0];
    (*olengths2)[i] = buf_j[1];
  }

  ierr = PetscFree4(r_waits,buf_r,buf_s,w_status);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*

  Allocate a buffer sufficient to hold messages of size specified in olengths.
  And post Irecvs on these buffers using node info from onodes

*/
PetscErrorCode PetscPostIrecvInt(MPI_Comm comm,PetscMPIInt tag,PetscMPIInt nrecvs,const PetscMPIInt onodes[],const PetscMPIInt olengths[],PetscInt ***rbuf,MPI_Request **r_waits)
{
  PetscErrorCode ierr;
  PetscInt       **rbuf_t,i,len = 0;
  MPI_Request    *r_waits_t;

  PetscFunctionBegin;
  /* compute memory required for recv buffers */
  for (i=0; i<nrecvs; i++) len += olengths[i]; /* each message length */

  /* allocate memory for recv buffers: one contiguous slab, rbuf_t[i] pointing at each message's slice */
  ierr = PetscMalloc1(nrecvs+1,&rbuf_t);CHKERRQ(ierr);
  ierr = PetscMalloc1(len,&rbuf_t[0]);CHKERRQ(ierr);
  for (i=1; i<nrecvs; ++i) rbuf_t[i] = rbuf_t[i-1] + olengths[i-1];

  /* Post the receives */
  ierr = PetscMalloc1(nrecvs,&r_waits_t);CHKERRQ(ierr);
  for (i=0; i<nrecvs; ++i) {
    ierr = MPI_Irecv(rbuf_t[i],olengths[i],MPIU_INT,onodes[i],tag,comm,r_waits_t+i);CHKERRMPI(ierr);
  }

  *rbuf    = rbuf_t;
  *r_waits = r_waits_t;
  PetscFunctionReturn(0);
}

/* Same as PetscPostIrecvInt(), except the receive buffers hold PetscScalar entries (MPIU_SCALAR) */
PetscErrorCode PetscPostIrecvScalar(MPI_Comm comm,PetscMPIInt tag,PetscMPIInt nrecvs,const PetscMPIInt onodes[],const PetscMPIInt olengths[],PetscScalar ***rbuf,MPI_Request **r_waits)
{
  PetscErrorCode ierr;
  PetscMPIInt    i;
  PetscScalar    **rbuf_t;
  MPI_Request    *r_waits_t;
  PetscInt       len = 0;

  PetscFunctionBegin;
  /* compute memory required for recv buffers */
  for (i=0; i<nrecvs; i++) len += olengths[i]; /* each message length */

  /* allocate memory for recv buffers: one contiguous slab, rbuf_t[i] pointing at each message's slice */
  ierr = PetscMalloc1(nrecvs+1,&rbuf_t);CHKERRQ(ierr);
  ierr = PetscMalloc1(len,&rbuf_t[0]);CHKERRQ(ierr);
  for (i=1; i<nrecvs; ++i) rbuf_t[i] = rbuf_t[i-1] + olengths[i-1];

  /* Post the receives */
  ierr = PetscMalloc1(nrecvs,&r_waits_t);CHKERRQ(ierr);
  for (i=0; i<nrecvs; ++i) {
    ierr = MPI_Irecv(rbuf_t[i],olengths[i],MPIU_SCALAR,onodes[i],tag,comm,r_waits_t+i);CHKERRMPI(ierr);
  }

  *rbuf    = rbuf_t;
  *r_waits = r_waits_t;
  PetscFunctionReturn(0);
}