1e5c89e4eSSatish Balay #define PETSC_DLL 2e5c89e4eSSatish Balay 3e1fa1e0fSSatish Balay #include "petscsys.h" /*I "petscsys.h" I*/ 4e5c89e4eSSatish Balay 5e5c89e4eSSatish Balay 6e5c89e4eSSatish Balay #undef __FUNCT__ 7e5c89e4eSSatish Balay #define __FUNCT__ "PetscGatherNumberOfMessages" 8e5c89e4eSSatish Balay /*@C 9e5c89e4eSSatish Balay PetscGatherNumberOfMessages - Computes the number of messages a node expects to receive 10e5c89e4eSSatish Balay 11e5c89e4eSSatish Balay Collective on MPI_Comm 12e5c89e4eSSatish Balay 13e5c89e4eSSatish Balay Input Parameters: 14e5c89e4eSSatish Balay + comm - Communicator 15e5c89e4eSSatish Balay . iflags - an array of integers of length sizeof(comm). A '1' in ilengths[i] represent a 16e5c89e4eSSatish Balay message from current node to ith node. Optionally PETSC_NULL 17e5c89e4eSSatish Balay - ilengths - Non zero ilengths[i] represent a message to i of length ilengths[i]. 18e5c89e4eSSatish Balay Optionally PETSC_NULL. 19e5c89e4eSSatish Balay 20e5c89e4eSSatish Balay Output Parameters: 21e5c89e4eSSatish Balay . nrecvs - number of messages received 22e5c89e4eSSatish Balay 23e5c89e4eSSatish Balay Level: developer 24e5c89e4eSSatish Balay 25e5c89e4eSSatish Balay Concepts: mpi utility 26e5c89e4eSSatish Balay 27e5c89e4eSSatish Balay Notes: 28e5c89e4eSSatish Balay With this info, the correct message lengths can be determined using 29e5c89e4eSSatish Balay PetscGatherMessageLengths() 30e5c89e4eSSatish Balay 31e5c89e4eSSatish Balay Either iflags or ilengths should be provided. If iflags is not 32e5c89e4eSSatish Balay provided (PETSC_NULL) it can be computed from ilengths. If iflags is 33e5c89e4eSSatish Balay provided, ilengths is not required. 34e5c89e4eSSatish Balay 35e5c89e4eSSatish Balay .seealso: PetscGatherMessageLengths() 36e5c89e4eSSatish Balay @*/ 37e5c89e4eSSatish Balay PetscErrorCode PETSC_DLLEXPORT PetscGatherNumberOfMessages(MPI_Comm comm,PetscMPIInt *iflags,PetscMPIInt *ilengths,PetscMPIInt *nrecvs) 38e5c89e4eSSatish Balay { 39*910ba992SMatthew Knepley PetscMPIInt size,rank,*recv_buf,i,*iflags_local = PETSC_NULL,*iflags_localm = PETSC_NULL; 40e5c89e4eSSatish Balay PetscErrorCode ierr; 41e5c89e4eSSatish Balay 42e5c89e4eSSatish Balay PetscFunctionBegin; 43e5c89e4eSSatish Balay 44e5c89e4eSSatish Balay ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 45e5c89e4eSSatish Balay ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 46e5c89e4eSSatish Balay 47e5c89e4eSSatish Balay ierr = PetscMalloc2(size,PetscMPIInt,&recv_buf,size,PetscMPIInt,&iflags_localm);CHKERRQ(ierr); 48e5c89e4eSSatish Balay 49e5c89e4eSSatish Balay /* If iflags not provided, compute iflags from ilengths */ 50e5c89e4eSSatish Balay if (!iflags) { 51e5c89e4eSSatish Balay if (!ilengths) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Either iflags or ilengths should be provided"); 52e5c89e4eSSatish Balay iflags_local = iflags_localm; 53e5c89e4eSSatish Balay for (i=0; i<size; i++) { 54e5c89e4eSSatish Balay if (ilengths[i]) iflags_local[i] = 1; 55e5c89e4eSSatish Balay else iflags_local[i] = 0; 56e5c89e4eSSatish Balay } 57e5c89e4eSSatish Balay } else { 58e5c89e4eSSatish Balay iflags_local = iflags; 59e5c89e4eSSatish Balay } 60e5c89e4eSSatish Balay 61e5c89e4eSSatish Balay /* Post an allreduce to determine the numer of messages the current node will receive */ 62e5c89e4eSSatish Balay ierr = MPI_Allreduce(iflags_local,recv_buf,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 63e5c89e4eSSatish Balay *nrecvs = recv_buf[rank]; 64e5c89e4eSSatish Balay 65e5c89e4eSSatish Balay ierr = PetscFree2(recv_buf,iflags_localm);CHKERRQ(ierr); 66e5c89e4eSSatish Balay PetscFunctionReturn(0); 67e5c89e4eSSatish Balay } 68e5c89e4eSSatish Balay 69e5c89e4eSSatish Balay 70e5c89e4eSSatish Balay #undef __FUNCT__ 71e5c89e4eSSatish Balay #define __FUNCT__ "PetscGatherMessageLengths" 72e5c89e4eSSatish Balay /*@C 73e5c89e4eSSatish Balay PetscGatherMessageLengths - Computes info about messages that a MPI-node will receive, 74e5c89e4eSSatish Balay including (from-id,length) pairs for each message. 75e5c89e4eSSatish Balay 76e5c89e4eSSatish Balay Collective on MPI_Comm 77e5c89e4eSSatish Balay 78e5c89e4eSSatish Balay Input Parameters: 79e5c89e4eSSatish Balay + comm - Communicator 80e5c89e4eSSatish Balay . nsends - number of messages that are to be sent. 81e5c89e4eSSatish Balay . nrecvs - number of messages being received 82e5c89e4eSSatish Balay - ilengths - an array of integers of length sizeof(comm) 83e5c89e4eSSatish Balay a non zero ilengths[i] represent a message to i of length ilengths[i] 84e5c89e4eSSatish Balay 85e5c89e4eSSatish Balay 86e5c89e4eSSatish Balay Output Parameters: 87e5c89e4eSSatish Balay + onodes - list of node-ids from which messages are expected 88e5c89e4eSSatish Balay - olengths - corresponding message lengths 89e5c89e4eSSatish Balay 90e5c89e4eSSatish Balay Level: developer 91e5c89e4eSSatish Balay 92e5c89e4eSSatish Balay Concepts: mpi utility 93e5c89e4eSSatish Balay 94e5c89e4eSSatish Balay Notes: 95e5c89e4eSSatish Balay With this info, the correct MPI_Irecv() can be posted with the correct 96e5c89e4eSSatish Balay from-id, with a buffer with the right amount of memory required. 97e5c89e4eSSatish Balay 98e5c89e4eSSatish Balay The calling function deallocates the memory in onodes and olengths 99e5c89e4eSSatish Balay 100e5c89e4eSSatish Balay To determine nrecevs, one can use PetscGatherNumberOfMessages() 101e5c89e4eSSatish Balay 102e5c89e4eSSatish Balay .seealso: PetscGatherNumberOfMessages() 103e5c89e4eSSatish Balay @*/ 104e5c89e4eSSatish Balay PetscErrorCode PETSC_DLLEXPORT PetscGatherMessageLengths(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,PetscMPIInt *ilengths,PetscMPIInt **onodes,PetscMPIInt **olengths) 105e5c89e4eSSatish Balay { 106e5c89e4eSSatish Balay PetscErrorCode ierr; 107e5c89e4eSSatish Balay PetscMPIInt size,tag,i,j; 108*910ba992SMatthew Knepley MPI_Request *s_waits = PETSC_NULL,*r_waits = PETSC_NULL; 109*910ba992SMatthew Knepley MPI_Status *w_status = PETSC_NULL; 110e5c89e4eSSatish Balay 111e5c89e4eSSatish Balay PetscFunctionBegin; 112e5c89e4eSSatish Balay ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 113e5c89e4eSSatish Balay ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr); 114e5c89e4eSSatish Balay 115e5c89e4eSSatish Balay /* cannot use PetscMalloc3() here because in the call to MPI_Waitall() they MUST be contiguous */ 116e5c89e4eSSatish Balay ierr = PetscMalloc2(nrecvs+nsends,MPI_Request,&r_waits,nrecvs+nsends,MPI_Status,&w_status);CHKERRQ(ierr); 117e5c89e4eSSatish Balay s_waits = r_waits+nrecvs; 118e5c89e4eSSatish Balay 119e5c89e4eSSatish Balay /* Post the Irecv to get the message length-info */ 120e5c89e4eSSatish Balay ierr = PetscMalloc(nrecvs*sizeof(PetscMPIInt),olengths);CHKERRQ(ierr); 121e5c89e4eSSatish Balay for (i=0; i<nrecvs; i++) { 122e5c89e4eSSatish Balay ierr = MPI_Irecv((*olengths)+i,1,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i);CHKERRQ(ierr); 123e5c89e4eSSatish Balay } 124e5c89e4eSSatish Balay 125e5c89e4eSSatish Balay /* Post the Isends with the message length-info */ 126e5c89e4eSSatish Balay for (i=0,j=0; i<size; ++i) { 127e5c89e4eSSatish Balay if (ilengths[i]) { 128e5c89e4eSSatish Balay ierr = MPI_Isend(ilengths+i,1,MPI_INT,i,tag,comm,s_waits+j);CHKERRQ(ierr); 129e5c89e4eSSatish Balay j++; 130e5c89e4eSSatish Balay } 131e5c89e4eSSatish Balay } 132e5c89e4eSSatish Balay 133e5c89e4eSSatish Balay /* Post waits on sends and receivs */ 134e5c89e4eSSatish Balay if (nrecvs+nsends) {ierr = MPI_Waitall(nrecvs+nsends,r_waits,w_status);CHKERRQ(ierr);} 135e5c89e4eSSatish Balay 136e5c89e4eSSatish Balay /* Pack up the received data */ 137e5c89e4eSSatish Balay ierr = PetscMalloc(nrecvs*sizeof(PetscMPIInt),onodes);CHKERRQ(ierr); 138e5c89e4eSSatish Balay for (i=0; i<nrecvs; ++i) { 139e5c89e4eSSatish Balay (*onodes)[i] = w_status[i].MPI_SOURCE; 140e5c89e4eSSatish Balay } 141e5c89e4eSSatish Balay ierr = PetscFree2(r_waits,w_status);CHKERRQ(ierr); 142e5c89e4eSSatish Balay PetscFunctionReturn(0); 143e5c89e4eSSatish Balay } 144e5c89e4eSSatish Balay #undef __FUNCT__ 145e5c89e4eSSatish Balay #define __FUNCT__ "PetscGatherMessageLengths2" 146e5c89e4eSSatish Balay /*@C 147e5c89e4eSSatish Balay PetscGatherMessageLengths2 - Computes info about messages that a MPI-node will receive, 148e5c89e4eSSatish Balay including (from-id,length) pairs for each message. Same functionality as PetscGatherMessageLengths() 149e5c89e4eSSatish Balay except it takes TWO ilenths and output TWO olengths. 150e5c89e4eSSatish Balay 151e5c89e4eSSatish Balay Collective on MPI_Comm 152e5c89e4eSSatish Balay 153e5c89e4eSSatish Balay Input Parameters: 154e5c89e4eSSatish Balay + comm - Communicator 155e5c89e4eSSatish Balay . nsends - number of messages that are to be sent. 156e5c89e4eSSatish Balay . nrecvs - number of messages being received 157e5c89e4eSSatish Balay - ilengths1, ilengths2 - array of integers of length sizeof(comm) 158e5c89e4eSSatish Balay a non zero ilengths[i] represent a message to i of length ilengths[i] 159e5c89e4eSSatish Balay 160e5c89e4eSSatish Balay Output Parameters: 161e5c89e4eSSatish Balay + onodes - list of node-ids from which messages are expected 162e5c89e4eSSatish Balay - olengths1, olengths2 - corresponding message lengths 163e5c89e4eSSatish Balay 164e5c89e4eSSatish Balay Level: developer 165e5c89e4eSSatish Balay 166e5c89e4eSSatish Balay Concepts: mpi utility 167e5c89e4eSSatish Balay 168e5c89e4eSSatish Balay Notes: 169e5c89e4eSSatish Balay With this info, the correct MPI_Irecv() can be posted with the correct 170e5c89e4eSSatish Balay from-id, with a buffer with the right amount of memory required. 171e5c89e4eSSatish Balay 172e5c89e4eSSatish Balay The calling function deallocates the memory in onodes and olengths 173e5c89e4eSSatish Balay 174e5c89e4eSSatish Balay To determine nrecevs, one can use PetscGatherNumberOfMessages() 175e5c89e4eSSatish Balay 176e5c89e4eSSatish Balay .seealso: PetscGatherMessageLengths() and PetscGatherNumberOfMessages() 177e5c89e4eSSatish Balay @*/ 178e5c89e4eSSatish Balay PetscErrorCode PETSC_DLLEXPORT PetscGatherMessageLengths2(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,PetscMPIInt *ilengths1,PetscMPIInt *ilengths2,PetscMPIInt **onodes,PetscMPIInt **olengths1,PetscMPIInt **olengths2) 179e5c89e4eSSatish Balay { 180e5c89e4eSSatish Balay PetscErrorCode ierr; 181*910ba992SMatthew Knepley PetscMPIInt size,tag,i,j,*buf_s = PETSC_NULL,*buf_r = PETSC_NULL,*buf_j = PETSC_NULL; 182*910ba992SMatthew Knepley MPI_Request *s_waits = PETSC_NULL,*r_waits = PETSC_NULL; 183*910ba992SMatthew Knepley MPI_Status *w_status = PETSC_NULL; 184e5c89e4eSSatish Balay 185e5c89e4eSSatish Balay PetscFunctionBegin; 186e5c89e4eSSatish Balay ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 187e5c89e4eSSatish Balay ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr); 188e5c89e4eSSatish Balay 189e5c89e4eSSatish Balay /* cannot use PetscMalloc5() because r_waits and s_waits must be contiquous for the call to MPI_Waitall() */ 190e5c89e4eSSatish Balay ierr = PetscMalloc4(nrecvs+nsends,MPI_Request,&r_waits,2*nrecvs,PetscMPIInt,&buf_r,2*nsends,PetscMPIInt,&buf_s,nrecvs+nsends,MPI_Status,&w_status);CHKERRQ(ierr); 191e5c89e4eSSatish Balay s_waits = r_waits + nrecvs; 192e5c89e4eSSatish Balay 193e5c89e4eSSatish Balay /* Post the Irecv to get the message length-info */ 194e5c89e4eSSatish Balay ierr = PetscMalloc((nrecvs+1)*sizeof(PetscMPIInt),olengths1);CHKERRQ(ierr); 195e5c89e4eSSatish Balay ierr = PetscMalloc((nrecvs+1)*sizeof(PetscMPIInt),olengths2);CHKERRQ(ierr); 196e5c89e4eSSatish Balay for (i=0; i<nrecvs; i++) { 197e5c89e4eSSatish Balay buf_j = buf_r + (2*i); 198e5c89e4eSSatish Balay ierr = MPI_Irecv(buf_j,2,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i);CHKERRQ(ierr); 199e5c89e4eSSatish Balay } 200e5c89e4eSSatish Balay 201e5c89e4eSSatish Balay /* Post the Isends with the message length-info */ 202e5c89e4eSSatish Balay for (i=0,j=0; i<size; ++i) { 203e5c89e4eSSatish Balay if (ilengths1[i]) { 204e5c89e4eSSatish Balay buf_j = buf_s + (2*j); 205e5c89e4eSSatish Balay buf_j[0] = *(ilengths1+i); 206e5c89e4eSSatish Balay buf_j[1] = *(ilengths2+i); 207e5c89e4eSSatish Balay ierr = MPI_Isend(buf_j,2,MPI_INT,i,tag,comm,s_waits+j);CHKERRQ(ierr); 208e5c89e4eSSatish Balay j++; 209e5c89e4eSSatish Balay } 210e5c89e4eSSatish Balay } 211e5c89e4eSSatish Balay 212e5c89e4eSSatish Balay /* Post waits on sends and receivs */ 213e5c89e4eSSatish Balay if (nrecvs+nsends) {ierr = MPI_Waitall(nrecvs+nsends,r_waits,w_status);CHKERRQ(ierr);} 214e5c89e4eSSatish Balay 215e5c89e4eSSatish Balay 216e5c89e4eSSatish Balay /* Pack up the received data */ 217e5c89e4eSSatish Balay ierr = PetscMalloc((nrecvs+1)*sizeof(PetscMPIInt),onodes);CHKERRQ(ierr); 218e5c89e4eSSatish Balay for (i=0; i<nrecvs; ++i) { 219e5c89e4eSSatish Balay (*onodes)[i] = w_status[i].MPI_SOURCE; 220e5c89e4eSSatish Balay buf_j = buf_r + (2*i); 221e5c89e4eSSatish Balay (*olengths1)[i] = buf_j[0]; 222e5c89e4eSSatish Balay (*olengths2)[i] = buf_j[1]; 223e5c89e4eSSatish Balay } 224e5c89e4eSSatish Balay 225e5c89e4eSSatish Balay ierr = PetscFree4(r_waits,buf_r,buf_s,w_status);CHKERRQ(ierr); 226e5c89e4eSSatish Balay PetscFunctionReturn(0); 227e5c89e4eSSatish Balay } 228e5c89e4eSSatish Balay 229e5c89e4eSSatish Balay /* 230e5c89e4eSSatish Balay 231e5c89e4eSSatish Balay Allocate a bufffer sufficient to hold messages of size specified in olengths. 232e5c89e4eSSatish Balay And post Irecvs on these buffers using node info from onodes 233e5c89e4eSSatish Balay 234e5c89e4eSSatish Balay */ 235e5c89e4eSSatish Balay #undef __FUNCT__ 236e5c89e4eSSatish Balay #define __FUNCT__ "PetscPostIrecvInt" 237e5c89e4eSSatish Balay PetscErrorCode PETSC_DLLEXPORT PetscPostIrecvInt(MPI_Comm comm,PetscMPIInt tag,PetscMPIInt nrecvs,PetscMPIInt *onodes,PetscMPIInt *olengths,PetscInt ***rbuf,MPI_Request **r_waits) 238e5c89e4eSSatish Balay { 239e5c89e4eSSatish Balay PetscErrorCode ierr; 240e5c89e4eSSatish Balay PetscInt len=0,**rbuf_t,i; 241e5c89e4eSSatish Balay MPI_Request *r_waits_t; 242e5c89e4eSSatish Balay 243e5c89e4eSSatish Balay PetscFunctionBegin; 244e5c89e4eSSatish Balay 245e5c89e4eSSatish Balay /* compute memory required for recv buffers */ 246e5c89e4eSSatish Balay for (i=0; i<nrecvs; i++) len += olengths[i]; /* each message length */ 247e5c89e4eSSatish Balay len *= sizeof(PetscInt); 248e5c89e4eSSatish Balay len += (nrecvs+1)*sizeof(PetscInt*); /* Array of pointers for each message */ 249e5c89e4eSSatish Balay 250e5c89e4eSSatish Balay /* allocate memory for recv buffers */ 251e5c89e4eSSatish Balay ierr = PetscMalloc(len,&rbuf_t);CHKERRQ(ierr); 252e5c89e4eSSatish Balay rbuf_t[0] = (PetscInt*)(rbuf_t + nrecvs); 253e5c89e4eSSatish Balay for (i=1; i<nrecvs; ++i) rbuf_t[i] = rbuf_t[i-1] + olengths[i-1]; 254e5c89e4eSSatish Balay 255e5c89e4eSSatish Balay /* Post the receives */ 256e5c89e4eSSatish Balay ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&r_waits_t);CHKERRQ(ierr); 257e5c89e4eSSatish Balay for (i=0; i<nrecvs; ++i) { 258e5c89e4eSSatish Balay ierr = MPI_Irecv(rbuf_t[i],olengths[i],MPIU_INT,onodes[i],tag,comm,r_waits_t+i);CHKERRQ(ierr); 259e5c89e4eSSatish Balay } 260e5c89e4eSSatish Balay 261e5c89e4eSSatish Balay *rbuf = rbuf_t; 262e5c89e4eSSatish Balay *r_waits = r_waits_t; 263e5c89e4eSSatish Balay PetscFunctionReturn(0); 264e5c89e4eSSatish Balay } 265e5c89e4eSSatish Balay 266e5c89e4eSSatish Balay #undef __FUNCT__ 267e5c89e4eSSatish Balay #define __FUNCT__ "PetscPostIrecvScalar" 268e5c89e4eSSatish Balay PetscErrorCode PETSC_DLLEXPORT PetscPostIrecvScalar(MPI_Comm comm,PetscMPIInt tag,PetscMPIInt nrecvs,PetscMPIInt *onodes,PetscMPIInt *olengths,PetscScalar ***rbuf,MPI_Request **r_waits) 269e5c89e4eSSatish Balay { 270e5c89e4eSSatish Balay PetscErrorCode ierr; 271e5c89e4eSSatish Balay PetscMPIInt len=0,i; 272e5c89e4eSSatish Balay PetscScalar **rbuf_t; 273e5c89e4eSSatish Balay MPI_Request *r_waits_t; 274e5c89e4eSSatish Balay 275e5c89e4eSSatish Balay PetscFunctionBegin; 276e5c89e4eSSatish Balay 277e5c89e4eSSatish Balay /* compute memory required for recv buffers */ 278e5c89e4eSSatish Balay for (i=0; i<nrecvs; i++) len += olengths[i]; /* each message length */ 279e5c89e4eSSatish Balay len *= sizeof(PetscScalar); 280e5c89e4eSSatish Balay len += (nrecvs+1)*sizeof(PetscScalar*); /* Array of pointers for each message */ 281e5c89e4eSSatish Balay 282e5c89e4eSSatish Balay 283e5c89e4eSSatish Balay /* allocate memory for recv buffers */ 284e5c89e4eSSatish Balay ierr = PetscMalloc(len,&rbuf_t);CHKERRQ(ierr); 285e5c89e4eSSatish Balay rbuf_t[0] = (PetscScalar*)(rbuf_t + nrecvs); 286e5c89e4eSSatish Balay for (i=1; i<nrecvs; ++i) rbuf_t[i] = rbuf_t[i-1] + olengths[i-1]; 287e5c89e4eSSatish Balay 288e5c89e4eSSatish Balay /* Post the receives */ 289e5c89e4eSSatish Balay ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&r_waits_t);CHKERRQ(ierr); 290e5c89e4eSSatish Balay for (i=0; i<nrecvs; ++i) { 291e5c89e4eSSatish Balay ierr = MPI_Irecv(rbuf_t[i],olengths[i],MPIU_SCALAR,onodes[i],tag,comm,r_waits_t+i);CHKERRQ(ierr); 292e5c89e4eSSatish Balay } 293e5c89e4eSSatish Balay 294e5c89e4eSSatish Balay *rbuf = rbuf_t; 295e5c89e4eSSatish Balay *r_waits = r_waits_t; 296e5c89e4eSSatish Balay PetscFunctionReturn(0); 297e5c89e4eSSatish Balay } 298