#include <../src/vec/is/sf/impls/basic/sfbasic.h>

/*===================================================================================*/
/*                       Internal routines for PetscSFPack                           */
/*===================================================================================*/

/* Return root and leaf MPI requests for communication in the given direction. If the requests have not been
   initialized (since we use persistent requests), then initialize them.
 */
static PetscErrorCode PetscSFPackGetReqs_Basic(PetscSF sf,PetscSFPack link,PetscSFDirection direction,MPI_Request **rootreqs,MPI_Request **leafreqs)
{
  PetscErrorCode ierr;
  PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
  PetscInt       i,j,nrootranks,ndrootranks,nleafranks,ndleafranks;
  const PetscInt *rootoffset,*leafoffset;
  PetscMPIInt    n;
  MPI_Comm       comm = PetscObjectComm((PetscObject)sf);
  MPI_Datatype   unit = link->unit;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  if (use_gpu_aware_mpi) {
    rootmtype = link->rootmtype;
    leafmtype = link->leafmtype;
  } else {
    rootmtype = PETSC_MEMTYPE_HOST;
    leafmtype = PETSC_MEMTYPE_HOST;
  }

  if (rootreqs && !link->rootreqsinited[direction][rootmtype]) {
    ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr);
    if (direction == PETSCSF_LEAF2ROOT_REDUCE) {
      for (i=ndrootranks,j=0; i<nrootranks; i++,j++) {
        MPI_Aint disp = (rootoffset[i] - rootoffset[ndrootranks])*link->unitbytes;
        ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr);
        ierr = MPI_Recv_init(link->rootbuf[rootmtype]+disp,n,unit,bas->iranks[i],link->tag,comm,&link->rootreqs[direction][rootmtype][j]);CHKERRQ(ierr);
      }
    } else if (direction == PETSCSF_ROOT2LEAF_BCAST) {
      for (i=ndrootranks,j=0; i<nrootranks; i++,j++) {
        MPI_Aint disp = (rootoffset[i] - rootoffset[ndrootranks])*link->unitbytes;
        ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr);
        ierr = MPI_Send_init(link->rootbuf[rootmtype]+disp,n,unit,bas->iranks[i],link->tag,comm,&link->rootreqs[direction][rootmtype][j]);CHKERRQ(ierr);
      }
    } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Out-of-range PetscSFDirection = %d\n",(int)direction);
    link->rootreqsinited[direction][rootmtype] = PETSC_TRUE;
  }

  if (leafreqs && !link->leafreqsinited[direction][leafmtype]) {
    ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr);
    if (direction == PETSCSF_LEAF2ROOT_REDUCE) {
      for (i=ndleafranks,j=0; i<nleafranks; i++,j++) {
        MPI_Aint disp = (leafoffset[i] - leafoffset[ndleafranks])*link->unitbytes;
        ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr);
        ierr = MPI_Send_init(link->leafbuf[leafmtype]+disp,n,unit,sf->ranks[i],link->tag,comm,&link->leafreqs[direction][leafmtype][j]);CHKERRQ(ierr);
      }
    } else if (direction == PETSCSF_ROOT2LEAF_BCAST) {
      for (i=ndleafranks,j=0; i<nleafranks; i++,j++) {
        MPI_Aint disp = (leafoffset[i] - leafoffset[ndleafranks])*link->unitbytes;
        ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr);
        ierr = MPI_Recv_init(link->leafbuf[leafmtype]+disp,n,unit,sf->ranks[i],link->tag,comm,&link->leafreqs[direction][leafmtype][j]);CHKERRQ(ierr);
      }
    } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Out-of-range PetscSFDirection = %d\n",(int)direction);
    link->leafreqsinited[direction][leafmtype] = PETSC_TRUE;
  }

  if (rootreqs) *rootreqs = link->rootreqs[direction][rootmtype];
  if (leafreqs) *leafreqs = link->leafreqs[direction][leafmtype];
  PetscFunctionReturn(0);
}
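
/* For reference, the persistent-request pattern this file relies on is the standard MPI one (a sketch
   with placeholder buffer/peer variables, not code from this file):

     MPI_Request req;
     MPI_Send_init(buf,n,unit,peer,tag,comm,&req);    // create once
     for (i=0; i<niter; i++) {
       MPI_Start(&req);                               // start one round of communication
       MPI_Wait(&req,MPI_STATUS_IGNORE);              // complete it; req stays reusable
     }
     MPI_Request_free(&req);                          // free when the link is destroyed

   Creating the request once amortizes the setup cost over many identical communication rounds. */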

/* Common part shared by SFBasic and SFNeighbor, based on the fact that they both deal with sparse graphs. */
PETSC_INTERN PetscErrorCode PetscSFPackGet_Basic_Common(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscInt nrootreqs,PetscInt nleafreqs,PetscSFPack *mylink)
{
  PetscErrorCode ierr;
  PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
  PetscInt       i,j,nreqs,nrootranks,ndrootranks,nleafranks,ndleafranks;
  const PetscInt *rootoffset,*leafoffset;
  PetscSFPack    *p,link;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscSFPackSetErrorOnUnsupportedOverlap(sf,unit,rootdata,leafdata);CHKERRQ(ierr);

  /* Look for types in cache */
  for (p=&bas->avail; (link=*p); p=&link->next) {
    ierr = MPIPetsc_Type_compare(unit,link->unit,&match);CHKERRQ(ierr);
    if (match) {
      *p = link->next; /* Remove from available list */
      goto found;
    }
  }

  ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr);
  ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscNew(&link);CHKERRQ(ierr);
  ierr = PetscSFPackSetUp_Host(sf,link,unit);CHKERRQ(ierr);
  ierr = PetscCommGetNewTag(PetscObjectComm((PetscObject)sf),&link->tag);CHKERRQ(ierr); /* One tag per link */

  /* Allocate root, leaf, self buffers, and MPI requests */
  link->rootbuflen = rootoffset[nrootranks]-rootoffset[ndrootranks];
  link->leafbuflen = leafoffset[nleafranks]-leafoffset[ndleafranks];
  link->selfbuflen = rootoffset[ndrootranks];
  link->nrootreqs  = nrootreqs;
  link->nleafreqs  = nleafreqs;
  nreqs            = (nrootreqs+nleafreqs)*4; /* Quadruple the requests since there are two communication directions and two memory types */
  ierr = PetscMalloc1(nreqs,&link->reqs);CHKERRQ(ierr);
  for (i=0; i<nreqs; i++) link->reqs[i] = MPI_REQUEST_NULL; /* Initialized to NULL so that we know which need to be freed in Destroy */

  for (i=0; i<2; i++) {     /* Two communication directions */
    for (j=0; j<2; j++) {   /* Two memory types */
      link->rootreqs[i][j] = link->reqs + nrootreqs*(2*i+j);
      link->leafreqs[i][j] = link->reqs + nrootreqs*4 + nleafreqs*(2*i+j);
    }
  }
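
  /* The resulting layout of link->reqs is, schematically (d = direction, m = memory type):

       [root d0 m0 | root d0 m1 | root d1 m0 | root d1 m1 | leaf d0 m0 | leaf d0 m1 | leaf d1 m0 | leaf d1 m1]

     i.e., rootreqs[d][m] = reqs + nrootreqs*(2*d+m) and leafreqs[d][m] = reqs + 4*nrootreqs + nleafreqs*(2*d+m),
     with nrootreqs requests in each root block and nleafreqs in each leaf block. */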

found:
  link->rootmtype = rootmtype;
  link->leafmtype = leafmtype;
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscSFPackSetUp_Device(sf,link,unit);CHKERRQ(ierr);
  if (!use_gpu_aware_mpi) {
    /* If not using GPU-aware MPI, we always need buffers on the host. In case rootdata/leafdata is on the device,
       we copy rootdata/leafdata to/from these buffers for MPI. We only need buffers for remote neighbors, since
       self-to-self communication is not done via MPI.
     */
    if (!link->rootbuf[PETSC_MEMTYPE_HOST]) {
      if (rootmtype == PETSC_MEMTYPE_DEVICE && sf->use_pinned_buf) {
        ierr = PetscMallocPinnedMemory(link->rootbuflen*link->unitbytes,(void**)&link->rootbuf[PETSC_MEMTYPE_HOST]);CHKERRQ(ierr);
      } else {
        ierr = PetscMallocWithMemType(PETSC_MEMTYPE_HOST,link->rootbuflen*link->unitbytes,(void**)&link->rootbuf[PETSC_MEMTYPE_HOST]);CHKERRQ(ierr);
      }
    }
    if (!link->leafbuf[PETSC_MEMTYPE_HOST]) {
      if (leafmtype == PETSC_MEMTYPE_DEVICE && sf->use_pinned_buf) {
        ierr = PetscMallocPinnedMemory(link->leafbuflen*link->unitbytes,(void**)&link->leafbuf[PETSC_MEMTYPE_HOST]);CHKERRQ(ierr);
      } else {
        ierr = PetscMallocWithMemType(PETSC_MEMTYPE_HOST,link->leafbuflen*link->unitbytes,(void**)&link->leafbuf[PETSC_MEMTYPE_HOST]);CHKERRQ(ierr);
      }
    }
  }
#endif
  if (!link->rootbuf[rootmtype]) {ierr = PetscMallocWithMemType(rootmtype,link->rootbuflen*link->unitbytes,(void**)&link->rootbuf[rootmtype]);CHKERRQ(ierr);}
  if (!link->leafbuf[leafmtype]) {ierr = PetscMallocWithMemType(leafmtype,link->leafbuflen*link->unitbytes,(void**)&link->leafbuf[leafmtype]);CHKERRQ(ierr);}
  if (!link->selfbuf[rootmtype]) {ierr = PetscMallocWithMemType(rootmtype,link->selfbuflen*link->unitbytes,(void**)&link->selfbuf[rootmtype]);CHKERRQ(ierr);}
  if (rootmtype != leafmtype && !link->selfbuf[leafmtype]) {ierr = PetscMallocWithMemType(leafmtype,link->selfbuflen*link->unitbytes,(void**)&link->selfbuf[leafmtype]);CHKERRQ(ierr);}
  link->rootdata = rootdata;
  link->leafdata = leafdata;
  link->next     = bas->inuse;
  bas->inuse     = link;
  *mylink        = link;
  PetscFunctionReturn(0);
}

static PetscErrorCode PetscSFPackGet_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscSFDirection direction,PetscSFPack *mylink)
{
  PetscErrorCode ierr;
  PetscInt       nrootranks,ndrootranks,nleafranks,ndleafranks;

  PetscFunctionBegin;
  ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,NULL,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscSFPackGet_Basic_Common(sf,unit,rootmtype,rootdata,leafmtype,leafdata,nrootranks-ndrootranks,nleafranks-ndleafranks,mylink);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*===================================================================================*/
/*                        SF public interface implementations                        */
/*===================================================================================*/
PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
{
  PetscErrorCode ierr;
  PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
  PetscInt       *rlengths,*ilengths,i;
  PetscMPIInt    rank,niranks,*iranks,tag;
  MPI_Comm       comm;
  MPI_Group      group;
  MPI_Request    *rootreqs,*leafreqs;

  PetscFunctionBegin;
  ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRQ(ierr);
  ierr = PetscSFSetUpRanks(sf,group);CHKERRQ(ierr);
  ierr = MPI_Group_free(&group);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)sf,&tag);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  /*
   * Inform roots about how many leaves they have and from which ranks
   */
  ierr = PetscMalloc1(sf->nranks,&rlengths);CHKERRQ(ierr);
  /* Determine the number of incoming messages, the sending ranks, and the message lengths */
  for (i=0; i<sf->nranks; i++) {
    rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */
  }
  ierr = PetscCommBuildTwoSided(comm,1,MPIU_INT,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths);CHKERRQ(ierr);
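
  /* At this point each rank knows which remote root ranks its leaves reference, but the root side does
     not yet know which ranks reference it. PetscCommBuildTwoSided() answers exactly this "who sends to
     me" question: given our outgoing ranks (sf->ranks+sf->ndranks) and one PetscInt of payload per rank
     (the message lengths in rlengths), it returns the number of incoming ranks (niranks), the ranks
     themselves (iranks), and their payloads (ilengths). Distinguished (self) ranks are excluded because
     self communication does not go through MPI. */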

  /* Sort iranks. See the use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() for why.
     We could instead sort the ranks there, at the price of allocating extra working arrays. Presumably,
     niranks is small and the sorting is cheap.
   */
  ierr = PetscSortMPIIntWithIntArray(niranks,iranks,ilengths);CHKERRQ(ierr);

  /* Partition into distinguished and non-distinguished incoming ranks */
  bas->ndiranks = sf->ndranks;
  bas->niranks  = bas->ndiranks + niranks;
  ierr = PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset);CHKERRQ(ierr);
  bas->ioffset[0] = 0;
  for (i=0; i<bas->ndiranks; i++) {
    bas->iranks[i]    = sf->ranks[i];
    bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i];
  }
  if (bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks");
  for ( ; i<bas->niranks; i++) {
    bas->iranks[i]    = iranks[i-bas->ndiranks];
    bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks];
  }
  bas->itotal = bas->ioffset[i];
  ierr = PetscFree(rlengths);CHKERRQ(ierr);
  ierr = PetscFree(iranks);CHKERRQ(ierr);
  ierr = PetscFree(ilengths);CHKERRQ(ierr);

  /* Send leaf identities to roots */
  ierr = PetscMalloc1(bas->itotal,&bas->irootloc);CHKERRQ(ierr);
  ierr = PetscMalloc2(bas->niranks-bas->ndiranks,&rootreqs,sf->nranks-sf->ndranks,&leafreqs);CHKERRQ(ierr);
  for (i=bas->ndiranks; i<bas->niranks; i++) {
    ierr = MPI_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],tag,comm,&rootreqs[i-bas->ndiranks]);CHKERRQ(ierr);
  }
  for (i=0; i<sf->nranks; i++) {
    PetscMPIInt npoints;
    ierr = PetscMPIIntCast(sf->roffset[i+1] - sf->roffset[i],&npoints);CHKERRQ(ierr);
    if (i < sf->ndranks) {
      if (sf->ranks[i] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank");
      if (bas->iranks[0] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank");
      if (npoints != bas->ioffset[1]-bas->ioffset[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths");
      ierr = PetscArraycpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints);CHKERRQ(ierr);
      continue;
    }
    ierr = MPI_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],tag,comm,&leafreqs[i-sf->ndranks]);CHKERRQ(ierr);
  }
  ierr = MPI_Waitall(bas->niranks-bas->ndiranks,rootreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
  ierr = MPI_Waitall(sf->nranks-sf->ndranks,leafreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
  ierr = PetscFree2(rootreqs,leafreqs);CHKERRQ(ierr);

  sf->selfleafdups    = PETSC_TRUE; /* The conservative assumption is that there are data races */
  sf->remoteleafdups  = PETSC_TRUE;
  bas->selfrootdups   = PETSC_TRUE;
  bas->remoterootdups = PETSC_TRUE;

  /* Set up packing optimizations for roots and leaves */
  ierr = PetscSFPackSetupOptimizations_Basic(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode PetscSFSetFromOptions_Basic(PetscOptionItems *PetscOptionsObject,PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscOptionsHead(PetscOptionsObject,"PetscSF Basic options");CHKERRQ(ierr);
  ierr = PetscOptionsBool("-sf_use_pinned_buffer","Use pinned (non-pageable) memory for send/recv buffers on the host","PetscSFSetFromOptions",sf->use_pinned_buf,&sf->use_pinned_buf,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
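
/* As an illustration (the executable name here is made up), a GPU run with an MPI that is not GPU-aware
   could stage messages through pinned host buffers with

     mpiexec -n 2 ./app -sf_use_pinned_buffer

   Pinned (page-locked) memory speeds up the device<->host copies between root/leafdata and the host
   MPI buffers allocated in PetscSFPackGet_Basic_Common() above. */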

PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf)
{
  PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (bas->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed");
  ierr = PetscFree2(bas->iranks,bas->ioffset);CHKERRQ(ierr);
  ierr = PetscFree(bas->irootloc);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  if (bas->irootloc_d) {cudaError_t err = cudaFree(bas->irootloc_d);CHKERRCUDA(err);bas->irootloc_d = NULL;}
#endif
  ierr = PetscSFPackDestroyAvailable(sf,&bas->avail);CHKERRQ(ierr);
  ierr = PetscSFPackDestroyOptimizations_Basic(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscSFReset_Basic(sf);CHKERRQ(ierr);
  ierr = PetscFree(sf->data);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {ierr = PetscViewerASCIIPrintf(viewer,"  sort=%s\n",sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

static PetscErrorCode PetscSFBcastAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscSFPack    link;
  const PetscInt *rootloc = NULL;
  MPI_Request    *rootreqs = NULL,*leafreqs = NULL;

  PetscFunctionBegin;
  ierr = PetscSFPackGet_Basic(sf,unit,rootmtype,rootdata,leafmtype,leafdata,PETSCSF_ROOT2LEAF_BCAST,&link);CHKERRQ(ierr);
  ierr = PetscSFGetRootIndicesWithMemType_Basic(sf,rootmtype,&rootloc);CHKERRQ(ierr);

  ierr = PetscSFPackGetReqs_Basic(sf,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);CHKERRQ(ierr);
  /* Post Irecvs. Note that distinguished ranks receive data via shared memory (i.e., not via MPI) */
  ierr = MPI_Startall_irecv(link->leafbuflen,unit,link->nleafreqs,leafreqs);CHKERRQ(ierr);

  /* Do Isends */
  ierr = PetscSFPackRootData(sf,link,rootloc,rootdata,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MPI_Startall_isend(link->rootbuflen,unit,link->nrootreqs,rootreqs);CHKERRQ(ierr);

  /* Do self-to-self communication via memcpy only when rootdata and leafdata are in different memory */
  if (rootmtype != leafmtype) {ierr = PetscMemcpyWithMemType(leafmtype,rootmtype,link->selfbuf[leafmtype],link->selfbuf[rootmtype],link->selfbuflen*link->unitbytes);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
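
/* The Begin/End split lets callers overlap communication with computation. Through the public
   interface, a typical pattern is (a sketch; with op = MPIU_REPLACE the operation is a plain bcast):

     PetscSFBcastAndOpBegin(sf,MPIU_SCALAR,rootdata,leafdata,MPIU_REPLACE);
     ... local work touching neither rootdata nor leafdata ...
     PetscSFBcastAndOpEnd(sf,MPIU_SCALAR,rootdata,leafdata,MPIU_REPLACE);
*/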

PETSC_INTERN PetscErrorCode PetscSFBcastAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscSFPack    link;
  const PetscInt *leafloc = NULL;

  PetscFunctionBegin;
  ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
  ierr = PetscSFPackWaitall(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr);
  ierr = PetscSFGetLeafIndicesWithMemType_Basic(sf,leafmtype,&leafloc);CHKERRQ(ierr);
  ierr = PetscSFUnpackAndOpLeafData(sf,link,leafloc,leafdata,op,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscSFPackReclaim(sf,&link);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* leaf -> root with reduction */
static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscSFPack    link;
  const PetscInt *leafloc = NULL;
  MPI_Request    *rootreqs = NULL,*leafreqs = NULL; /* Initialized to NULL to silence compiler warnings about uninitialized values */

  PetscFunctionBegin;
  ierr = PetscSFGetLeafIndicesWithMemType_Basic(sf,leafmtype,&leafloc);CHKERRQ(ierr);

  ierr = PetscSFPackGet_Basic(sf,unit,rootmtype,rootdata,leafmtype,leafdata,PETSCSF_LEAF2ROOT_REDUCE,&link);CHKERRQ(ierr);
  ierr = PetscSFPackGetReqs_Basic(sf,link,PETSCSF_LEAF2ROOT_REDUCE,&rootreqs,&leafreqs);CHKERRQ(ierr);
  /* Eagerly post root receives for non-distinguished ranks */
  ierr = MPI_Startall_irecv(link->rootbuflen,unit,link->nrootreqs,rootreqs);CHKERRQ(ierr);

  /* Pack and send leaf data */
  ierr = PetscSFPackLeafData(sf,link,leafloc,leafdata,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MPI_Startall_isend(link->leafbuflen,unit,link->nleafreqs,leafreqs);CHKERRQ(ierr);

  if (rootmtype != leafmtype) {ierr = PetscMemcpyWithMemType(rootmtype,leafmtype,link->selfbuf[rootmtype],link->selfbuf[leafmtype],link->selfbuflen*link->unitbytes);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscSFPack    link;
  const PetscInt *rootloc = NULL;

  PetscFunctionBegin;
  ierr = PetscSFGetRootIndicesWithMemType_Basic(sf,rootmtype,&rootloc);CHKERRQ(ierr);
  ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
  ierr = PetscSFPackWaitall(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr);
  ierr = PetscSFUnpackAndOpRootData(sf,link,rootloc,rootdata,op,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscSFPackReclaim(sf,&link);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
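
/* Fetch-and-op combines the two directions above: FetchAndOpBegin starts a leaf->root reduce, and
   FetchAndOpEnd broadcasts the fetched pre-op root values back into leafupdate. A small worked example
   with op = MPIU_SUM: a root holding 5 that is reduced by leaves contributing 2 and 3 ends up with 10,
   while the two leafupdate entries receive 5 and 7 (or 5 and 8, depending on the order in which the
   contributions are applied); each leaf sees its root's state immediately before its own update. */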

PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscSFReduceBegin(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscSFPack    link;
  const PetscInt *rootloc = NULL,*leafloc = NULL;
  MPI_Request    *rootreqs = NULL,*leafreqs = NULL;

  PetscFunctionBegin;
  ierr = PetscSFGetRootIndicesWithMemType_Basic(sf,rootmtype,&rootloc);CHKERRQ(ierr);
  ierr = PetscSFGetLeafIndicesWithMemType_Basic(sf,leafmtype,&leafloc);CHKERRQ(ierr);
  ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
  /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
  ierr = PetscSFPackWaitall(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr);
  ierr = PetscSFPackGetReqs_Basic(sf,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);CHKERRQ(ierr);

  /* Post leaf receives */
  ierr = MPI_Startall_irecv(link->leafbuflen,unit,link->nleafreqs,leafreqs);CHKERRQ(ierr);

  /* Process local fetch-and-op, post root sends */
  ierr = PetscSFFetchAndOpRootData(sf,link,rootloc,rootdata,op,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MPI_Startall_isend(link->rootbuflen,unit,link->nrootreqs,rootreqs);CHKERRQ(ierr);
  if (rootmtype != leafmtype) {ierr = PetscMemcpyWithMemType(leafmtype,rootmtype,link->selfbuf[leafmtype],link->selfbuf[rootmtype],link->selfbuflen*link->unitbytes);CHKERRQ(ierr);}

  /* Unpack and insert fetched data into leaves */
  ierr = PetscSFPackWaitall(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr);
  ierr = PetscSFUnpackAndOpLeafData(sf,link,leafloc,leafupdate,MPIU_REPLACE,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscSFPackReclaim(sf,&link);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
{
  PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;

  PetscFunctionBegin;
  if (niranks)  *niranks  = bas->niranks;
  if (iranks)   *iranks   = bas->iranks;
  if (ioffset)  *ioffset  = bas->ioffset;
  if (irootloc) *irootloc = bas->irootloc;
  PetscFunctionReturn(0);
}

/* An optimized PetscSFCreateEmbeddedSF. We aggressively make use of the communication already established
   on sf: we need one bcast on sf, and no further communication, to build the embedded sf. Note that
   selected[] is sorted before this routine is called.
 */
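/* A hypothetical micro-example of the tagging scheme used below: suppose this rank has three leaves
   referencing roots on a remote rank r, and after tagging the selected roots with 1 the bcast delivers
   the raw buffer {1,0,1} for r. Then the first and third leaves stay connected and are copied into the
   embedded sf, while the middle leaf is dropped; had the buffer been all zeros, r itself would have been
   dropped from the rank list. */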

PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
{
  PetscSF           esf;
  PetscInt          esf_nranks,esf_ndranks,*esf_roffset,*esf_rmine,*esf_rremote,count;
  PetscInt          i,j,k,p,q,nroots,*rootdata,*leafdata,connected_leaves,*new_ilocal,nranks,ndranks,niranks,ndiranks,minleaf,maxleaf,maxlocal;
  PetscMPIInt       *esf_ranks;
  const PetscMPIInt *ranks,*iranks;
  const PetscInt    *roffset,*rmine,*rremote,*ioffset,*irootloc,*buffer;
  PetscBool         connected;
  PetscSFPack       link;
  PetscSFNode       *new_iremote;
  PetscSF_Basic     *bas;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);CHKERRQ(ierr);
  ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */

  /* Find out which leaves are still connected to roots in the embedded sf */
  ierr = PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
  /* We abuse the term leafdata here: its size is usually the number of leaf data items, but here it is the
     number of leaves (which is always >= the number of leaf data items) */
  maxlocal = (minleaf > maxleaf)? 0 : maxleaf-minleaf+1; /* maxleaf=-1 and minleaf=0 when nleaves=0 */
  ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafdata);CHKERRQ(ierr);
  /* Tag selected roots */
  for (i=0; i<nselected; ++i) rootdata[selected[i]] = 1;

  /* Bcast from roots to leaves to tag connected leaves. We reuse the established bcast communication in
     sf but do not do the unpacking (from the leaf buffer to leafdata). The raw data in the leaf buffer is
     what we are interested in, since it tells which leaves are connected to which ranks.
   */
  ierr = PetscSFBcastAndOpBegin_Basic(sf,MPIU_INT,PETSC_MEMTYPE_HOST,rootdata,PETSC_MEMTYPE_HOST,leafdata-minleaf,MPIU_REPLACE);CHKERRQ(ierr); /* We must pass leafdata but will not use it */
  ierr = PetscSFPackGetInUse(sf,MPIU_INT,rootdata,leafdata-minleaf,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
  ierr = PetscSFPackWaitall(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr);
  ierr = PetscSFGetLeafInfo_Basic(sf,&nranks,&ndranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr); /* Get send info */
  esf_nranks = esf_ndranks = connected_leaves = 0;
  for (i=0; i<nranks; i++) {
    connected = PETSC_FALSE; /* Is the current process still connected to this remote root rank? */
    buffer    = i < ndranks? (PetscInt*)link->selfbuf[PETSC_MEMTYPE_HOST] : (PetscInt*)link->leafbuf[PETSC_MEMTYPE_HOST] + (roffset[i] - roffset[ndranks]);
    count     = roffset[i+1] - roffset[i];
    for (j=0; j<count; j++) {if (buffer[j]) {connected_leaves++; connected = PETSC_TRUE;}}
    if (connected) {esf_nranks++; if (i < ndranks) esf_ndranks++;}
  }

  /* Set the graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */
  ierr = PetscMalloc1(connected_leaves,&new_ilocal);CHKERRQ(ierr);
  ierr = PetscMalloc1(connected_leaves,&new_iremote);CHKERRQ(ierr);
  ierr = PetscMalloc4(esf_nranks,&esf_ranks,esf_nranks+1,&esf_roffset,connected_leaves,&esf_rmine,connected_leaves,&esf_rremote);CHKERRQ(ierr);
  p = 0; /* Counter for connected root ranks */
  q = 0; /* Counter for connected leaves */
  esf_roffset[0] = 0;
  for (i=0; i<nranks; i++) { /* Scan the leaf data again to fill esf arrays */
    buffer    = i < ndranks? (PetscInt*)link->selfbuf[PETSC_MEMTYPE_HOST] : (PetscInt*)link->leafbuf[PETSC_MEMTYPE_HOST] + (roffset[i] - roffset[ndranks]);
    connected = PETSC_FALSE;
    for (j=roffset[i],k=0; j<roffset[i+1]; j++,k++) {
      if (buffer[k]) {
        esf_rmine[q] = new_ilocal[q] = rmine[j];
        esf_rremote[q]       = rremote[j];
        new_iremote[q].index = rremote[j];
        new_iremote[q].rank  = ranks[i];
        connected = PETSC_TRUE;
        q++;
      }
    }
    if (connected) {
      esf_ranks[p]     = ranks[i];
      esf_roffset[p+1] = q;
      p++;
    }
  }

  ierr = PetscSFPackReclaim(sf,&link);CHKERRQ(ierr);

  /* SetGraph internally resets the SF, so we only set its fields after the call */
  ierr = PetscSFSetGraph(esf,nroots,connected_leaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  esf->nranks  = esf_nranks;
  esf->ndranks = esf_ndranks;
  esf->ranks   = esf_ranks;
  esf->roffset = esf_roffset;
  esf->rmine   = esf_rmine;
  esf->rremote = esf_rremote;

  /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */
  bas  = (PetscSF_Basic*)esf->data;
  ierr = PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr); /* Get recv info */
  /* The embedded sf always has simpler communication than the original one, so we might allocate longer
     arrays than needed here. But we expect these arrays to be short, so we do not bother. The benefit is
     that we can fill these arrays by parsing irootloc just once.
   */
  ierr = PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);CHKERRQ(ierr);
  ierr = PetscMalloc1(ioffset[niranks],&bas->irootloc);CHKERRQ(ierr);
  bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
  p = 0; /* Counter for connected leaf ranks */
  q = 0; /* Counter for connected roots */
  for (i=0; i<niranks; i++) {
    connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
    for (j=ioffset[i]; j<ioffset[i+1]; j++) {
      PetscInt loc;
      ierr = PetscFindInt(irootloc[j],nselected,selected,&loc);CHKERRQ(ierr);
      if (loc >= 0) { /* Found in selected[], so this root is connected */
        bas->irootloc[q++] = irootloc[j];
        connected = PETSC_TRUE;
      }
    }
    if (connected) {
      bas->niranks++;
      if (i<ndiranks) bas->ndiranks++; /* Note that the order of ranks (including distinguished ranks) is kept */
      bas->iranks[p]    = iranks[i];
      bas->ioffset[p+1] = q;
      p++;
    }
  }
  bas->itotal = q;

  /* Set up packing optimizations */
  ierr = PetscSFPackSetupOptimizations_Basic(esf);CHKERRQ(ierr);
  esf->setupcalled = PETSC_TRUE; /* We have done the setup ourselves! */

  ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr);
  *newsf = esf;
  PetscFunctionReturn(0);
}
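
/* The leaf variant below mirrors the routine above with the roles of roots and leaves swapped: the caller
   selects leaves instead of roots, so the embedded graph can be written down directly from selected[],
   and the surviving recv info is discovered with one leaf->root reduce on sf (instead of a root->leaf
   bcast), again by inspecting the raw root buffer rather than unpacked root data. */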

PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedLeafSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
{
  PetscSF           esf;
  PetscInt          i,j,k,p,q,nroots,*rootdata,*leafdata,*new_ilocal,niranks,ndiranks,minleaf,maxleaf,maxlocal;
  const PetscInt    *ilocal,*ioffset,*irootloc,*buffer;
  const PetscMPIInt *iranks;
  PetscSFPack       link;
  PetscSFNode       *new_iremote;
  const PetscSFNode *iremote;
  PetscSF_Basic     *bas;
  MPI_Group         group;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);CHKERRQ(ierr);
  ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */

  /* Set the graph of esf, which is easy for CreateEmbeddedLeafSF */
  ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
  ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
  ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
  for (i=0; i<nselected; i++) {
    const PetscInt l     = selected[i];
    new_ilocal[i]        = ilocal ? ilocal[l] : l;
    new_iremote[i].rank  = iremote[l].rank;
    new_iremote[i].index = iremote[l].index;
  }

  /* Tag selected leaves before PetscSFSetGraph, since new_ilocal might become NULL after the call (we pass PETSC_OWN_POINTER below) */
  maxlocal = (minleaf > maxleaf)? 0 : maxleaf-minleaf+1; /* maxleaf=-1 and minleaf=0 when nleaves=0 */
  ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafdata);CHKERRQ(ierr);
  for (i=0; i<nselected; i++) leafdata[new_ilocal[i]-minleaf] = 1; /* -minleaf to adjust indices according to minleaf */

  ierr = PetscSFSetGraph(esf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);

  /* Set up the outgoing communication (i.e., send info). We cannot reuse rmine etc. from sf, since there is
     no way to map rmine[i] (ilocal of leaves) back to selected[j] (leaf indices).
   */
  ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRQ(ierr);
  ierr = PetscSFSetUpRanks(esf,group);CHKERRQ(ierr);
  ierr = MPI_Group_free(&group);CHKERRQ(ierr);

  /* Set up the incoming communication (i.e., recv info) */
  ierr = PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr);
  bas  = (PetscSF_Basic*)esf->data;
  ierr = PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);CHKERRQ(ierr);
  ierr = PetscMalloc1(ioffset[niranks],&bas->irootloc);CHKERRQ(ierr);

  /* Pass info about selected leaves to the root buffer */
  ierr = PetscSFReduceBegin_Basic(sf,MPIU_INT,PETSC_MEMTYPE_HOST,leafdata-minleaf,PETSC_MEMTYPE_HOST,rootdata,MPIU_REPLACE);CHKERRQ(ierr); /* -minleaf to re-adjust the start address of leafdata */
  ierr = PetscSFPackGetInUse(sf,MPIU_INT,rootdata,leafdata-minleaf,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
  ierr = PetscSFPackWaitall(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr);

  bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
  p = 0; /* Counter for connected leaf ranks */
  q = 0; /* Counter for connected roots */
  for (i=0; i<niranks; i++) {
    PetscBool connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
    buffer = i < ndiranks? (PetscInt*)link->selfbuf[PETSC_MEMTYPE_HOST] : (PetscInt*)link->rootbuf[PETSC_MEMTYPE_HOST] + (ioffset[i] - ioffset[ndiranks]);
    for (j=ioffset[i],k=0; j<ioffset[i+1]; j++,k++) {
      if (buffer[k]) {bas->irootloc[q++] = irootloc[j]; connected = PETSC_TRUE;}
    }
    if (connected) {
      bas->niranks++;
      if (i<ndiranks) bas->ndiranks++;
      bas->iranks[p]    = iranks[i];
      bas->ioffset[p+1] = q;
      p++;
    }
  }
  bas->itotal = q;
  ierr = PetscSFPackReclaim(sf,&link);CHKERRQ(ierr);

  /* Set up packing optimizations */
  ierr = PetscSFPackSetupOptimizations_Basic(esf);CHKERRQ(ierr);
  esf->setupcalled = PETSC_TRUE; /* We have done the setup ourselves! */

  ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr);
  *newsf = esf;
  PetscFunctionReturn(0);
}

PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
{
  PetscSF_Basic  *dat;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  sf->ops->SetUp                = PetscSFSetUp_Basic;
  sf->ops->SetFromOptions       = PetscSFSetFromOptions_Basic;
  sf->ops->Reset                = PetscSFReset_Basic;
  sf->ops->Destroy              = PetscSFDestroy_Basic;
  sf->ops->View                 = PetscSFView_Basic;
  sf->ops->BcastAndOpBegin      = PetscSFBcastAndOpBegin_Basic;
  sf->ops->BcastAndOpEnd        = PetscSFBcastAndOpEnd_Basic;
  sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
  sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
  sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
  sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
  sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
  sf->ops->CreateEmbeddedSF     = PetscSFCreateEmbeddedSF_Basic;
  sf->ops->CreateEmbeddedLeafSF = PetscSFCreateEmbeddedLeafSF_Basic;

  ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr);
  sf->data = (void*)dat;
  PetscFunctionReturn(0);
}
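
/* A minimal end-to-end sketch of how this implementation is reached through the public interface
   (illustrative only; assumes a two-rank run where rank 1 has two leaves referencing roots 0 and 1
   on rank 0, and elides error checking and data setup):

     PetscSF     sf;
     PetscSFNode remote[2] = {{.rank=0,.index=0},{.rank=0,.index=1}};

     PetscSFCreate(PETSC_COMM_WORLD,&sf);
     PetscSFSetType(sf,PETSCSFBASIC);                  // installs the ops table above
     PetscSFSetGraph(sf,nroots,rank ? 2 : 0,NULL,PETSC_COPY_VALUES,rank ? remote : NULL,PETSC_COPY_VALUES);
     PetscSFSetUp(sf);                                 // reaches PetscSFSetUp_Basic()
     PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata); // reaches PetscSFBcastAndOpBegin_Basic() with MPIU_REPLACE
     PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata);
     PetscSFDestroy(&sf);
*/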