#include <../src/vec/is/sf/impls/basic/sfbasic.h>

/*===================================================================================*/
/* Internal routines for PetscSFPack */
/*===================================================================================*/

/* Return root and leaf MPI requests for communication in the given direction. Since we use
   persistent requests, initialize them first if they have not been initialized yet.
*/
static PetscErrorCode PetscSFPackGetReqs_Basic(PetscSF sf,PetscSFPack link,PetscSFDirection direction,MPI_Request **rootreqs,MPI_Request **leafreqs)
{
  PetscErrorCode ierr;
  PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
  PetscInt       i,j,nrootranks,ndrootranks,nleafranks,ndleafranks,disp;
  const PetscInt *rootoffset,*leafoffset;
  PetscMPIInt    n;
  MPI_Comm       comm = PetscObjectComm((PetscObject)sf);
  MPI_Datatype   unit = link->unit;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  if (use_gpu_aware_mpi) {
    rootmtype = link->rootmtype;
    leafmtype = link->leafmtype;
  } else {
    rootmtype = PETSC_MEMTYPE_HOST;
    leafmtype = PETSC_MEMTYPE_HOST;
  }

  if (rootreqs && !link->rootreqsinited[direction][rootmtype]) {
    ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr);
    if (direction == PETSCSF_LEAF2ROOT_REDUCE) {
      for (i=ndrootranks,j=0; i<nrootranks; i++,j++) {
        disp = (rootoffset[i] - rootoffset[ndrootranks])*link->unitbytes;
        ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr);
        ierr = MPI_Recv_init(link->rootbuf[rootmtype]+disp,n,unit,bas->iranks[i],link->tag,comm,&link->rootreqs[direction][rootmtype][j]);CHKERRQ(ierr);
      }
    } else if (direction == PETSCSF_ROOT2LEAF_BCAST) {
      for (i=ndrootranks,j=0; i<nrootranks; i++,j++) {
        disp = (rootoffset[i] - rootoffset[ndrootranks])*link->unitbytes;
        ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr);
        ierr = MPI_Send_init(link->rootbuf[rootmtype]+disp,n,unit,bas->iranks[i],link->tag,comm,&link->rootreqs[direction][rootmtype][j]);CHKERRQ(ierr);
      }
    } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Out-of-range PetscSFDirection = %d\n",(int)direction);
    link->rootreqsinited[direction][rootmtype] = PETSC_TRUE;
  }

  if (leafreqs && !link->leafreqsinited[direction][leafmtype]) {
    ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr);
    if (direction == PETSCSF_LEAF2ROOT_REDUCE) {
      for (i=ndleafranks,j=0; i<nleafranks; i++,j++) {
        disp = (leafoffset[i] - leafoffset[ndleafranks])*link->unitbytes;
        ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr);
        ierr = MPI_Send_init(link->leafbuf[leafmtype]+disp,n,unit,sf->ranks[i],link->tag,comm,&link->leafreqs[direction][leafmtype][j]);CHKERRQ(ierr);
      }
    } else if (direction == PETSCSF_ROOT2LEAF_BCAST) {
      for (i=ndleafranks,j=0; i<nleafranks; i++,j++) {
        disp = (leafoffset[i] - leafoffset[ndleafranks])*link->unitbytes;
        ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr);
        ierr = MPI_Recv_init(link->leafbuf[leafmtype]+disp,n,unit,sf->ranks[i],link->tag,comm,&link->leafreqs[direction][leafmtype][j]);CHKERRQ(ierr);
      }
    } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Out-of-range PetscSFDirection = %d\n",(int)direction);
    link->leafreqsinited[direction][leafmtype] = PETSC_TRUE;
  }

  if (rootreqs) *rootreqs = link->rootreqs[direction][rootmtype];
  if (leafreqs) *leafreqs = link->leafreqs[direction][leafmtype];
  PetscFunctionReturn(0);
}
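/* A note on the persistent requests created above (standard MPI semantics, recalled here for
   convenience): MPI_Send_init()/MPI_Recv_init() only create a request; no data moves until the
   request is started with MPI_Start()/MPI_Startall(). Once a started request completes (e.g., in
   MPI_Waitall()) it becomes inactive again and can be restarted, which is why the requests built
   here can be reused by every operation on the same link. */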
/* Common part shared by SFBasic and SFNeighbor based on the fact they all deal with sparse graphs. */
PETSC_INTERN PetscErrorCode PetscSFPackGet_Basic_Common(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscInt nrootreqs,PetscInt nleafreqs,PetscSFPack *mylink)
{
  PetscErrorCode ierr;
  PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
  PetscInt       i,j,nreqs,nrootranks,ndrootranks,nleafranks,ndleafranks;
  const PetscInt *rootoffset,*leafoffset;
  PetscSFPack    *p,link;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscSFPackSetErrorOnUnsupportedOverlap(sf,unit,rootdata,leafdata);CHKERRQ(ierr);

  /* Look for types in cache */
  for (p=&bas->avail; (link=*p); p=&link->next) {
    ierr = MPIPetsc_Type_compare(unit,link->unit,&match);CHKERRQ(ierr);
    if (match) {
      *p = link->next; /* Remove from available list */
      goto found;
    }
  }

  ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr);
  ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscNew(&link);CHKERRQ(ierr);
  ierr = PetscSFPackSetUp_Host(sf,link,unit);CHKERRQ(ierr);
  ierr = PetscCommGetNewTag(PetscObjectComm((PetscObject)sf),&link->tag);CHKERRQ(ierr); /* One tag per link */

  /* Allocate root, leaf, self buffers, and MPI requests */
  link->rootbuflen = rootoffset[nrootranks]-rootoffset[ndrootranks];
  link->leafbuflen = leafoffset[nleafranks]-leafoffset[ndleafranks];
  link->selfbuflen = rootoffset[ndrootranks];
  link->nrootreqs  = nrootreqs;
  link->nleafreqs  = nleafreqs;
  nreqs            = (nrootreqs+nleafreqs)*4; /* Quadruple the requests since there are two communication directions and two memory types */
  ierr = PetscMalloc1(nreqs,&link->reqs);CHKERRQ(ierr);
  for (i=0; i<nreqs; i++) link->reqs[i] = MPI_REQUEST_NULL; /* Initialized to NULL so that we know which need to be freed in Destroy */

  for (i=0; i<2; i++) { /* Two communication directions */
    for (j=0; j<2; j++) { /* Two memory types */
      link->rootreqs[i][j] = link->reqs + nrootreqs*(2*i+j);
      link->leafreqs[i][j] = link->reqs + nrootreqs*4 + nleafreqs*(2*i+j);
    }
  }
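  /* For reference, the layout of link->reqs implied by the loops above is direction-major, then
     memory type, with all root requests before all leaf requests:

       [ root d0/m0 | root d0/m1 | root d1/m0 | root d1/m1 | leaf d0/m0 | leaf d0/m1 | leaf d1/m0 | leaf d1/m1 ]

     each root slot holding nrootreqs requests and each leaf slot nleafreqs requests, so that
     rootreqs[i][j] = reqs + nrootreqs*(2*i+j) and leafreqs[i][j] = reqs + 4*nrootreqs + nleafreqs*(2*i+j). */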
found:
  link->rootmtype = rootmtype;
  link->leafmtype = leafmtype;
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscSFPackSetUp_Device(sf,link,unit);CHKERRQ(ierr);
  if (!use_gpu_aware_mpi) {
    /* If not using GPU aware MPI, we always need buffers on host. In case root/leafdata is on device, we copy root/leafdata to/from
       these buffers for MPI. We only need buffers for remote neighbors since self-to-self communication is not done via MPI.
    */
    if (!link->rootbuf[PETSC_MEMTYPE_HOST]) {ierr = PetscMallocWithMemType(PETSC_MEMTYPE_HOST,link->rootbuflen*link->unitbytes,(void**)&link->rootbuf[PETSC_MEMTYPE_HOST]);CHKERRQ(ierr);}
    if (!link->leafbuf[PETSC_MEMTYPE_HOST]) {ierr = PetscMallocWithMemType(PETSC_MEMTYPE_HOST,link->leafbuflen*link->unitbytes,(void**)&link->leafbuf[PETSC_MEMTYPE_HOST]);CHKERRQ(ierr);}
  }
#endif
  if (!link->rootbuf[rootmtype]) {ierr = PetscMallocWithMemType(rootmtype,link->rootbuflen*link->unitbytes,(void**)&link->rootbuf[rootmtype]);CHKERRQ(ierr);}
  if (!link->leafbuf[leafmtype]) {ierr = PetscMallocWithMemType(leafmtype,link->leafbuflen*link->unitbytes,(void**)&link->leafbuf[leafmtype]);CHKERRQ(ierr);}
  if (!link->selfbuf[rootmtype]) {ierr = PetscMallocWithMemType(rootmtype,link->selfbuflen*link->unitbytes,(void**)&link->selfbuf[rootmtype]);CHKERRQ(ierr);}
  if (rootmtype != leafmtype && !link->selfbuf[leafmtype]) {ierr = PetscMallocWithMemType(leafmtype,link->selfbuflen*link->unitbytes,(void**)&link->selfbuf[leafmtype]);CHKERRQ(ierr);}
  link->rkey = rootdata;
  link->lkey = leafdata;
  link->next = bas->inuse;
  bas->inuse = link;

  *mylink = link;
  PetscFunctionReturn(0);
}

static PetscErrorCode PetscSFPackGet_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscSFDirection direction,PetscSFPack *mylink)
{
  PetscErrorCode ierr;
  PetscInt       nrootranks,ndrootranks,nleafranks,ndleafranks;

  PetscFunctionBegin;
  ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,NULL,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscSFPackGet_Basic_Common(sf,unit,rootmtype,rootdata,leafmtype,leafdata,nrootranks-ndrootranks,nleafranks-ndleafranks,mylink);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
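/* For orientation: the routines in the rest of this file are not called directly by users; they
   implement the function table of the "basic" type and are reached through PETSc's split-phase
   public API. A typical caller-side sequence (a sketch only; exact public signatures vary across
   PETSc versions) is

     PetscSFBcastBegin(sf,MPIU_SCALAR,rootdata,leafdata);            (root -> leaf)
     PetscSFBcastEnd(sf,MPIU_SCALAR,rootdata,leafdata);
     PetscSFReduceBegin(sf,MPIU_SCALAR,leafdata,rootdata,MPIU_SUM);  (leaf -> root)
     PetscSFReduceEnd(sf,MPIU_SCALAR,leafdata,rootdata,MPIU_SUM);
*/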
/*===================================================================================*/
/* SF public interface implementations */
/*===================================================================================*/
PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
{
  PetscErrorCode ierr;
  PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
  PetscInt       *rlengths,*ilengths,i;
  PetscMPIInt    rank,niranks,*iranks,tag;
  MPI_Comm       comm;
  MPI_Group      group;
  MPI_Request    *rootreqs,*leafreqs;

  PetscFunctionBegin;
  ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRQ(ierr);
  ierr = PetscSFSetUpRanks(sf,group);CHKERRQ(ierr);
  ierr = MPI_Group_free(&group);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)sf,&tag);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  /*
   * Inform roots about how many leaves reference them and from which ranks
   */
  ierr = PetscMalloc1(sf->nranks,&rlengths);CHKERRQ(ierr);
  /* Determine the number, the sending ranks, and the lengths of incoming messages */
  for (i=0; i<sf->nranks; i++) {
    rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */
  }
  ierr = PetscCommBuildTwoSided(comm,1,MPIU_INT,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths);CHKERRQ(ierr);

  /* Sort iranks. See the use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() for why.
     We could sort the ranks there at the price of allocating extra working arrays. Presumably, niranks is
     small and the sorting is cheap.
   */
  ierr = PetscSortMPIIntWithIntArray(niranks,iranks,ilengths);CHKERRQ(ierr);

  /* Partition into distinguished and non-distinguished incoming ranks */
  bas->ndiranks = sf->ndranks;
  bas->niranks  = bas->ndiranks + niranks;
  ierr = PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset);CHKERRQ(ierr);
  bas->ioffset[0] = 0;
  for (i=0; i<bas->ndiranks; i++) {
    bas->iranks[i]    = sf->ranks[i];
    bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i];
  }
  if (bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks");
  for ( ; i<bas->niranks; i++) {
    bas->iranks[i]    = iranks[i-bas->ndiranks];
    bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks];
  }
  bas->itotal = bas->ioffset[i];
  ierr = PetscFree(rlengths);CHKERRQ(ierr);
  ierr = PetscFree(iranks);CHKERRQ(ierr);
  ierr = PetscFree(ilengths);CHKERRQ(ierr);

  /* Send leaf identities to roots */
  ierr = PetscMalloc1(bas->itotal,&bas->irootloc);CHKERRQ(ierr);
  ierr = PetscMalloc2(bas->niranks-bas->ndiranks,&rootreqs,sf->nranks-sf->ndranks,&leafreqs);CHKERRQ(ierr);
  for (i=bas->ndiranks; i<bas->niranks; i++) {
    ierr = MPI_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],tag,comm,&rootreqs[i-bas->ndiranks]);CHKERRQ(ierr);
  }
  for (i=0; i<sf->nranks; i++) {
    PetscMPIInt npoints;
    ierr = PetscMPIIntCast(sf->roffset[i+1] - sf->roffset[i],&npoints);CHKERRQ(ierr);
    if (i < sf->ndranks) {
      if (sf->ranks[i] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank");
      if (bas->iranks[0] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank");
      if (npoints != bas->ioffset[1]-bas->ioffset[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths");
      ierr = PetscArraycpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints);CHKERRQ(ierr);
      continue;
    }
    ierr = MPI_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],tag,comm,&leafreqs[i-sf->ndranks]);CHKERRQ(ierr);
  }
  ierr = MPI_Waitall(bas->niranks-bas->ndiranks,rootreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
  ierr = MPI_Waitall(sf->nranks-sf->ndranks,leafreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
  ierr = PetscFree2(rootreqs,leafreqs);CHKERRQ(ierr);

  sf->selfleafdups    = PETSC_TRUE; /* The conservative assumption is that there are data races */
  sf->remoteleafdups  = PETSC_TRUE;
  bas->selfrootdups   = PETSC_TRUE;
  bas->remoterootdups = PETSC_TRUE;

  /* Setup packing optimization for roots and leaves */
  ierr = PetscSFPackSetupOptimizations_Basic(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
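/* After PetscSFSetUp_Basic() the root side of the graph is available in CSR-like form in
   PetscSF_Basic: for each incoming leaf rank bas->iranks[i], the local indices of the roots it
   references are bas->irootloc[bas->ioffset[i]] .. bas->irootloc[bas->ioffset[i+1]-1], with the
   distinguished (self) rank, if present, stored first. The packing and persistent-request code
   above iterates over exactly this layout. */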
static PetscErrorCode PetscSFSetFromOptions_Basic(PetscOptionItems *PetscOptionsObject,PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscOptionsHead(PetscOptionsObject,"PetscSF Basic options");CHKERRQ(ierr);
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf)
{
  PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (bas->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed");
  ierr = PetscFree2(bas->iranks,bas->ioffset);CHKERRQ(ierr);
  ierr = PetscFree(bas->irootloc);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  if (bas->irootloc_d) {cudaError_t err = cudaFree(bas->irootloc_d);CHKERRCUDA(err);bas->irootloc_d=NULL;}
#endif
  ierr = PetscSFPackDestroyAvailable(&bas->avail);CHKERRQ(ierr);
  ierr = PetscSFPackDestroyOptimizations_Basic(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscSFReset(sf);CHKERRQ(ierr);
  ierr = PetscFree(sf->data);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {ierr = PetscViewerASCIIPrintf(viewer,"  sort=%s\n",sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
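/* The Begin/End pairs below follow the usual split-phase pattern: a Begin routine obtains (or
   creates) a link for the datatype, posts the receives, packs the source data into the link's
   buffer, starts the sends, and handles the self-to-self portion with a memcpy when root and leaf
   data live in different memory types; the matching End routine waits on the requests, unpacks the
   received buffer into the destination with the requested MPI_Op, and returns the link to the
   available list via PetscSFPackReclaim(). */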
static PetscErrorCode PetscSFBcastAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscSFPack    link;
  const PetscInt *rootloc = NULL;
  MPI_Request    *rootreqs,*leafreqs;

  PetscFunctionBegin;
  ierr = PetscSFPackGet_Basic(sf,unit,rootmtype,rootdata,leafmtype,leafdata,PETSCSF_ROOT2LEAF_BCAST,&link);CHKERRQ(ierr);
  ierr = PetscSFGetRootIndicesWithMemType_Basic(sf,rootmtype,&rootloc);CHKERRQ(ierr);

  ierr = PetscSFPackGetReqs_Basic(sf,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);CHKERRQ(ierr);
  /* Post Irecv. Note distinguished ranks receive data via shared memory (i.e., not via MPI) */
  ierr = MPI_Startall_irecv(link->leafbuflen,unit,link->nleafreqs,leafreqs);CHKERRQ(ierr);

  /* Do Isend */
  ierr = PetscSFPackRootData(sf,link,rootloc,rootdata,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MPI_Startall_isend(link->rootbuflen,unit,link->nrootreqs,rootreqs);CHKERRQ(ierr);

  /* Do self-to-self communication via memcpy only when rootdata and leafdata are in different memory types */
  if (rootmtype != leafmtype) {ierr = PetscMemcpyWithMemType(leafmtype,rootmtype,link->selfbuf[leafmtype],link->selfbuf[rootmtype],link->selfbuflen*link->unitbytes);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode PetscSFBcastAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscSFPack    link;
  const PetscInt *leafloc = NULL;

  PetscFunctionBegin;
  ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
  ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr);
  ierr = PetscSFGetLeafIndicesWithMemType_Basic(sf,leafmtype,&leafloc);CHKERRQ(ierr);
  ierr = PetscSFUnpackAndOpLeafData(sf,link,leafloc,leafdata,op,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscSFPackReclaim(sf,&link);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* leaf -> root with reduction */
static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscSFPack    link;
  const PetscInt *leafloc = NULL;
  MPI_Request    *rootreqs = NULL,*leafreqs = NULL; /* dummy initializations to silence compiler warnings about uninitialized use */

  PetscFunctionBegin;
  ierr = PetscSFGetLeafIndicesWithMemType_Basic(sf,leafmtype,&leafloc);CHKERRQ(ierr);

  ierr = PetscSFPackGet_Basic(sf,unit,rootmtype,rootdata,leafmtype,leafdata,PETSCSF_LEAF2ROOT_REDUCE,&link);CHKERRQ(ierr);
  ierr = PetscSFPackGetReqs_Basic(sf,link,PETSCSF_LEAF2ROOT_REDUCE,&rootreqs,&leafreqs);CHKERRQ(ierr);
  /* Eagerly post root receives for non-distinguished ranks */
  ierr = MPI_Startall_irecv(link->rootbuflen,unit,link->nrootreqs,rootreqs);CHKERRQ(ierr);

  /* Pack and send leaf data */
  ierr = PetscSFPackLeafData(sf,link,leafloc,leafdata,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MPI_Startall_isend(link->leafbuflen,unit,link->nleafreqs,leafreqs);CHKERRQ(ierr);

  if (rootmtype != leafmtype) {ierr = PetscMemcpyWithMemType(rootmtype,leafmtype,link->selfbuf[rootmtype],link->selfbuf[leafmtype],link->selfbuflen*link->unitbytes);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscSFPack    link;
  const PetscInt *rootloc = NULL;

  PetscFunctionBegin;
  ierr = PetscSFGetRootIndicesWithMemType_Basic(sf,rootmtype,&rootloc);CHKERRQ(ierr);
  ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
  ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr);
  ierr = PetscSFUnpackAndOpRootData(sf,link,rootloc,rootdata,op,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscSFPackReclaim(sf,&link);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
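/* Fetch-and-op is implemented as two rounds over the same link: FetchAndOpBegin below simply
   starts a leaf->root reduce; FetchAndOpEnd waits for that reduce, applies op to the root data
   while capturing the previous root values (PetscSFFetchAndOpRootData), and then sends those old
   values back root->leaf so they can be unpacked into leafupdate. */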
PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscSFReduceBegin(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscSFPack    link;
  const PetscInt *rootloc = NULL,*leafloc = NULL;
  MPI_Request    *rootreqs,*leafreqs;

  PetscFunctionBegin;
  ierr = PetscSFGetRootIndicesWithMemType_Basic(sf,rootmtype,&rootloc);CHKERRQ(ierr);
  ierr = PetscSFGetLeafIndicesWithMemType_Basic(sf,leafmtype,&leafloc);CHKERRQ(ierr);
  ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
  /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
  ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr);
  ierr = PetscSFPackGetReqs_Basic(sf,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);CHKERRQ(ierr);

  /* Post leaf receives */
  ierr = MPI_Startall_irecv(link->leafbuflen,unit,link->nleafreqs,leafreqs);CHKERRQ(ierr);

  /* Process local fetch-and-op, post root sends */
  ierr = PetscSFFetchAndOpRootData(sf,link,rootloc,rootdata,op,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MPI_Startall_isend(link->rootbuflen,unit,link->nrootreqs,rootreqs);CHKERRQ(ierr);
  if (rootmtype != leafmtype) {ierr = PetscMemcpyWithMemType(leafmtype,rootmtype,link->selfbuf[leafmtype],link->selfbuf[rootmtype],link->selfbuflen*link->unitbytes);CHKERRQ(ierr);}

  /* Unpack and insert fetched data into leaves */
  ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr);
  ierr = PetscSFUnpackAndOpLeafData(sf,link,leafloc,leafupdate,MPIU_REPLACE,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscSFPackReclaim(sf,&link);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
{
  PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;

  PetscFunctionBegin;
  if (niranks)  *niranks  = bas->niranks;
  if (iranks)   *iranks   = bas->iranks;
  if (ioffset)  *ioffset  = bas->ioffset;
  if (irootloc) *irootloc = bas->irootloc;
  PetscFunctionReturn(0);
}
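/* Caller-side sketch (hypothetical indices) for the embedding routines below: given a set-up sf,
   keep only the edges whose roots are in a sorted selection, and communicate on the resulting
   sub-SF. The public entry point corresponding to the next routine is PetscSFCreateEmbeddedSF():

     PetscInt selected[] = {0,3,7};                (local root indices, sorted)
     PetscSFCreateEmbeddedSF(sf,3,selected,&esf);
     ... communicate on esf ...
     PetscSFDestroy(&esf);
*/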
/* An optimized PetscSFCreateEmbeddedSF. We aggressively reuse the established communication on sf.
   One bcast on sf is all we need; no further communication is required to build the embedded sf.
   Note that selected[] is already sorted when this routine is called.
 */
PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
{
  PetscSF           esf;
  PetscInt          esf_nranks,esf_ndranks,*esf_roffset,*esf_rmine,*esf_rremote,count;
  PetscInt          i,j,k,p,q,nroots,*rootdata,*leafdata,connected_leaves,*new_ilocal,nranks,ndranks,niranks,ndiranks,minleaf,maxleaf,maxlocal;
  PetscMPIInt       *esf_ranks;
  const PetscMPIInt *ranks,*iranks;
  const PetscInt    *roffset,*rmine,*rremote,*ioffset,*irootloc,*buffer;
  PetscBool         connected;
  PetscSFPack       link;
  PetscSFNode       *new_iremote;
  PetscSF_Basic     *bas;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);CHKERRQ(ierr);
  ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */

  /* Find out which leaves are still connected to roots in the embedded sf */
  ierr = PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
  /* We abuse the term leafdata here: its size is usually the number of leaf data items, but here it is the number of leaves (always >= the number of leaf data items) */
  maxlocal = (minleaf > maxleaf)? 0 : maxleaf-minleaf+1; /* maxleaf=-1 and minleaf=0 when nleaves=0 */
  ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafdata);CHKERRQ(ierr);
  /* Tag selected roots */
  for (i=0; i<nselected; ++i) rootdata[selected[i]] = 1;

  /* Bcast from roots to leaves to tag connected leaves. We reuse the established bcast communication in
     sf but do not do the unpacking (from leaf buffer to leafdata). The raw data in the leaf buffer is what
     we are interested in, since it tells which leaves are connected to which ranks.
   */
  ierr = PetscSFBcastAndOpBegin_Basic(sf,MPIU_INT,PETSC_MEMTYPE_HOST,rootdata,PETSC_MEMTYPE_HOST,leafdata-minleaf,MPIU_REPLACE);CHKERRQ(ierr); /* We must pass leafdata, but its contents are not used */
  ierr = PetscSFPackGetInUse(sf,MPIU_INT,rootdata,leafdata-minleaf,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
  ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr);
  ierr = PetscSFGetLeafInfo_Basic(sf,&nranks,&ndranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr); /* Get send info */
  esf_nranks = esf_ndranks = connected_leaves = 0;
  for (i=0; i<nranks; i++) {
    connected = PETSC_FALSE; /* Is the current process still connected to this remote root rank? */
    buffer    = i < ndranks? (PetscInt*)link->selfbuf[PETSC_MEMTYPE_HOST] : (PetscInt*)link->leafbuf[PETSC_MEMTYPE_HOST] + (roffset[i] - roffset[ndranks]);
    count     = roffset[i+1] - roffset[i];
    for (j=0; j<count; j++) {if (buffer[j]) {connected_leaves++; connected = PETSC_TRUE;}}
    if (connected) {esf_nranks++; if (i < ndranks) esf_ndranks++;}
  }

  /* Set the graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */
  ierr = PetscMalloc1(connected_leaves,&new_ilocal);CHKERRQ(ierr);
  ierr = PetscMalloc1(connected_leaves,&new_iremote);CHKERRQ(ierr);
  ierr = PetscMalloc4(esf_nranks,&esf_ranks,esf_nranks+1,&esf_roffset,connected_leaves,&esf_rmine,connected_leaves,&esf_rremote);CHKERRQ(ierr);
  p = 0; /* Counter for connected root ranks */
  q = 0; /* Counter for connected leaves */
  esf_roffset[0] = 0;
  for (i=0; i<nranks; i++) { /* Scan leaf data again to fill esf arrays */
    buffer    = i < ndranks? (PetscInt*)link->selfbuf[PETSC_MEMTYPE_HOST] : (PetscInt*)link->leafbuf[PETSC_MEMTYPE_HOST] + (roffset[i] - roffset[ndranks]);
    connected = PETSC_FALSE;
    for (j=roffset[i],k=0; j<roffset[i+1]; j++,k++) {
      if (buffer[k]) {
        esf_rmine[q] = new_ilocal[q] = rmine[j];
        esf_rremote[q]       = rremote[j];
        new_iremote[q].index = rremote[j];
        new_iremote[q].rank  = ranks[i];
        connected = PETSC_TRUE;
        q++;
      }
    }
    if (connected) {
      esf_ranks[p]     = ranks[i];
      esf_roffset[p+1] = q;
      p++;
    }
  }

  ierr = PetscSFPackReclaim(sf,&link);CHKERRQ(ierr);

  /* SetGraph internally resets the SF, so we only set its fields after the call */
  ierr = PetscSFSetGraph(esf,nroots,connected_leaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  esf->nranks  = esf_nranks;
  esf->ndranks = esf_ndranks;
  esf->ranks   = esf_ranks;
  esf->roffset = esf_roffset;
  esf->rmine   = esf_rmine;
  esf->rremote = esf_rremote;

  /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */
  bas  = (PetscSF_Basic*)esf->data;
  ierr = PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr); /* Get recv info */
  /* The embedded sf always has simpler communication than the original one. We might allocate longer arrays than
     needed here, but we expect these arrays to be short, so we do not mind. The benefit is that we can fill these
     arrays by parsing irootloc just once.
   */
  ierr = PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);CHKERRQ(ierr);
  ierr = PetscMalloc1(ioffset[niranks],&bas->irootloc);CHKERRQ(ierr);
  bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
  p = 0; /* Counter for connected leaf ranks */
  q = 0; /* Counter for connected roots */
  for (i=0; i<niranks; i++) {
    connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
    for (j=ioffset[i]; j<ioffset[i+1]; j++) {
      PetscInt loc;
      ierr = PetscFindInt(irootloc[j],nselected,selected,&loc);CHKERRQ(ierr);
      if (loc >= 0) { /* Found in selected[], so this root is connected */
        bas->irootloc[q++] = irootloc[j];
        connected = PETSC_TRUE;
      }
    }
    if (connected) {
      bas->niranks++;
      if (i<ndiranks) bas->ndiranks++; /* Note that the order of ranks (including distinguished ranks) is kept */
      bas->iranks[p]    = iranks[i];
      bas->ioffset[p+1] = q;
      p++;
    }
  }
  bas->itotal = q;

  /* Setup packing optimizations */
  ierr = PetscSFPackSetupOptimizations_Basic(esf);CHKERRQ(ierr);
  esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */

  ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr);
  *newsf = esf;
  PetscFunctionReturn(0);
}
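/* PetscSFCreateEmbeddedLeafSF_Basic below is the leaf-side counterpart of the routine above:
   instead of selecting roots and discovering which leaves remain connected, it selects leaves
   (so the new graph can be written down directly) and uses one reduce on sf to discover which
   roots, and hence which incoming ranks, remain connected. */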
PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedLeafSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
{
  PetscSF           esf;
  PetscInt          i,j,k,p,q,nroots,*rootdata,*leafdata,*new_ilocal,niranks,ndiranks,minleaf,maxleaf,maxlocal;
  const PetscInt    *ilocal,*ioffset,*irootloc,*buffer;
  const PetscMPIInt *iranks;
  PetscSFPack       link;
  PetscSFNode       *new_iremote;
  const PetscSFNode *iremote;
  PetscSF_Basic     *bas;
  MPI_Group         group;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);CHKERRQ(ierr);
  ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */

  /* Set the graph of esf, which is easy for CreateEmbeddedLeafSF */
  ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
  ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
  ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
  for (i=0; i<nselected; i++) {
    const PetscInt l = selected[i];
    new_ilocal[i]        = ilocal ? ilocal[l] : l;
    new_iremote[i].rank  = iremote[l].rank;
    new_iremote[i].index = iremote[l].index;
  }

  /* Tag selected leaves before PetscSFSetGraph, since new_ilocal might turn into NULL because we pass PETSC_OWN_POINTER below */
  maxlocal = (minleaf > maxleaf)? 0 : maxleaf-minleaf+1; /* maxleaf=-1 and minleaf=0 when nleaves=0 */
  ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafdata);CHKERRQ(ierr);
  for (i=0; i<nselected; i++) leafdata[new_ilocal[i]-minleaf] = 1; /* -minleaf to adjust indices according to minleaf */

  ierr = PetscSFSetGraph(esf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);

  /* Set up the outgoing communication (i.e., send info). We cannot reuse rmine etc. from sf since there is no way to
     map rmine[i] (ilocal of leaves) back to selected[j] (leaf indices).
   */
  ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRQ(ierr);
  ierr = PetscSFSetUpRanks(esf,group);CHKERRQ(ierr);
  ierr = MPI_Group_free(&group);CHKERRQ(ierr);

  /* Set up the incoming communication (i.e., recv info) */
  ierr = PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr);
  bas  = (PetscSF_Basic*)esf->data;
  ierr = PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);CHKERRQ(ierr);
  ierr = PetscMalloc1(ioffset[niranks],&bas->irootloc);CHKERRQ(ierr);

  /* Pass info about selected leaves to the root buffer */
  ierr = PetscSFReduceBegin_Basic(sf,MPIU_INT,PETSC_MEMTYPE_HOST,leafdata-minleaf,PETSC_MEMTYPE_HOST,rootdata,MPIU_REPLACE);CHKERRQ(ierr); /* -minleaf to re-adjust the start address of leafdata */
  ierr = PetscSFPackGetInUse(sf,MPIU_INT,rootdata,leafdata-minleaf,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
  ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr);

  bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
  p = 0; /* Counter for connected leaf ranks */
  q = 0; /* Counter for connected roots */
  for (i=0; i<niranks; i++) {
    PetscBool connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
    buffer = i < ndiranks? (PetscInt*)link->selfbuf[PETSC_MEMTYPE_HOST] : (PetscInt*)link->rootbuf[PETSC_MEMTYPE_HOST] + (ioffset[i] - ioffset[ndiranks]);
    for (j=ioffset[i],k=0; j<ioffset[i+1]; j++,k++) {
      if (buffer[k]) {bas->irootloc[q++] = irootloc[j]; connected = PETSC_TRUE;}
    }
    if (connected) {
      bas->niranks++;
      if (i<ndiranks) bas->ndiranks++;
      bas->iranks[p]    = iranks[i];
      bas->ioffset[p+1] = q;
      p++;
    }
  }
  bas->itotal = q;
  ierr = PetscSFPackReclaim(sf,&link);CHKERRQ(ierr);

  /* Setup packing optimizations */
  ierr = PetscSFPackSetupOptimizations_Basic(esf);CHKERRQ(ierr);
  esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */

  ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr);
  *newsf = esf;
  PetscFunctionReturn(0);
}

PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
{
  PetscSF_Basic  *dat;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  sf->ops->SetUp                = PetscSFSetUp_Basic;
  sf->ops->SetFromOptions       = PetscSFSetFromOptions_Basic;
  sf->ops->Reset                = PetscSFReset_Basic;
  sf->ops->Destroy              = PetscSFDestroy_Basic;
  sf->ops->View                 = PetscSFView_Basic;
  sf->ops->BcastAndOpBegin      = PetscSFBcastAndOpBegin_Basic;
  sf->ops->BcastAndOpEnd        = PetscSFBcastAndOpEnd_Basic;
  sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
  sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
  sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
  sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
  sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
  sf->ops->CreateEmbeddedSF     = PetscSFCreateEmbeddedSF_Basic;
  sf->ops->CreateEmbeddedLeafSF = PetscSFCreateEmbeddedLeafSF_Basic;

  ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr);
  sf->data = (void*)dat;
  PetscFunctionReturn(0);
}
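/* Usage note (sketch): this file implements the default "basic" PetscSF type. It is selected
   programmatically with PetscSFSetType(sf,PETSCSFBASIC) or from the options database with
   -sf_type basic. */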