1 2 #include <../src/vec/is/sf/impls/basic/sfbasic.h> 3 4 /*===================================================================================*/ 5 /* Internal routines for PetscSFPack_Basic */ 6 /*===================================================================================*/ 7 8 /* Return root and leaf MPI requests for communication in the given direction. If the requests have not been 9 initialized (since we use persistent requests), then initialize them. 10 */ 11 static PetscErrorCode PetscSFPackGetReqs_Basic(PetscSF sf,PetscSFPack_Basic link,PetscSFDirection direction,MPI_Request **rootreqs,MPI_Request **leafreqs) 12 { 13 PetscErrorCode ierr; 14 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 15 MPI_Request *requests = (direction == PETSCSF_LEAF2ROOT_REDUCE)? link->requests : link->requests + link->half; 16 PetscInt i,j,nrootranks,ndrootranks,nleafranks,ndleafranks,disp; 17 const PetscInt *rootoffset,*leafoffset; 18 PetscMPIInt n; 19 MPI_Comm comm = PetscObjectComm((PetscObject)sf); 20 MPI_Datatype unit; 21 22 PetscFunctionBegin; 23 if (!link->initialized[direction]) { 24 ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr); 25 ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr); 26 unit = link->unit; 27 j = 0; /* request counter */ 28 if (direction == PETSCSF_LEAF2ROOT_REDUCE) { 29 for (i=ndrootranks; i<nrootranks; i++) { 30 disp = (rootoffset[i] - rootoffset[ndrootranks])*link->unitbytes; 31 ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr); 32 ierr = MPI_Recv_init(link->rootbuf+disp,n,unit,bas->iranks[i],link->tag,comm,&requests[j++]);CHKERRQ(ierr); 33 } 34 for (i=ndleafranks; i<nleafranks; i++) { 35 disp = (leafoffset[i] - leafoffset[ndleafranks])*link->unitbytes; 36 ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr); 37 ierr = MPI_Send_init(link->leafbuf+disp,n,unit,sf->ranks[i],link->tag,comm,&requests[j++]);CHKERRQ(ierr); 38 } 39 } else if (direction == PETSCSF_ROOT2LEAF_BCAST) { 40 for (i=ndrootranks; i<nrootranks; i++) { 41 disp = (rootoffset[i] - rootoffset[ndrootranks])*link->unitbytes; 42 ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr); 43 ierr = MPI_Send_init(link->rootbuf+disp,n,unit,bas->iranks[i],link->tag,comm,&requests[j++]);CHKERRQ(ierr); 44 } 45 for (i=ndleafranks; i<nleafranks; i++) { 46 disp = (leafoffset[i] - leafoffset[ndleafranks])*link->unitbytes; 47 ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr); 48 ierr = MPI_Recv_init(link->leafbuf+disp,n,unit,sf->ranks[i],link->tag,comm,&requests[j++]);CHKERRQ(ierr); 49 } 50 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Out-of-range PetscSFDirection = %D\n",direction); 51 52 link->initialized[direction] = PETSC_TRUE; 53 } 54 55 if (rootreqs) *rootreqs = requests; 56 if (leafreqs) *leafreqs = requests + (bas->niranks - bas->ndiranks); 57 PetscFunctionReturn(0); 58 } 59 60 /* Common part shared by SFBasic and SFNeighbor based on the fact they all deal with sparse graphs. */ 61 PETSC_INTERN PetscErrorCode PetscSFPackGet_Basic_Common(PetscSF sf,MPI_Datatype unit,const void *rootdata,const void *leafdata,PetscInt half,PetscSFPack_Basic *mylink) 62 { 63 PetscErrorCode ierr; 64 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 65 PetscInt i,nrootranks,ndrootranks,nleafranks,ndleafranks,rbufsize,lbufsize,sbufsize; 66 const PetscInt *rootoffset,*leafoffset; 67 PetscSFPack *p; 68 PetscSFPack_Basic link; 69 PetscBool match; 70 71 PetscFunctionBegin; 72 ierr = PetscSFPackSetErrorOnUnsupportedOverlap(sf,unit,rootdata,leafdata);CHKERRQ(ierr); 73 74 /* Look for types in cache */ 75 for (p=&bas->avail; (link=(PetscSFPack_Basic)*p); p=&link->next) { 76 ierr = MPIPetsc_Type_compare(unit,link->unit,&match);CHKERRQ(ierr); 77 if (match) { 78 *p = link->next; /* Remove from available list */ 79 goto found; 80 } 81 } 82 83 ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr); 84 ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr); 85 ierr = PetscNew(&link);CHKERRQ(ierr); 86 ierr = PetscSFPackSetupType((PetscSFPack)link,unit);CHKERRQ(ierr); 87 ierr = PetscCommGetNewTag(PetscObjectComm((PetscObject)sf),&link->tag);CHKERRQ(ierr); /* One tag per link */ 88 89 /* Allocate root, leaf, self buffers, and MPI requests. We double the requests since there are leaf2root and root2leaf communications. */ 90 rbufsize = (rootoffset[nrootranks]-rootoffset[ndrootranks])*link->unitbytes; 91 lbufsize = (leafoffset[nleafranks]-leafoffset[ndleafranks])*link->unitbytes; 92 sbufsize = rootoffset[ndrootranks]*link->unitbytes; 93 link->half = half; 94 ierr = PetscMalloc4(rbufsize,&link->rootbuf,lbufsize,&link->leafbuf,sbufsize,&link->selfbuf,link->half*2,&link->requests);CHKERRQ(ierr); 95 for (i=0; i<link->half*2; i++) link->requests[i] = MPI_REQUEST_NULL; /* Initialized to NULL so that we know which need to be freed in Destroy */ 96 97 found: 98 link->rkey = rootdata; 99 link->lkey = leafdata; 100 link->next = bas->inuse; 101 bas->inuse = (PetscSFPack)link; 102 103 *mylink = link; 104 PetscFunctionReturn(0); 105 } 106 107 static PetscErrorCode PetscSFPackGet_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,const void *leafdata,PetscSFDirection direction,PetscSFPack_Basic *mylink) 108 { 109 PetscErrorCode ierr; 110 PetscInt nrootranks,ndrootranks,nleafranks,ndleafranks,half; 111 112 PetscFunctionBegin; 113 ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,NULL,NULL);CHKERRQ(ierr); 114 ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 115 half = (nrootranks - ndrootranks) + (nleafranks - ndleafranks); 116 ierr = PetscSFPackGet_Basic_Common(sf,unit,rootdata,leafdata,half,mylink);CHKERRQ(ierr); 117 PetscFunctionReturn(0); 118 } 119 120 /*===================================================================================*/ 121 /* SF public interface implementations */ 122 /*===================================================================================*/ 123 PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf) 124 { 125 PetscErrorCode ierr; 126 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 127 PetscInt *rlengths,*ilengths,i; 128 PetscMPIInt rank,niranks,*iranks,tag; 129 MPI_Comm comm; 130 MPI_Group group; 131 MPI_Request *rootreqs,*leafreqs; 132 133 PetscFunctionBegin; 134 ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRQ(ierr); 135 ierr = PetscSFSetUpRanks(sf,group);CHKERRQ(ierr); 136 ierr = MPI_Group_free(&group);CHKERRQ(ierr); 137 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 138 ierr = PetscObjectGetNewTag((PetscObject)sf,&tag);CHKERRQ(ierr); 139 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 140 /* 141 * Inform roots about how many leaves and from which ranks 142 */ 143 ierr = PetscMalloc1(sf->nranks,&rlengths);CHKERRQ(ierr); 144 /* Determine number, sending ranks, and length of incoming */ 145 for (i=0; i<sf->nranks; i++) { 146 rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */ 147 } 148 ierr = PetscCommBuildTwoSided(comm,1,MPIU_INT,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths);CHKERRQ(ierr); 149 150 /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why. 151 We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is 152 small and the sorting is cheap. 153 */ 154 ierr = PetscSortMPIIntWithIntArray(niranks,iranks,ilengths);CHKERRQ(ierr); 155 156 /* Partition into distinguished and non-distinguished incoming ranks */ 157 bas->ndiranks = sf->ndranks; 158 bas->niranks = bas->ndiranks + niranks; 159 ierr = PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset);CHKERRQ(ierr); 160 bas->ioffset[0] = 0; 161 for (i=0; i<bas->ndiranks; i++) { 162 bas->iranks[i] = sf->ranks[i]; 163 bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i]; 164 } 165 if (bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks"); 166 for ( ; i<bas->niranks; i++) { 167 bas->iranks[i] = iranks[i-bas->ndiranks]; 168 bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks]; 169 } 170 bas->itotal = bas->ioffset[i]; 171 ierr = PetscFree(rlengths);CHKERRQ(ierr); 172 ierr = PetscFree(iranks);CHKERRQ(ierr); 173 ierr = PetscFree(ilengths);CHKERRQ(ierr); 174 175 /* Send leaf identities to roots */ 176 ierr = PetscMalloc1(bas->itotal,&bas->irootloc);CHKERRQ(ierr); 177 ierr = PetscMalloc2(bas->niranks-bas->ndiranks,&rootreqs,sf->nranks-sf->ndranks,&leafreqs);CHKERRQ(ierr); 178 for (i=bas->ndiranks; i<bas->niranks; i++) { 179 ierr = MPI_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],tag,comm,&rootreqs[i-bas->ndiranks]);CHKERRQ(ierr); 180 } 181 for (i=0; i<sf->nranks; i++) { 182 PetscMPIInt npoints; 183 ierr = PetscMPIIntCast(sf->roffset[i+1] - sf->roffset[i],&npoints);CHKERRQ(ierr); 184 if (i < sf->ndranks) { 185 if (sf->ranks[i] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank"); 186 if (bas->iranks[0] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank"); 187 if (npoints != bas->ioffset[1]-bas->ioffset[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths"); 188 ierr = PetscArraycpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints);CHKERRQ(ierr); 189 continue; 190 } 191 ierr = MPI_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],tag,comm,&leafreqs[i-sf->ndranks]);CHKERRQ(ierr); 192 } 193 ierr = MPI_Waitall(bas->niranks-bas->ndiranks,rootreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 194 ierr = MPI_Waitall(sf->nranks-sf->ndranks,leafreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 195 ierr = PetscFree2(rootreqs,leafreqs);CHKERRQ(ierr); 196 197 /* Setup packing optimization for roots and leaves */ 198 ierr = PetscSFPackSetupOptimization_Basic(sf);CHKERRQ(ierr); 199 PetscFunctionReturn(0); 200 } 201 202 static PetscErrorCode PetscSFSetFromOptions_Basic(PetscOptionItems *PetscOptionsObject,PetscSF sf) 203 { 204 PetscErrorCode ierr; 205 206 PetscFunctionBegin; 207 ierr = PetscOptionsHead(PetscOptionsObject,"PetscSF Basic options");CHKERRQ(ierr); 208 ierr = PetscOptionsTail();CHKERRQ(ierr); 209 PetscFunctionReturn(0); 210 } 211 212 PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf) 213 { 214 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 215 PetscErrorCode ierr; 216 PetscSFPack_Basic link,next; 217 PetscInt i; 218 219 PetscFunctionBegin; 220 if (bas->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed"); 221 ierr = PetscFree2(bas->iranks,bas->ioffset);CHKERRQ(ierr); 222 ierr = PetscFree(bas->irootloc);CHKERRQ(ierr); 223 for (link=(PetscSFPack_Basic)bas->avail; link; link=next) { 224 next = (PetscSFPack_Basic)link->next; 225 if (!link->isbuiltin) {ierr = MPI_Type_free(&link->unit);CHKERRQ(ierr);} 226 for (i=0; i<link->half*2; i++) { /* Persistent reqs must be freed. */ 227 if (link->requests[i] != MPI_REQUEST_NULL) {ierr = MPI_Request_free(&link->requests[i]);CHKERRQ(ierr);} 228 } 229 ierr = PetscFree4(link->rootbuf,link->leafbuf,link->selfbuf,link->requests);CHKERRQ(ierr); 230 ierr = PetscFree(link);CHKERRQ(ierr); 231 } 232 bas->avail = NULL; 233 ierr = PetscSFPackDestroyOptimization_Basic(sf);CHKERRQ(ierr); 234 PetscFunctionReturn(0); 235 } 236 237 PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf) 238 { 239 PetscErrorCode ierr; 240 241 PetscFunctionBegin; 242 ierr = PetscSFReset(sf);CHKERRQ(ierr); 243 ierr = PetscFree(sf->data);CHKERRQ(ierr); 244 PetscFunctionReturn(0); 245 } 246 247 PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer) 248 { 249 PetscErrorCode ierr; 250 PetscBool iascii; 251 252 PetscFunctionBegin; 253 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 254 if (iascii) {ierr = PetscViewerASCIIPrintf(viewer," sort=%s\n",sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr);} 255 PetscFunctionReturn(0); 256 } 257 258 static PetscErrorCode PetscSFBcastAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 259 { 260 PetscErrorCode ierr; 261 PetscSFPack_Basic link; 262 PetscInt nrootranks,ndrootranks,nleafranks,ndleafranks,cnt; 263 const PetscInt *rootoffset,*leafoffset,*rootloc,*leafloc; 264 const PetscMPIInt *rootranks,*leafranks; 265 MPI_Request *rootreqs,*leafreqs; 266 267 PetscFunctionBegin; 268 ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,NULL);CHKERRQ(ierr); 269 ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc,NULL);CHKERRQ(ierr); 270 ierr = PetscSFPackGet_Basic(sf,unit,rootdata,leafdata,PETSCSF_ROOT2LEAF_BCAST,&link);CHKERRQ(ierr); 271 ierr = PetscSFGetRootIndicesAtPlace_Basic(sf,PETSC_FALSE,&rootloc); 272 273 ierr = PetscSFPackGetReqs_Basic(sf,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);CHKERRQ(ierr); 274 /* Post Irecv. Note distinguished ranks receive data via shared memory (i.e., not via MPI) */ 275 cnt = leafoffset[nleafranks]-leafoffset[ndleafranks]; 276 ierr = MPI_Startall_irecv(cnt,unit,nleafranks-ndleafranks,leafreqs);CHKERRQ(ierr); 277 278 /* Do Isend */ 279 ierr = PetscSFPackRootData(sf,(PetscSFPack)link,nrootranks,ndrootranks,rootoffset,rootloc,rootdata);CHKERRQ(ierr); 280 cnt = rootoffset[nrootranks] - rootoffset[ndrootranks]; 281 ierr = MPI_Startall_isend(cnt,unit,nrootranks-ndrootranks,rootreqs);CHKERRQ(ierr); 282 PetscFunctionReturn(0); 283 } 284 285 PETSC_INTERN PetscErrorCode PetscSFBcastAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 286 { 287 PetscErrorCode ierr; 288 PetscSFPack_Basic link; 289 PetscInt nleafranks,ndleafranks; 290 const PetscInt *leafoffset,*leafloc; 291 292 PetscFunctionBegin; 293 ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr); 294 ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr); 295 ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr); 296 ierr = PetscSFGetLeafIndicesAtPlace_Basic(sf,PETSC_FALSE,&leafloc); 297 ierr = PetscSFUnpackAndOpLeafData(sf,(PetscSFPack)link,nleafranks,ndleafranks,leafoffset,leafloc,leafdata,op);CHKERRQ(ierr); 298 ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr); 299 PetscFunctionReturn(0); 300 } 301 302 /* leaf -> root with reduction */ 303 static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 304 { 305 PetscSFPack_Basic link; 306 PetscErrorCode ierr; 307 PetscInt nrootranks,ndrootranks,nleafranks,ndleafranks,cnt; 308 const PetscInt *rootoffset,*leafoffset,*rootloc,*leafloc; 309 const PetscMPIInt *rootranks,*leafranks; 310 MPI_Request *rootreqs,*leafreqs; 311 312 PetscFunctionBegin; 313 ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr); 314 ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,NULL,NULL);CHKERRQ(ierr); 315 ierr = PetscSFGetLeafIndicesAtPlace_Basic(sf,PETSC_FALSE,&leafloc); 316 317 ierr = PetscSFPackGet_Basic(sf,unit,rootdata,leafdata,PETSCSF_LEAF2ROOT_REDUCE,&link);CHKERRQ(ierr); 318 ierr = PetscSFPackGetReqs_Basic(sf,link,PETSCSF_LEAF2ROOT_REDUCE,&rootreqs,&leafreqs);CHKERRQ(ierr); 319 /* Eagerly post root receives for non-distinguished ranks */ 320 cnt = rootoffset[nrootranks]-rootoffset[ndrootranks]; 321 ierr = MPI_Startall_irecv(cnt,unit,nrootranks-ndrootranks,rootreqs);CHKERRQ(ierr); 322 323 /* Pack and send leaf data */ 324 ierr = PetscSFPackLeafData(sf,(PetscSFPack)link,nleafranks,ndleafranks,leafoffset,leafloc,leafdata);CHKERRQ(ierr); 325 cnt = leafoffset[nleafranks] - leafoffset[ndleafranks]; 326 ierr = MPI_Startall_isend(cnt,unit,nleafranks-ndleafranks,leafreqs);CHKERRQ(ierr); 327 PetscFunctionReturn(0); 328 } 329 330 PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 331 { 332 PetscErrorCode ierr; 333 PetscSFPack_Basic link; 334 PetscInt nrootranks,ndrootranks; 335 const PetscInt *rootoffset,*rootloc; 336 337 PetscFunctionBegin; 338 ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr); 339 ierr = PetscSFGetRootIndicesAtPlace_Basic(sf,PETSC_FALSE,&rootloc); 340 ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr); 341 ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr); 342 ierr = PetscSFUnpackAndOpRootData(sf,(PetscSFPack)link,nrootranks,ndrootranks,rootoffset,rootloc,rootdata,op);CHKERRQ(ierr); 343 ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr); 344 PetscFunctionReturn(0); 345 } 346 347 PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 348 { 349 PetscErrorCode ierr; 350 351 PetscFunctionBegin; 352 ierr = PetscSFReduceBegin(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr); 353 PetscFunctionReturn(0); 354 } 355 356 static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 357 { 358 PetscErrorCode ierr; 359 PetscSFPack_Basic link; 360 PetscInt nrootranks,ndrootranks,nleafranks,ndleafranks,cnt; 361 const PetscInt *rootoffset,*leafoffset,*rootloc,*leafloc; 362 const PetscMPIInt *rootranks,*leafranks; 363 MPI_Request *rootreqs,*leafreqs; 364 365 PetscFunctionBegin; 366 ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,NULL);CHKERRQ(ierr); 367 ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,NULL,NULL);CHKERRQ(ierr); 368 ierr = PetscSFGetRootIndicesAtPlace_Basic(sf,PETSC_FALSE,&rootloc);CHKERRQ(ierr); 369 ierr = PetscSFGetLeafIndicesAtPlace_Basic(sf,PETSC_FALSE,&leafloc);CHKERRQ(ierr); 370 ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr); 371 /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */ 372 ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr); 373 ierr = PetscSFPackGetReqs_Basic(sf,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);CHKERRQ(ierr); 374 375 /* Post leaf receives */ 376 cnt = leafoffset[nleafranks]-leafoffset[ndleafranks]; 377 ierr = MPI_Startall_irecv(cnt,unit,nleafranks-ndleafranks,leafreqs);CHKERRQ(ierr); 378 379 /* Process local fetch-and-op, post root sends */ 380 ierr = PetscSFFetchAndOpRootData(sf,(PetscSFPack)link,nrootranks,ndrootranks,rootoffset,rootloc,rootdata,op);CHKERRQ(ierr); 381 cnt = rootoffset[nrootranks] - rootoffset[ndrootranks]; 382 ierr = MPI_Startall_isend(cnt,unit,nrootranks-ndrootranks,rootreqs);CHKERRQ(ierr); 383 384 /* Unpack and insert fetched data into leaves */ 385 ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr); 386 ierr = PetscSFUnpackAndOpLeafData(sf,(PetscSFPack)link,nleafranks,ndleafranks,leafoffset,leafloc,leafupdate,MPIU_REPLACE);CHKERRQ(ierr); 387 ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr); 388 PetscFunctionReturn(0); 389 } 390 391 PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc) 392 { 393 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 394 395 PetscFunctionBegin; 396 if (niranks) *niranks = bas->niranks; 397 if (iranks) *iranks = bas->iranks; 398 if (ioffset) *ioffset = bas->ioffset; 399 if (irootloc) *irootloc = bas->irootloc; 400 PetscFunctionReturn(0); 401 } 402 403 /* An optimized PetscSFCreateEmbeddedSF. We aggresively make use of the established communication on sf. 404 We need one bcast on sf, and no communication anymore to build the embedded sf. Note that selected[] 405 was sorted before calling the routine. 406 */ 407 PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 408 { 409 PetscSF esf; 410 PetscInt esf_nranks,esf_ndranks,*esf_roffset,*esf_rmine,*esf_rremote,count; 411 PetscInt i,j,k,p,q,nroots,*rootdata,*leafdata,connected_leaves,*new_ilocal,nranks,ndranks,niranks,ndiranks,minleaf,maxleaf,maxlocal; 412 PetscMPIInt *esf_ranks; 413 const PetscMPIInt *ranks,*iranks; 414 const PetscInt *roffset,*rmine,*rremote,*ioffset,*irootloc,*buffer; 415 PetscBool connected; 416 PetscSFPack_Basic link; 417 PetscSFNode *new_iremote; 418 PetscSF_Basic *bas; 419 PetscErrorCode ierr; 420 421 PetscFunctionBegin; 422 ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);CHKERRQ(ierr); 423 ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */ 424 425 /* Find out which leaves are still connected to roots in the embedded sf */ 426 ierr = PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);CHKERRQ(ierr); 427 ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr); 428 /* We abused the term leafdata here, whose size is usually the number of leaf data items. Here its size is # of leaves (always >= # of leaf data items) */ 429 maxlocal = (minleaf > maxleaf)? 0 : maxleaf-minleaf+1; /* maxleaf=-1 and minleaf=0 when nleaves=0 */ 430 ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafdata);CHKERRQ(ierr); 431 /* Tag selected roots */ 432 for (i=0; i<nselected; ++i) rootdata[selected[i]] = 1; 433 434 /* Bcast from roots to leaves to tag connected leaves. We reuse the established bcast communication in 435 sf but do not do unpacking (from leaf buffer to leafdata). The raw data in leaf buffer is what we are 436 interested in since it tells which leaves are connected to which ranks. 437 */ 438 ierr = PetscSFBcastAndOpBegin_Basic(sf,MPIU_INT,rootdata,leafdata-minleaf,MPIU_REPLACE);CHKERRQ(ierr); /* Need to give leafdata but we won't use it */ 439 ierr = PetscSFPackGetInUse(sf,MPIU_INT,rootdata,leafdata-minleaf,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr); 440 ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr); 441 ierr = PetscSFGetLeafInfo_Basic(sf,&nranks,&ndranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr); /* Get send info */ 442 esf_nranks = esf_ndranks = connected_leaves = 0; 443 for (i=0; i<nranks; i++) { 444 connected = PETSC_FALSE; /* Is the current process still connected to this remote root rank? */ 445 buffer = i < ndranks? (PetscInt*)link->selfbuf : (PetscInt*)link->leafbuf + (roffset[i] - roffset[ndranks]); 446 count = roffset[i+1] - roffset[i]; 447 for (j=0; j<count; j++) {if (buffer[j]) {connected_leaves++; connected = PETSC_TRUE;}} 448 if (connected) {esf_nranks++; if (i < ndranks) esf_ndranks++;} 449 } 450 451 /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */ 452 ierr = PetscMalloc1(connected_leaves,&new_ilocal);CHKERRQ(ierr); 453 ierr = PetscMalloc1(connected_leaves,&new_iremote);CHKERRQ(ierr); 454 ierr = PetscMalloc4(esf_nranks,&esf_ranks,esf_nranks+1,&esf_roffset,connected_leaves,&esf_rmine,connected_leaves,&esf_rremote);CHKERRQ(ierr); 455 p = 0; /* Counter for connected root ranks */ 456 q = 0; /* Counter for connected leaves */ 457 esf_roffset[0] = 0; 458 for (i=0; i<nranks; i++) { /* Scan leaf data again to fill esf arrays */ 459 buffer = i < ndranks? (PetscInt*)link->selfbuf : (PetscInt*)link->leafbuf + (roffset[i] - roffset[ndranks]); 460 connected = PETSC_FALSE; 461 for (j=roffset[i],k=0; j<roffset[i+1]; j++,k++) { 462 if (buffer[k]) { 463 esf_rmine[q] = new_ilocal[q] = rmine[j]; 464 esf_rremote[q] = rremote[j]; 465 new_iremote[q].index = rremote[j]; 466 new_iremote[q].rank = ranks[i]; 467 connected = PETSC_TRUE; 468 q++; 469 } 470 } 471 if (connected) { 472 esf_ranks[p] = ranks[i]; 473 esf_roffset[p+1] = q; 474 p++; 475 } 476 } 477 478 ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr); 479 480 /* SetGraph internally resets the SF, so we only set its fields after the call */ 481 ierr = PetscSFSetGraph(esf,nroots,connected_leaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 482 esf->nranks = esf_nranks; 483 esf->ndranks = esf_ndranks; 484 esf->ranks = esf_ranks; 485 esf->roffset = esf_roffset; 486 esf->rmine = esf_rmine; 487 esf->rremote = esf_rremote; 488 489 /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */ 490 bas = (PetscSF_Basic*)esf->data; 491 ierr = PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr); /* Get recv info */ 492 /* Embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here. But we 493 expect these arrays are usually short, so we do not care. The benefit is we can fill these arrays by just parsing irootloc once. 494 */ 495 ierr = PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);CHKERRQ(ierr); 496 ierr = PetscMalloc1(ioffset[niranks],&bas->irootloc);CHKERRQ(ierr); 497 bas->niranks = bas->ndiranks = bas->ioffset[0] = 0; 498 p = 0; /* Counter for connected leaf ranks */ 499 q = 0; /* Counter for connected roots */ 500 for (i=0; i<niranks; i++) { 501 connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */ 502 for (j=ioffset[i]; j<ioffset[i+1]; j++) { 503 PetscInt loc; 504 ierr = PetscFindInt(irootloc[j],nselected,selected,&loc);CHKERRQ(ierr); 505 if (loc >= 0) { /* Found in selected this root is connected */ 506 bas->irootloc[q++] = irootloc[j]; 507 connected = PETSC_TRUE; 508 } 509 } 510 if (connected) { 511 bas->niranks++; 512 if (i<ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */ 513 bas->iranks[p] = iranks[i]; 514 bas->ioffset[p+1] = q; 515 p++; 516 } 517 } 518 bas->itotal = q; 519 520 /* Setup packing optimizations */ 521 ierr = PetscSFPackSetupOptimization_Basic(esf);CHKERRQ(ierr); 522 esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */ 523 524 ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr); 525 *newsf = esf; 526 PetscFunctionReturn(0); 527 } 528 529 PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedLeafSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 530 { 531 PetscSF esf; 532 PetscInt i,j,k,p,q,nroots,*rootdata,*leafdata,*new_ilocal,niranks,ndiranks,minleaf,maxleaf,maxlocal; 533 const PetscInt *ilocal,*ioffset,*irootloc,*buffer; 534 const PetscMPIInt *iranks; 535 PetscSFPack_Basic link; 536 PetscSFNode *new_iremote; 537 const PetscSFNode *iremote; 538 PetscSF_Basic *bas; 539 MPI_Group group; 540 PetscErrorCode ierr; 541 542 PetscFunctionBegin; 543 ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);CHKERRQ(ierr); 544 ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */ 545 546 /* Set the graph of esf, which is easy for CreateEmbeddedLeafSF */ 547 ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr); 548 ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr); 549 ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr); 550 ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr); 551 for (i=0; i<nselected; i++) { 552 const PetscInt l = selected[i]; 553 new_ilocal[i] = ilocal ? ilocal[l] : l; 554 new_iremote[i].rank = iremote[l].rank; 555 new_iremote[i].index = iremote[l].index; 556 } 557 558 /* Tag selected leaves before PetscSFSetGraph since new_ilocal might turn into NULL since we use PETSC_OWN_POINTER below */ 559 maxlocal = (minleaf > maxleaf)? 0 : maxleaf-minleaf+1; /* maxleaf=-1 and minleaf=0 when nleaves=0 */ 560 ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafdata);CHKERRQ(ierr); 561 for (i=0; i<nselected; i++) leafdata[new_ilocal[i]-minleaf] = 1; /* -minleaf to adjust indices according to minleaf */ 562 563 ierr = PetscSFSetGraph(esf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 564 565 /* Set up the outgoing communication (i.e., send info). We can not reuse rmine etc in sf since there is no way to 566 map rmine[i] (ilocal of leaves) back to selected[j] (leaf indices). 567 */ 568 ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRQ(ierr); 569 ierr = PetscSFSetUpRanks(esf,group);CHKERRQ(ierr); 570 ierr = MPI_Group_free(&group);CHKERRQ(ierr); 571 572 /* Set up the incoming communication (i.e., recv info) */ 573 ierr = PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr); 574 bas = (PetscSF_Basic*)esf->data; 575 ierr = PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);CHKERRQ(ierr); 576 ierr = PetscMalloc1(ioffset[niranks],&bas->irootloc);CHKERRQ(ierr); 577 578 /* Pass info about selected leaves to root buffer */ 579 ierr = PetscSFReduceBegin_Basic(sf,MPIU_INT,leafdata-minleaf,rootdata,MPIU_REPLACE);CHKERRQ(ierr); /* -minleaf to re-adjust start address of leafdata */ 580 ierr = PetscSFPackGetInUse(sf,MPIU_INT,rootdata,leafdata-minleaf,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr); 581 ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr); 582 583 bas->niranks = bas->ndiranks = bas->ioffset[0] = 0; 584 p = 0; /* Counter for connected leaf ranks */ 585 q = 0; /* Counter for connected roots */ 586 for (i=0; i<niranks; i++) { 587 buffer = i < ndiranks? (PetscInt*)link->selfbuf : (PetscInt*)link->rootbuf + (ioffset[i] - ioffset[ndiranks]); 588 PetscBool connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */ 589 for (j=ioffset[i],k=0; j<ioffset[i+1]; j++,k++) { 590 if (buffer[k]) {bas->irootloc[q++] = irootloc[j]; connected = PETSC_TRUE;} 591 } 592 if (connected) { 593 bas->niranks++; 594 if (i<ndiranks) bas->ndiranks++; 595 bas->iranks[p] = iranks[i]; 596 bas->ioffset[p+1] = q; 597 p++; 598 } 599 } 600 bas->itotal = q; 601 ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr); 602 603 /* Setup packing optimizations */ 604 ierr = PetscSFPackSetupOptimization_Basic(esf);CHKERRQ(ierr); 605 esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */ 606 607 ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr); 608 *newsf = esf; 609 PetscFunctionReturn(0); 610 } 611 612 PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf) 613 { 614 PetscSF_Basic *dat; 615 PetscErrorCode ierr; 616 617 PetscFunctionBegin; 618 sf->ops->SetUp = PetscSFSetUp_Basic; 619 sf->ops->SetFromOptions = PetscSFSetFromOptions_Basic; 620 sf->ops->Reset = PetscSFReset_Basic; 621 sf->ops->Destroy = PetscSFDestroy_Basic; 622 sf->ops->View = PetscSFView_Basic; 623 sf->ops->BcastAndOpBegin = PetscSFBcastAndOpBegin_Basic; 624 sf->ops->BcastAndOpEnd = PetscSFBcastAndOpEnd_Basic; 625 sf->ops->ReduceBegin = PetscSFReduceBegin_Basic; 626 sf->ops->ReduceEnd = PetscSFReduceEnd_Basic; 627 sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Basic; 628 sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Basic; 629 sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Basic; 630 sf->ops->CreateEmbeddedSF = PetscSFCreateEmbeddedSF_Basic; 631 sf->ops->CreateEmbeddedLeafSF = PetscSFCreateEmbeddedLeafSF_Basic; 632 633 ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr); 634 sf->data = (void*)dat; 635 PetscFunctionReturn(0); 636 } 637