1 2 #include <../src/vec/is/sf/impls/basic/sfbasic.h> 3 4 /*===================================================================================*/ 5 /* Internal routines for PetscSFPack_Basic */ 6 /*===================================================================================*/ 7 8 /* Return root and leaf MPI requests for communication in the given direction. If the requests have not been 9 initialized (since we use persistent requests), then initialize them. 10 */ 11 static PetscErrorCode PetscSFPackGetReqs_Basic(PetscSF sf,MPI_Datatype unit,PetscSFPack_Basic link,PetscSFDirection direction,MPI_Request **rootreqs,MPI_Request **leafreqs) 12 { 13 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 14 MPI_Request *requests = (direction == PETSCSF_LEAF2ROOT_REDUCE)? link->requests : link->requests + link->half; 15 PetscInt i,nrootranks,ndrootranks,nleafranks,ndleafranks; 16 const PetscInt *rootoffset,*leafoffset; 17 PetscMPIInt n; 18 MPI_Comm comm = PetscObjectComm((PetscObject)sf); 19 PetscErrorCode ierr; 20 21 PetscFunctionBegin; 22 23 if (!link->initialized[direction]) { 24 ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr); 25 ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr); 26 if (direction == PETSCSF_LEAF2ROOT_REDUCE) { 27 for (i=0; i<nrootranks; i++) { 28 if (i >= ndrootranks) { 29 ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr); 30 ierr = MPI_Recv_init(link->root[i],n,unit,bas->iranks[i],link->tag,comm,&requests[i]);CHKERRQ(ierr); 31 } 32 } 33 for (i=0; i<nleafranks; i++) { 34 if (i >= ndleafranks) { 35 ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr); 36 ierr = MPI_Send_init(link->leaf[i],n,unit,sf->ranks[i],link->tag,comm,&requests[nrootranks+i]);CHKERRQ(ierr); 37 } 38 } 39 } else if (direction == PETSCSF_ROOT2LEAF_BCAST) { 40 for (i=0; i<nrootranks; i++) { 41 if (i >= ndrootranks) { 42 ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr); 43 ierr = MPI_Send_init(link->root[i],n,unit,bas->iranks[i],link->tag,comm,&requests[i]);CHKERRQ(ierr); 44 } 45 } 46 for (i=0; i<nleafranks; i++) { 47 if (i >= ndleafranks) { 48 ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr); 49 ierr = MPI_Recv_init(link->leaf[i],n,unit,sf->ranks[i],link->tag,comm,&requests[nrootranks+i]);CHKERRQ(ierr); 50 } 51 } 52 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Out-of-range PetscSFDirection = %D\n",direction); 53 54 link->initialized[direction] = PETSC_TRUE; 55 } 56 57 if (rootreqs) *rootreqs = requests; 58 if (leafreqs) *leafreqs = requests + bas->niranks; 59 PetscFunctionReturn(0); 60 } 61 62 static PetscErrorCode PetscSFPackGet_Basic(PetscSF sf,MPI_Datatype unit,const void *key,PetscSFDirection direction,PetscSFPack_Basic *mylink) 63 { 64 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 65 PetscErrorCode ierr; 66 PetscInt i,nrootranks,ndrootranks,nleafranks,ndleafranks; 67 const PetscInt *rootoffset,*leafoffset; 68 PetscSFPack *p; 69 PetscSFPack_Basic link; 70 71 PetscFunctionBegin; 72 /* Look for types in cache */ 73 for (p=&bas->avail; (link=(PetscSFPack_Basic)*p); p=&link->next) { 74 PetscBool match; 75 ierr = MPIPetsc_Type_compare(unit,link->unit,&match);CHKERRQ(ierr); 76 if (match) { 77 *p = link->next; /* Remove from available list */ 78 goto found; 79 } 80 } 81 82 ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr); 83 ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr); 84 ierr = PetscNew(&link);CHKERRQ(ierr); 85 ierr = PetscSFPackSetupType((PetscSFPack)link,unit);CHKERRQ(ierr); 86 ierr = PetscMalloc2(nrootranks,&link->root,nleafranks,&link->leaf);CHKERRQ(ierr); 87 /* Double the requests. First half are used for reduce (leaf2root) communication, second half for bcast (root2leaf) communication */ 88 link->half = nrootranks + nleafranks; 89 ierr = PetscMalloc1(link->half*2,&link->requests);CHKERRQ(ierr); 90 for (i=0; i<link->half*2; i++) link->requests[i] = MPI_REQUEST_NULL; /* Must be init'ed since some are unused but we call MPI_Waitall on them in whole */ 91 /* One tag per link */ 92 ierr = PetscCommGetNewTag(PetscObjectComm((PetscObject)sf),&link->tag);CHKERRQ(ierr); 93 94 /* Allocate root and leaf buffers */ 95 for (i=0; i<nrootranks; i++) {ierr = PetscMalloc((rootoffset[i+1]-rootoffset[i])*link->unitbytes,&link->root[i]);CHKERRQ(ierr);} 96 for (i=0; i<nleafranks; i++) { 97 if (i < ndleafranks) { /* Leaf buffers for distinguished ranks are pointers directly into root buffers */ 98 if (ndrootranks != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot match distinguished ranks"); 99 link->leaf[i] = link->root[0]; 100 continue; 101 } 102 ierr = PetscMalloc((leafoffset[i+1]-leafoffset[i])*link->unitbytes,&link->leaf[i]);CHKERRQ(ierr); 103 } 104 105 found: 106 link->key = key; 107 link->next = bas->inuse; 108 bas->inuse = (PetscSFPack)link; 109 110 *mylink = link; 111 PetscFunctionReturn(0); 112 } 113 114 /*===================================================================================*/ 115 /* SF public interface implementations */ 116 /*===================================================================================*/ 117 PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf) 118 { 119 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 120 PetscErrorCode ierr; 121 PetscInt *rlengths,*ilengths,i; 122 PetscMPIInt rank,niranks,*iranks,tag; 123 MPI_Comm comm; 124 MPI_Group group; 125 MPI_Request *rootreqs,*leafreqs; 126 127 PetscFunctionBegin; 128 ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRQ(ierr); 129 ierr = PetscSFSetUpRanks(sf,group);CHKERRQ(ierr); 130 ierr = MPI_Group_free(&group);CHKERRQ(ierr); 131 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 132 ierr = PetscObjectGetNewTag((PetscObject)sf,&tag);CHKERRQ(ierr); 133 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 134 /* 135 * Inform roots about how many leaves and from which ranks 136 */ 137 ierr = PetscMalloc1(sf->nranks,&rlengths);CHKERRQ(ierr); 138 /* Determine number, sending ranks, and length of incoming */ 139 for (i=0; i<sf->nranks; i++) { 140 rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */ 141 } 142 ierr = PetscCommBuildTwoSided(comm,1,MPIU_INT,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths);CHKERRQ(ierr); 143 144 /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why. 145 We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is 146 small and the sorting is cheap. 147 */ 148 ierr = PetscSortMPIIntWithIntArray(niranks,iranks,ilengths);CHKERRQ(ierr); 149 150 /* Partition into distinguished and non-distinguished incoming ranks */ 151 bas->ndiranks = sf->ndranks; 152 bas->niranks = bas->ndiranks + niranks; 153 ierr = PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset);CHKERRQ(ierr); 154 bas->ioffset[0] = 0; 155 for (i=0; i<bas->ndiranks; i++) { 156 bas->iranks[i] = sf->ranks[i]; 157 bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i]; 158 } 159 if (bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks"); 160 for ( ; i<bas->niranks; i++) { 161 bas->iranks[i] = iranks[i-bas->ndiranks]; 162 bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks]; 163 } 164 bas->itotal = bas->ioffset[i]; 165 ierr = PetscFree(rlengths);CHKERRQ(ierr); 166 ierr = PetscFree(iranks);CHKERRQ(ierr); 167 ierr = PetscFree(ilengths);CHKERRQ(ierr); 168 169 /* Send leaf identities to roots */ 170 ierr = PetscMalloc1(bas->itotal,&bas->irootloc);CHKERRQ(ierr); 171 ierr = PetscMalloc2(bas->niranks-bas->ndiranks,&rootreqs,sf->nranks-sf->ndranks,&leafreqs);CHKERRQ(ierr); 172 for (i=bas->ndiranks; i<bas->niranks; i++) { 173 ierr = MPI_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],tag,comm,&rootreqs[i-bas->ndiranks]);CHKERRQ(ierr); 174 } 175 for (i=0; i<sf->nranks; i++) { 176 PetscMPIInt npoints; 177 ierr = PetscMPIIntCast(sf->roffset[i+1] - sf->roffset[i],&npoints);CHKERRQ(ierr); 178 if (i < sf->ndranks) { 179 if (sf->ranks[i] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank"); 180 if (bas->iranks[0] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank"); 181 if (npoints != bas->ioffset[1]-bas->ioffset[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths"); 182 ierr = PetscArraycpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints);CHKERRQ(ierr); 183 continue; 184 } 185 ierr = MPI_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],tag,comm,&leafreqs[i-sf->ndranks]);CHKERRQ(ierr); 186 } 187 ierr = MPI_Waitall(bas->niranks-bas->ndiranks,rootreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 188 ierr = MPI_Waitall(sf->nranks-sf->ndranks,leafreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 189 ierr = PetscFree2(rootreqs,leafreqs);CHKERRQ(ierr); 190 191 /* Setup packing optimization for root and leaf */ 192 ierr = PetscSFPackSetupOptimization(sf->nranks,sf->roffset,sf->rmine,&sf->leafpackopt);CHKERRQ(ierr); 193 ierr = PetscSFPackSetupOptimization(bas->niranks,bas->ioffset,bas->irootloc,&bas->rootpackopt);CHKERRQ(ierr); 194 PetscFunctionReturn(0); 195 } 196 197 static PetscErrorCode PetscSFSetFromOptions_Basic(PetscOptionItems *PetscOptionsObject,PetscSF sf) 198 { 199 PetscErrorCode ierr; 200 201 PetscFunctionBegin; 202 ierr = PetscOptionsHead(PetscOptionsObject,"PetscSF Basic options");CHKERRQ(ierr); 203 ierr = PetscOptionsTail();CHKERRQ(ierr); 204 PetscFunctionReturn(0); 205 } 206 207 PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf) 208 { 209 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 210 PetscErrorCode ierr; 211 PetscSFPack_Basic link,next; 212 213 PetscFunctionBegin; 214 if (bas->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed"); 215 ierr = PetscFree2(bas->iranks,bas->ioffset);CHKERRQ(ierr); 216 ierr = PetscFree(bas->irootloc);CHKERRQ(ierr); 217 for (link=(PetscSFPack_Basic)bas->avail; link; link=next) { 218 PetscInt i; 219 next = (PetscSFPack_Basic)link->next; 220 if (!link->isbuiltin) {ierr = MPI_Type_free(&link->unit);CHKERRQ(ierr);} 221 for (i=0; i<bas->niranks; i++) {ierr = PetscFree(link->root[i]);CHKERRQ(ierr);} 222 for (i=sf->ndranks; i<sf->nranks; i++) {ierr = PetscFree(link->leaf[i]);CHKERRQ(ierr);} /* Free only non-distinguished leaf buffers */ 223 ierr = PetscFree2(link->root,link->leaf);CHKERRQ(ierr); 224 /* Free persistent requests using MPI_Request_free */ 225 for (i=0; i<link->half*2; i++) { 226 if (link->requests[i] != MPI_REQUEST_NULL) {ierr = MPI_Request_free(&link->requests[i]);CHKERRQ(ierr);} 227 } 228 ierr = PetscFree(link->requests);CHKERRQ(ierr); 229 ierr = PetscFree(link);CHKERRQ(ierr); 230 } 231 bas->avail = NULL; 232 ierr = PetscSFPackDestoryOptimization(&sf->leafpackopt);CHKERRQ(ierr); 233 ierr = PetscSFPackDestoryOptimization(&bas->rootpackopt);CHKERRQ(ierr); 234 PetscFunctionReturn(0); 235 } 236 237 PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf) 238 { 239 PetscErrorCode ierr; 240 241 PetscFunctionBegin; 242 ierr = PetscSFReset(sf);CHKERRQ(ierr); 243 ierr = PetscFree(sf->data);CHKERRQ(ierr); 244 PetscFunctionReturn(0); 245 } 246 247 PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer) 248 { 249 PetscErrorCode ierr; 250 PetscBool iascii; 251 252 PetscFunctionBegin; 253 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 254 if (iascii) { 255 ierr = PetscViewerASCIIPrintf(viewer," sort=%s\n",sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr); 256 } 257 PetscFunctionReturn(0); 258 } 259 260 static PetscErrorCode PetscSFBcastAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 261 { 262 PetscErrorCode ierr; 263 PetscSFPack_Basic link; 264 PetscInt i,nrootranks,ndrootranks,nleafranks,ndleafranks; 265 const PetscInt *rootoffset,*leafoffset,*rootloc,*leafloc; 266 const PetscMPIInt *rootranks,*leafranks; 267 MPI_Request *rootreqs,*leafreqs; 268 PetscMPIInt n; 269 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 270 271 PetscFunctionBegin; 272 ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr); 273 ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc,NULL);CHKERRQ(ierr); 274 ierr = PetscSFPackGet_Basic(sf,unit,rootdata,PETSCSF_ROOT2LEAF_BCAST,&link);CHKERRQ(ierr); 275 276 ierr = PetscSFPackGetReqs_Basic(sf,unit,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);CHKERRQ(ierr); 277 /* Eagerly post leaf receives, but only from non-distinguished ranks -- distinguished ranks will receive via shared memory */ 278 ierr = PetscMPIIntCast(leafoffset[nleafranks]-leafoffset[ndleafranks],&n);CHKERRQ(ierr); 279 ierr = MPI_Startall_irecv(n,unit,nleafranks-ndleafranks,leafreqs+ndleafranks);CHKERRQ(ierr); /* One can wait but not start a null request */ 280 281 /* Pack and send root data */ 282 for (i=0; i<nrootranks; i++) { 283 void *packstart = link->root[i]; 284 ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr); 285 (*link->Pack)(n,link->bs,rootloc+rootoffset[i],i,bas->rootpackopt,rootdata,packstart); 286 if (i < ndrootranks) continue; /* shared memory */ 287 ierr = MPI_Start_isend(n,unit,&rootreqs[i]);CHKERRQ(ierr); 288 } 289 PetscFunctionReturn(0); 290 } 291 292 PETSC_INTERN PetscErrorCode PetscSFBcastAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 293 { 294 PetscErrorCode ierr; 295 PetscSFPack_Basic link; 296 PetscInt i,nleafranks,ndleafranks; 297 const PetscInt *leafoffset,*leafloc; 298 PetscErrorCode (*UnpackAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*); 299 PetscMPIInt typesize = -1; 300 301 PetscFunctionBegin; 302 ierr = PetscSFPackGetInUse(sf,unit,rootdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr); 303 ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr); 304 ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,&leafloc,NULL);CHKERRQ(ierr); 305 ierr = PetscSFPackGetUnpackAndOp(sf,(PetscSFPack)link,op,&UnpackAndOp);CHKERRQ(ierr); 306 307 if (UnpackAndOp) { typesize = link->unitbytes; } 308 else { ierr = MPI_Type_size(unit,&typesize);CHKERRQ(ierr); } 309 310 for (i=0; i<nleafranks; i++) { 311 PetscMPIInt n = leafoffset[i+1] - leafoffset[i]; 312 char *packstart = (char *) link->leaf[i]; 313 if (UnpackAndOp) { (*UnpackAndOp)(n,link->bs,leafloc+leafoffset[i],i,sf->leafpackopt,leafdata,(const void *)packstart); } 314 #if defined(PETSC_HAVE_MPI_REDUCE_LOCAL) 315 else if (n) { /* the op should be defined to operate on the whole datatype, so we ignore link->bs */ 316 PetscInt j; 317 for (j=0; j<n; j++) { ierr = MPI_Reduce_local(packstart+j*typesize,((char *) leafdata)+(leafloc[leafoffset[i]+j])*typesize,1,unit,op);CHKERRQ(ierr); } 318 } 319 #else 320 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No unpacking reduction operation for this MPI_Op"); 321 #endif 322 } 323 324 ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr); 325 PetscFunctionReturn(0); 326 } 327 328 /* leaf -> root with reduction */ 329 static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 330 { 331 PetscSFPack_Basic link; 332 PetscErrorCode ierr; 333 PetscInt i,nrootranks,ndrootranks,nleafranks,ndleafranks; 334 const PetscInt *rootoffset,*leafoffset,*rootloc,*leafloc; 335 const PetscMPIInt *rootranks,*leafranks; 336 MPI_Request *rootreqs,*leafreqs; 337 PetscMPIInt n; 338 339 PetscFunctionBegin; 340 ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr); 341 ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc,NULL);CHKERRQ(ierr); 342 ierr = PetscSFPackGet_Basic(sf,unit,leafdata,PETSCSF_LEAF2ROOT_REDUCE,&link);CHKERRQ(ierr); 343 344 ierr = PetscSFPackGetReqs_Basic(sf,unit,link,PETSCSF_LEAF2ROOT_REDUCE,&rootreqs,&leafreqs);CHKERRQ(ierr); 345 /* Eagerly post root receives for non-distinguished ranks */ 346 ierr = PetscMPIIntCast(rootoffset[nrootranks]-rootoffset[ndrootranks],&n);CHKERRQ(ierr); 347 ierr = MPI_Startall_irecv(n,unit,nrootranks-ndrootranks,rootreqs+ndrootranks);CHKERRQ(ierr); 348 349 /* Pack and send leaf data */ 350 for (i=0; i<nleafranks; i++) { 351 void *packstart = link->leaf[i]; 352 ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr); 353 (*link->Pack)(n,link->bs,leafloc+leafoffset[i],i,sf->leafpackopt,leafdata,packstart); 354 if (i < ndleafranks) continue; /* shared memory */ 355 ierr = MPI_Start_isend(n,unit,&leafreqs[i]);CHKERRQ(ierr); 356 } 357 PetscFunctionReturn(0); 358 } 359 360 PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 361 { 362 PetscErrorCode (*UnpackAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*); 363 PetscErrorCode ierr; 364 PetscSFPack_Basic link; 365 PetscInt i,nrootranks; 366 PetscMPIInt typesize = -1; 367 const PetscInt *rootoffset,*rootloc; 368 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 369 370 PetscFunctionBegin; 371 ierr = PetscSFPackGetInUse(sf,unit,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr); 372 /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */ 373 ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr); 374 ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,NULL,NULL,&rootoffset,&rootloc);CHKERRQ(ierr); 375 ierr = PetscSFPackGetUnpackAndOp(sf,(PetscSFPack)link,op,&UnpackAndOp);CHKERRQ(ierr); 376 if (UnpackAndOp) { 377 typesize = link->unitbytes; 378 } 379 else { 380 ierr = MPI_Type_size(unit,&typesize);CHKERRQ(ierr); 381 } 382 for (i=0; i<nrootranks; i++) { 383 PetscMPIInt n = rootoffset[i+1] - rootoffset[i]; 384 char *packstart = (char *) link->root[i]; 385 386 if (UnpackAndOp) { 387 (*UnpackAndOp)(n,link->bs,rootloc+rootoffset[i],i,bas->rootpackopt,rootdata,(const void *)packstart); 388 } 389 #if defined(PETSC_HAVE_MPI_REDUCE_LOCAL) 390 else if (n) { /* the op should be defined to operate on the whole datatype, so we ignore link->bs */ 391 PetscInt j; 392 393 for (j = 0; j < n; j++) { 394 ierr = MPI_Reduce_local(packstart+j*typesize,((char *) rootdata)+(rootloc[rootoffset[i]+j])*typesize,1,unit,op);CHKERRQ(ierr); 395 } 396 } 397 #else 398 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No unpacking reduction operation for this MPI_Op"); 399 #endif 400 } 401 ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr); 402 PetscFunctionReturn(0); 403 } 404 405 PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 406 { 407 PetscErrorCode ierr; 408 409 PetscFunctionBegin; 410 ierr = PetscSFReduceBegin(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr); 411 PetscFunctionReturn(0); 412 } 413 414 static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 415 { 416 PetscErrorCode (*FetchAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*); 417 PetscErrorCode ierr; 418 PetscSFPack_Basic link; 419 PetscInt i,nrootranks,ndrootranks,nleafranks,ndleafranks; 420 const PetscInt *rootoffset,*leafoffset,*rootloc,*leafloc; 421 const PetscMPIInt *rootranks,*leafranks; 422 MPI_Request *rootreqs,*leafreqs; 423 PetscMPIInt n; 424 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 425 426 PetscFunctionBegin; 427 ierr = PetscSFPackGetInUse(sf,unit,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr); 428 /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */ 429 ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr); 430 ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr); 431 ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc,NULL);CHKERRQ(ierr); 432 433 ierr = PetscSFPackGetReqs_Basic(sf,unit,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);CHKERRQ(ierr); 434 /* Post leaf receives */ 435 ierr = PetscMPIIntCast(leafoffset[nleafranks]-leafoffset[ndleafranks],&n);CHKERRQ(ierr); 436 ierr = MPI_Startall_irecv(n,unit,nleafranks-ndleafranks,leafreqs+ndleafranks);CHKERRQ(ierr); 437 438 /* Process local fetch-and-op, post root sends */ 439 ierr = PetscSFPackGetFetchAndOp(sf,(PetscSFPack)link,op,&FetchAndOp);CHKERRQ(ierr); 440 for (i=0; i<nrootranks; i++) { 441 void *packstart = link->root[i]; 442 ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr); 443 (*FetchAndOp)(n,link->bs,rootloc+rootoffset[i],i,bas->rootpackopt,rootdata,packstart); 444 if (i < ndrootranks) continue; /* shared memory */ 445 ierr = MPI_Start_isend(n,unit,&rootreqs[i]);CHKERRQ(ierr); 446 } 447 ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr); 448 for (i=0; i<nleafranks; i++) { 449 const void *packstart = link->leaf[i]; 450 ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr); 451 (*link->UnpackAndInsert)(n,link->bs,leafloc+leafoffset[i],i,sf->leafpackopt,leafupdate,packstart); 452 } 453 ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr); 454 PetscFunctionReturn(0); 455 } 456 457 PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc) 458 { 459 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 460 461 PetscFunctionBegin; 462 if (niranks) *niranks = bas->niranks; 463 if (iranks) *iranks = bas->iranks; 464 if (ioffset) *ioffset = bas->ioffset; 465 if (irootloc) *irootloc = bas->irootloc; 466 PetscFunctionReturn(0); 467 } 468 469 PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf) 470 { 471 PetscSF_Basic *dat; 472 PetscErrorCode ierr; 473 474 PetscFunctionBegin; 475 sf->ops->SetUp = PetscSFSetUp_Basic; 476 sf->ops->SetFromOptions = PetscSFSetFromOptions_Basic; 477 sf->ops->Reset = PetscSFReset_Basic; 478 sf->ops->Destroy = PetscSFDestroy_Basic; 479 sf->ops->View = PetscSFView_Basic; 480 sf->ops->BcastAndOpBegin = PetscSFBcastAndOpBegin_Basic; 481 sf->ops->BcastAndOpEnd = PetscSFBcastAndOpEnd_Basic; 482 sf->ops->ReduceBegin = PetscSFReduceBegin_Basic; 483 sf->ops->ReduceEnd = PetscSFReduceEnd_Basic; 484 sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Basic; 485 sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Basic; 486 sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Basic; 487 488 ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr); 489 sf->data = (void*)dat; 490 PetscFunctionReturn(0); 491 } 492