18cd53115SBarry Smith 2*40e23c03SJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfbasic.h> 395fce210SBarry Smith 4*40e23c03SJunchao Zhang /*===================================================================================*/ 5*40e23c03SJunchao Zhang /* Internal routines for PetscSFPack_Basic */ 6*40e23c03SJunchao Zhang /*===================================================================================*/ 795fce210SBarry Smith 8*40e23c03SJunchao Zhang /* Return root and leaf MPI requests for communication in the given direction. If the requests have not been 9*40e23c03SJunchao Zhang initialized (since we use persistent requests), then initialize them. 1095fce210SBarry Smith */ 11*40e23c03SJunchao Zhang static PetscErrorCode PetscSFPackGetReqs_Basic(PetscSF sf,MPI_Datatype unit,PetscSFPack_Basic link,PetscSFDirection direction,MPI_Request **rootreqs,MPI_Request **leafreqs) 12*40e23c03SJunchao Zhang { 13*40e23c03SJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 14*40e23c03SJunchao Zhang MPI_Request *requests = (direction == PETSCSF_LEAF2ROOT_REDUCE)? link->requests : link->requests + link->half; 15*40e23c03SJunchao Zhang PetscInt i,nrootranks,ndrootranks,nleafranks,ndleafranks; 16*40e23c03SJunchao Zhang const PetscInt *rootoffset,*leafoffset; 17*40e23c03SJunchao Zhang PetscMPIInt n; 18*40e23c03SJunchao Zhang MPI_Comm comm = PetscObjectComm((PetscObject)sf); 19*40e23c03SJunchao Zhang PetscErrorCode ierr; 2095fce210SBarry Smith 21*40e23c03SJunchao Zhang PetscFunctionBegin; 2295fce210SBarry Smith 23*40e23c03SJunchao Zhang if (!link->initialized[direction]) { 24*40e23c03SJunchao Zhang ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr); 25*40e23c03SJunchao Zhang ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr); 26*40e23c03SJunchao Zhang if (direction == PETSCSF_LEAF2ROOT_REDUCE) { 27*40e23c03SJunchao Zhang for (i=0; i<nrootranks; i++) { 28*40e23c03SJunchao Zhang if (i >= ndrootranks) { 29*40e23c03SJunchao Zhang ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr); 30*40e23c03SJunchao Zhang ierr = MPI_Recv_init(link->root[i],n,unit,bas->iranks[i],link->tag,comm,&requests[i]);CHKERRQ(ierr); 31*40e23c03SJunchao Zhang } 32*40e23c03SJunchao Zhang } 33*40e23c03SJunchao Zhang for (i=0; i<nleafranks; i++) { 34*40e23c03SJunchao Zhang if (i >= ndleafranks) { 35*40e23c03SJunchao Zhang ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr); 36*40e23c03SJunchao Zhang ierr = MPI_Send_init(link->leaf[i],n,unit,sf->ranks[i],link->tag,comm,&requests[nrootranks+i]);CHKERRQ(ierr); 37*40e23c03SJunchao Zhang } 38*40e23c03SJunchao Zhang } 39*40e23c03SJunchao Zhang } else if (direction == PETSCSF_ROOT2LEAF_BCAST) { 40*40e23c03SJunchao Zhang for (i=0; i<nrootranks; i++) { 41*40e23c03SJunchao Zhang if (i >= ndrootranks) { 42*40e23c03SJunchao Zhang ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr); 43*40e23c03SJunchao Zhang ierr = MPI_Send_init(link->root[i],n,unit,bas->iranks[i],link->tag,comm,&requests[i]);CHKERRQ(ierr); 44*40e23c03SJunchao Zhang } 45*40e23c03SJunchao Zhang } 46*40e23c03SJunchao Zhang for (i=0; i<nleafranks; i++) { 47*40e23c03SJunchao Zhang if (i >= ndleafranks) { 48*40e23c03SJunchao Zhang ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr); 49*40e23c03SJunchao Zhang ierr = MPI_Recv_init(link->leaf[i],n,unit,sf->ranks[i],link->tag,comm,&requests[nrootranks+i]);CHKERRQ(ierr); 50*40e23c03SJunchao Zhang } 51*40e23c03SJunchao Zhang } 52*40e23c03SJunchao Zhang } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Out-of-range PetscSFDirection = %D\n",direction); 53*40e23c03SJunchao Zhang 54*40e23c03SJunchao Zhang link->initialized[direction] = PETSC_TRUE; 5595fce210SBarry Smith } 5695fce210SBarry Smith 57*40e23c03SJunchao Zhang if (rootreqs) *rootreqs = requests; 58*40e23c03SJunchao Zhang if (leafreqs) *leafreqs = requests + bas->niranks; 59*40e23c03SJunchao Zhang PetscFunctionReturn(0); 6095fce210SBarry Smith } 6195fce210SBarry Smith 62*40e23c03SJunchao Zhang static PetscErrorCode PetscSFPackGet_Basic(PetscSF sf,MPI_Datatype unit,const void *key,PetscSFDirection direction,PetscSFPack_Basic *mylink) 63*40e23c03SJunchao Zhang { 64*40e23c03SJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 65*40e23c03SJunchao Zhang PetscErrorCode ierr; 66*40e23c03SJunchao Zhang PetscInt i,nrootranks,ndrootranks,nleafranks,ndleafranks; 67*40e23c03SJunchao Zhang const PetscInt *rootoffset,*leafoffset; 68*40e23c03SJunchao Zhang PetscSFPack *p; 69*40e23c03SJunchao Zhang PetscSFPack_Basic link; 70*40e23c03SJunchao Zhang 71*40e23c03SJunchao Zhang PetscFunctionBegin; 72*40e23c03SJunchao Zhang /* Look for types in cache */ 73*40e23c03SJunchao Zhang for (p=&bas->avail; (link=(PetscSFPack_Basic)*p); p=&link->next) { 74*40e23c03SJunchao Zhang PetscBool match; 75*40e23c03SJunchao Zhang ierr = MPIPetsc_Type_compare(unit,link->unit,&match);CHKERRQ(ierr); 76*40e23c03SJunchao Zhang if (match) { 77*40e23c03SJunchao Zhang *p = link->next; /* Remove from available list */ 78*40e23c03SJunchao Zhang goto found; 79*40e23c03SJunchao Zhang } 8053deab39SPeter Brune } 8153deab39SPeter Brune 82*40e23c03SJunchao Zhang ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr); 83*40e23c03SJunchao Zhang ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr); 84*40e23c03SJunchao Zhang ierr = PetscNew(&link);CHKERRQ(ierr); 85*40e23c03SJunchao Zhang ierr = PetscSFPackSetupType((PetscSFPack)link,unit);CHKERRQ(ierr); 86*40e23c03SJunchao Zhang ierr = PetscMalloc2(nrootranks,&link->root,nleafranks,&link->leaf);CHKERRQ(ierr); 87*40e23c03SJunchao Zhang /* Double the requests. First half are used for reduce (leaf2root) communication, second half for bcast (root2leaf) communication */ 88*40e23c03SJunchao Zhang link->half = nrootranks + nleafranks; 89*40e23c03SJunchao Zhang ierr = PetscMalloc1(link->half*2,&link->requests);CHKERRQ(ierr); 90*40e23c03SJunchao Zhang for (i=0; i<link->half*2; i++) link->requests[i] = MPI_REQUEST_NULL; /* Must be init'ed since some are unused but we call MPI_Waitall on them in whole */ 91*40e23c03SJunchao Zhang /* One tag per link */ 92*40e23c03SJunchao Zhang ierr = PetscCommGetNewTag(PetscObjectComm((PetscObject)sf),&link->tag);CHKERRQ(ierr); 9353deab39SPeter Brune 94*40e23c03SJunchao Zhang /* Allocate root and leaf buffers */ 95*40e23c03SJunchao Zhang for (i=0; i<nrootranks; i++) {ierr = PetscMalloc((rootoffset[i+1]-rootoffset[i])*link->unitbytes,&link->root[i]);CHKERRQ(ierr);} 96*40e23c03SJunchao Zhang for (i=0; i<nleafranks; i++) { 97*40e23c03SJunchao Zhang if (i < ndleafranks) { /* Leaf buffers for distinguished ranks are pointers directly into root buffers */ 98*40e23c03SJunchao Zhang if (ndrootranks != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot match distinguished ranks"); 99*40e23c03SJunchao Zhang link->leaf[i] = link->root[0]; 100*40e23c03SJunchao Zhang continue; 101*40e23c03SJunchao Zhang } 102*40e23c03SJunchao Zhang ierr = PetscMalloc((leafoffset[i+1]-leafoffset[i])*link->unitbytes,&link->leaf[i]);CHKERRQ(ierr); 10353deab39SPeter Brune } 10453deab39SPeter Brune 105*40e23c03SJunchao Zhang found: 106*40e23c03SJunchao Zhang link->key = key; 107*40e23c03SJunchao Zhang link->next = bas->inuse; 108*40e23c03SJunchao Zhang bas->inuse = (PetscSFPack)link; 109*40e23c03SJunchao Zhang 110*40e23c03SJunchao Zhang *mylink = link; 111*40e23c03SJunchao Zhang PetscFunctionReturn(0); 11295fce210SBarry Smith } 11395fce210SBarry Smith 114*40e23c03SJunchao Zhang /*===================================================================================*/ 115*40e23c03SJunchao Zhang /* SF public interface implementations */ 116*40e23c03SJunchao Zhang /*===================================================================================*/ 117*40e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf) 11895fce210SBarry Smith { 11995fce210SBarry Smith PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 12095fce210SBarry Smith PetscErrorCode ierr; 12195fce210SBarry Smith PetscInt *rlengths,*ilengths,i; 122*40e23c03SJunchao Zhang PetscMPIInt rank,niranks,*iranks,tag; 12395fce210SBarry Smith MPI_Comm comm; 124b5a8e515SJed Brown MPI_Group group; 125*40e23c03SJunchao Zhang MPI_Request *rootreqs,*leafreqs; 12695fce210SBarry Smith 12795fce210SBarry Smith PetscFunctionBegin; 128b5a8e515SJed Brown ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRQ(ierr); 129b5a8e515SJed Brown ierr = PetscSFSetUpRanks(sf,group);CHKERRQ(ierr); 130b5a8e515SJed Brown ierr = MPI_Group_free(&group);CHKERRQ(ierr); 13195fce210SBarry Smith ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 132*40e23c03SJunchao Zhang ierr = PetscObjectGetNewTag((PetscObject)sf,&tag);CHKERRQ(ierr); 133c943f53fSJed Brown ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 13495fce210SBarry Smith /* 13595fce210SBarry Smith * Inform roots about how many leaves and from which ranks 13695fce210SBarry Smith */ 137785e854fSJed Brown ierr = PetscMalloc1(sf->nranks,&rlengths);CHKERRQ(ierr); 13895fce210SBarry Smith /* Determine number, sending ranks, and length of incoming */ 13995fce210SBarry Smith for (i=0; i<sf->nranks; i++) { 14095fce210SBarry Smith rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */ 14195fce210SBarry Smith } 142*40e23c03SJunchao Zhang ierr = PetscCommBuildTwoSided(comm,1,MPIU_INT,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths);CHKERRQ(ierr); 143c943f53fSJed Brown 1440b899082SJunchao Zhang /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why. 1450b899082SJunchao Zhang We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is 1460b899082SJunchao Zhang small and the sorting is cheap. 1470b899082SJunchao Zhang */ 1480b899082SJunchao Zhang ierr = PetscSortMPIIntWithIntArray(niranks,iranks,ilengths);CHKERRQ(ierr); 1490b899082SJunchao Zhang 150c943f53fSJed Brown /* Partition into distinguished and non-distinguished incoming ranks */ 151c943f53fSJed Brown bas->ndiranks = sf->ndranks; 152c943f53fSJed Brown bas->niranks = bas->ndiranks + niranks; 153c943f53fSJed Brown ierr = PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset);CHKERRQ(ierr); 154c943f53fSJed Brown bas->ioffset[0] = 0; 155c943f53fSJed Brown for (i=0; i<bas->ndiranks; i++) { 156c943f53fSJed Brown bas->iranks[i] = sf->ranks[i]; 157c943f53fSJed Brown bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i]; 158c943f53fSJed Brown } 159*40e23c03SJunchao Zhang if (bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks"); 160*40e23c03SJunchao Zhang for ( ; i<bas->niranks; i++) { 161c943f53fSJed Brown bas->iranks[i] = iranks[i-bas->ndiranks]; 162c943f53fSJed Brown bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks]; 163c943f53fSJed Brown } 164c943f53fSJed Brown bas->itotal = bas->ioffset[i]; 16595fce210SBarry Smith ierr = PetscFree(rlengths);CHKERRQ(ierr); 166c943f53fSJed Brown ierr = PetscFree(iranks);CHKERRQ(ierr); 167c943f53fSJed Brown ierr = PetscFree(ilengths);CHKERRQ(ierr); 16895fce210SBarry Smith 16995fce210SBarry Smith /* Send leaf identities to roots */ 170c943f53fSJed Brown ierr = PetscMalloc1(bas->itotal,&bas->irootloc);CHKERRQ(ierr); 171*40e23c03SJunchao Zhang ierr = PetscMalloc2(bas->niranks-bas->ndiranks,&rootreqs,sf->nranks-sf->ndranks,&leafreqs);CHKERRQ(ierr); 172*40e23c03SJunchao Zhang for (i=bas->ndiranks; i<bas->niranks; i++) { 173*40e23c03SJunchao Zhang ierr = MPI_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],tag,comm,&rootreqs[i-bas->ndiranks]);CHKERRQ(ierr); 174*40e23c03SJunchao Zhang } 175*40e23c03SJunchao Zhang for (i=0; i<sf->nranks; i++) { 17695fce210SBarry Smith PetscMPIInt npoints; 17795fce210SBarry Smith ierr = PetscMPIIntCast(sf->roffset[i+1] - sf->roffset[i],&npoints);CHKERRQ(ierr); 178*40e23c03SJunchao Zhang if (i < sf->ndranks) { 179*40e23c03SJunchao Zhang if (sf->ranks[i] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank"); 180*40e23c03SJunchao Zhang if (bas->iranks[0] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank"); 181*40e23c03SJunchao Zhang if (npoints != bas->ioffset[1]-bas->ioffset[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths"); 182*40e23c03SJunchao Zhang ierr = PetscArraycpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints);CHKERRQ(ierr); 183c943f53fSJed Brown continue; 184c943f53fSJed Brown } 185*40e23c03SJunchao Zhang ierr = MPI_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],tag,comm,&leafreqs[i-sf->ndranks]);CHKERRQ(ierr); 186bf39f1bfSJed Brown } 187*40e23c03SJunchao Zhang ierr = MPI_Waitall(bas->niranks-bas->ndiranks,rootreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 188*40e23c03SJunchao Zhang ierr = MPI_Waitall(sf->nranks-sf->ndranks,leafreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 189*40e23c03SJunchao Zhang ierr = PetscFree2(rootreqs,leafreqs);CHKERRQ(ierr); 19095fce210SBarry Smith 191*40e23c03SJunchao Zhang /* Setup packing optimization for root and leaf */ 192*40e23c03SJunchao Zhang ierr = PetscSFPackSetupOptimization(sf->nranks,sf->roffset,sf->rmine,&sf->leafpackopt);CHKERRQ(ierr); 193*40e23c03SJunchao Zhang ierr = PetscSFPackSetupOptimization(bas->niranks,bas->ioffset,bas->irootloc,&bas->rootpackopt);CHKERRQ(ierr); 19495fce210SBarry Smith PetscFunctionReturn(0); 19595fce210SBarry Smith } 19695fce210SBarry Smith 1974416b707SBarry Smith static PetscErrorCode PetscSFSetFromOptions_Basic(PetscOptionItems *PetscOptionsObject,PetscSF sf) 19895fce210SBarry Smith { 19995fce210SBarry Smith PetscErrorCode ierr; 20095fce210SBarry Smith 20195fce210SBarry Smith PetscFunctionBegin; 202e55864a3SBarry Smith ierr = PetscOptionsHead(PetscOptionsObject,"PetscSF Basic options");CHKERRQ(ierr); 20395fce210SBarry Smith ierr = PetscOptionsTail();CHKERRQ(ierr); 20495fce210SBarry Smith PetscFunctionReturn(0); 20595fce210SBarry Smith } 20695fce210SBarry Smith 207*40e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf) 20895fce210SBarry Smith { 20995fce210SBarry Smith PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 21095fce210SBarry Smith PetscErrorCode ierr; 211*40e23c03SJunchao Zhang PetscSFPack_Basic link,next; 21295fce210SBarry Smith 21395fce210SBarry Smith PetscFunctionBegin; 21429046d53SLisandro Dalcin if (bas->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed"); 215c943f53fSJed Brown ierr = PetscFree2(bas->iranks,bas->ioffset);CHKERRQ(ierr); 216c943f53fSJed Brown ierr = PetscFree(bas->irootloc);CHKERRQ(ierr); 217*40e23c03SJunchao Zhang for (link=(PetscSFPack_Basic)bas->avail; link; link=next) { 218bf39f1bfSJed Brown PetscInt i; 219*40e23c03SJunchao Zhang next = (PetscSFPack_Basic)link->next; 22077b7e48dSJunchao Zhang if (!link->isbuiltin) {ierr = MPI_Type_free(&link->unit);CHKERRQ(ierr);} 221bf39f1bfSJed Brown for (i=0; i<bas->niranks; i++) {ierr = PetscFree(link->root[i]);CHKERRQ(ierr);} 222c943f53fSJed Brown for (i=sf->ndranks; i<sf->nranks; i++) {ierr = PetscFree(link->leaf[i]);CHKERRQ(ierr);} /* Free only non-distinguished leaf buffers */ 22395fce210SBarry Smith ierr = PetscFree2(link->root,link->leaf);CHKERRQ(ierr); 22430e38525SJunchao Zhang /* Free persistent requests using MPI_Request_free */ 225*40e23c03SJunchao Zhang for (i=0; i<link->half*2; i++) { 226*40e23c03SJunchao Zhang if (link->requests[i] != MPI_REQUEST_NULL) {ierr = MPI_Request_free(&link->requests[i]);CHKERRQ(ierr);} 22730e38525SJunchao Zhang } 22895fce210SBarry Smith ierr = PetscFree(link->requests);CHKERRQ(ierr); 22995fce210SBarry Smith ierr = PetscFree(link);CHKERRQ(ierr); 23095fce210SBarry Smith } 23195fce210SBarry Smith bas->avail = NULL; 232*40e23c03SJunchao Zhang ierr = PetscSFPackDestoryOptimization(&sf->leafpackopt);CHKERRQ(ierr); 233*40e23c03SJunchao Zhang ierr = PetscSFPackDestoryOptimization(&bas->rootpackopt);CHKERRQ(ierr); 23495fce210SBarry Smith PetscFunctionReturn(0); 23595fce210SBarry Smith } 23695fce210SBarry Smith 237*40e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf) 23895fce210SBarry Smith { 23995fce210SBarry Smith PetscErrorCode ierr; 24095fce210SBarry Smith 24195fce210SBarry Smith PetscFunctionBegin; 242*40e23c03SJunchao Zhang ierr = PetscSFReset(sf);CHKERRQ(ierr); 24395fce210SBarry Smith ierr = PetscFree(sf->data);CHKERRQ(ierr); 24495fce210SBarry Smith PetscFunctionReturn(0); 24595fce210SBarry Smith } 24695fce210SBarry Smith 247*40e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer) 24895fce210SBarry Smith { 24995fce210SBarry Smith PetscErrorCode ierr; 25095fce210SBarry Smith PetscBool iascii; 25195fce210SBarry Smith 25295fce210SBarry Smith PetscFunctionBegin; 25395fce210SBarry Smith ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 25495fce210SBarry Smith if (iascii) { 25595fce210SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," sort=%s\n",sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr); 25695fce210SBarry Smith } 25795fce210SBarry Smith PetscFunctionReturn(0); 25895fce210SBarry Smith } 25995fce210SBarry Smith 2603482bfa8SJunchao Zhang static PetscErrorCode PetscSFBcastAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 26195fce210SBarry Smith { 26295fce210SBarry Smith PetscErrorCode ierr; 263*40e23c03SJunchao Zhang PetscSFPack_Basic link; 264c943f53fSJed Brown PetscInt i,nrootranks,ndrootranks,nleafranks,ndleafranks; 26595fce210SBarry Smith const PetscInt *rootoffset,*leafoffset,*rootloc,*leafloc; 26695fce210SBarry Smith const PetscMPIInt *rootranks,*leafranks; 26795fce210SBarry Smith MPI_Request *rootreqs,*leafreqs; 26830e38525SJunchao Zhang PetscMPIInt n; 269*40e23c03SJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 27095fce210SBarry Smith 27195fce210SBarry Smith PetscFunctionBegin; 272*40e23c03SJunchao Zhang ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr); 273*40e23c03SJunchao Zhang ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc,NULL);CHKERRQ(ierr); 274*40e23c03SJunchao Zhang ierr = PetscSFPackGet_Basic(sf,unit,rootdata,PETSCSF_ROOT2LEAF_BCAST,&link);CHKERRQ(ierr); 27595fce210SBarry Smith 276*40e23c03SJunchao Zhang ierr = PetscSFPackGetReqs_Basic(sf,unit,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);CHKERRQ(ierr); 277c943f53fSJed Brown /* Eagerly post leaf receives, but only from non-distinguished ranks -- distinguished ranks will receive via shared memory */ 27830e38525SJunchao Zhang ierr = PetscMPIIntCast(leafoffset[nleafranks]-leafoffset[ndleafranks],&n);CHKERRQ(ierr); 279*40e23c03SJunchao Zhang ierr = MPI_Startall_irecv(n,unit,nleafranks-ndleafranks,leafreqs+ndleafranks);CHKERRQ(ierr); /* One can wait but not start a null request */ 28030e38525SJunchao Zhang 28195fce210SBarry Smith /* Pack and send root data */ 28295fce210SBarry Smith for (i=0; i<nrootranks; i++) { 283bf39f1bfSJed Brown void *packstart = link->root[i]; 28430e38525SJunchao Zhang ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr); 285*40e23c03SJunchao Zhang (*link->Pack)(n,link->bs,rootloc+rootoffset[i],i,bas->rootpackopt,rootdata,packstart); 286c943f53fSJed Brown if (i < ndrootranks) continue; /* shared memory */ 287*40e23c03SJunchao Zhang ierr = MPI_Start_isend(n,unit,&rootreqs[i]);CHKERRQ(ierr); 28895fce210SBarry Smith } 28995fce210SBarry Smith PetscFunctionReturn(0); 29095fce210SBarry Smith } 29195fce210SBarry Smith 292*40e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFBcastAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 29395fce210SBarry Smith { 29495fce210SBarry Smith PetscErrorCode ierr; 295*40e23c03SJunchao Zhang PetscSFPack_Basic link; 296c943f53fSJed Brown PetscInt i,nleafranks,ndleafranks; 29795fce210SBarry Smith const PetscInt *leafoffset,*leafloc; 298*40e23c03SJunchao Zhang PetscErrorCode (*UnpackAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*); 2993482bfa8SJunchao Zhang PetscMPIInt typesize = -1; 30095fce210SBarry Smith 30195fce210SBarry Smith PetscFunctionBegin; 302*40e23c03SJunchao Zhang ierr = PetscSFPackGetInUse(sf,unit,rootdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr); 303*40e23c03SJunchao Zhang ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr); 304*40e23c03SJunchao Zhang ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,&leafloc,NULL);CHKERRQ(ierr); 305*40e23c03SJunchao Zhang ierr = PetscSFPackGetUnpackAndOp(sf,(PetscSFPack)link,op,&UnpackAndOp);CHKERRQ(ierr); 3063482bfa8SJunchao Zhang 307*40e23c03SJunchao Zhang if (UnpackAndOp) { typesize = link->unitbytes; } 3083482bfa8SJunchao Zhang else { ierr = MPI_Type_size(unit,&typesize);CHKERRQ(ierr); } 3093482bfa8SJunchao Zhang 31095fce210SBarry Smith for (i=0; i<nleafranks; i++) { 31195fce210SBarry Smith PetscMPIInt n = leafoffset[i+1] - leafoffset[i]; 3123482bfa8SJunchao Zhang char *packstart = (char *) link->leaf[i]; 313*40e23c03SJunchao Zhang if (UnpackAndOp) { (*UnpackAndOp)(n,link->bs,leafloc+leafoffset[i],i,sf->leafpackopt,leafdata,(const void *)packstart); } 3143482bfa8SJunchao Zhang #if defined(PETSC_HAVE_MPI_REDUCE_LOCAL) 3153482bfa8SJunchao Zhang else if (n) { /* the op should be defined to operate on the whole datatype, so we ignore link->bs */ 3163482bfa8SJunchao Zhang PetscInt j; 3173482bfa8SJunchao Zhang for (j=0; j<n; j++) { ierr = MPI_Reduce_local(packstart+j*typesize,((char *) leafdata)+(leafloc[leafoffset[i]+j])*typesize,1,unit,op);CHKERRQ(ierr); } 31895fce210SBarry Smith } 3193482bfa8SJunchao Zhang #else 320d2a25953SJunchao Zhang else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No unpacking reduction operation for this MPI_Op"); 3213482bfa8SJunchao Zhang #endif 3223482bfa8SJunchao Zhang } 3233482bfa8SJunchao Zhang 324*40e23c03SJunchao Zhang ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr); 32595fce210SBarry Smith PetscFunctionReturn(0); 32695fce210SBarry Smith } 32795fce210SBarry Smith 32895fce210SBarry Smith /* leaf -> root with reduction */ 329*40e23c03SJunchao Zhang static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 33095fce210SBarry Smith { 331*40e23c03SJunchao Zhang PetscSFPack_Basic link; 33295fce210SBarry Smith PetscErrorCode ierr; 333c943f53fSJed Brown PetscInt i,nrootranks,ndrootranks,nleafranks,ndleafranks; 33495fce210SBarry Smith const PetscInt *rootoffset,*leafoffset,*rootloc,*leafloc; 33595fce210SBarry Smith const PetscMPIInt *rootranks,*leafranks; 33695fce210SBarry Smith MPI_Request *rootreqs,*leafreqs; 33730e38525SJunchao Zhang PetscMPIInt n; 33895fce210SBarry Smith 33995fce210SBarry Smith PetscFunctionBegin; 340*40e23c03SJunchao Zhang ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr); 341*40e23c03SJunchao Zhang ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc,NULL);CHKERRQ(ierr); 342*40e23c03SJunchao Zhang ierr = PetscSFPackGet_Basic(sf,unit,leafdata,PETSCSF_LEAF2ROOT_REDUCE,&link);CHKERRQ(ierr); 34395fce210SBarry Smith 344*40e23c03SJunchao Zhang ierr = PetscSFPackGetReqs_Basic(sf,unit,link,PETSCSF_LEAF2ROOT_REDUCE,&rootreqs,&leafreqs);CHKERRQ(ierr); 345c943f53fSJed Brown /* Eagerly post root receives for non-distinguished ranks */ 34630e38525SJunchao Zhang ierr = PetscMPIIntCast(rootoffset[nrootranks]-rootoffset[ndrootranks],&n);CHKERRQ(ierr); 347*40e23c03SJunchao Zhang ierr = MPI_Startall_irecv(n,unit,nrootranks-ndrootranks,rootreqs+ndrootranks);CHKERRQ(ierr); 34830e38525SJunchao Zhang 34995fce210SBarry Smith /* Pack and send leaf data */ 35095fce210SBarry Smith for (i=0; i<nleafranks; i++) { 351bf39f1bfSJed Brown void *packstart = link->leaf[i]; 35230e38525SJunchao Zhang ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr); 353*40e23c03SJunchao Zhang (*link->Pack)(n,link->bs,leafloc+leafoffset[i],i,sf->leafpackopt,leafdata,packstart); 354c943f53fSJed Brown if (i < ndleafranks) continue; /* shared memory */ 355*40e23c03SJunchao Zhang ierr = MPI_Start_isend(n,unit,&leafreqs[i]);CHKERRQ(ierr); 35695fce210SBarry Smith } 35795fce210SBarry Smith PetscFunctionReturn(0); 35895fce210SBarry Smith } 35995fce210SBarry Smith 360*40e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 36195fce210SBarry Smith { 362*40e23c03SJunchao Zhang PetscErrorCode (*UnpackAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*); 36395fce210SBarry Smith PetscErrorCode ierr; 364*40e23c03SJunchao Zhang PetscSFPack_Basic link; 36595fce210SBarry Smith PetscInt i,nrootranks; 3661620da3bSToby Isaac PetscMPIInt typesize = -1; 36795fce210SBarry Smith const PetscInt *rootoffset,*rootloc; 368*40e23c03SJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 36995fce210SBarry Smith 37095fce210SBarry Smith PetscFunctionBegin; 371*40e23c03SJunchao Zhang ierr = PetscSFPackGetInUse(sf,unit,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr); 37295fce210SBarry Smith /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */ 373*40e23c03SJunchao Zhang ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr); 374*40e23c03SJunchao Zhang ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,NULL,NULL,&rootoffset,&rootloc);CHKERRQ(ierr); 375*40e23c03SJunchao Zhang ierr = PetscSFPackGetUnpackAndOp(sf,(PetscSFPack)link,op,&UnpackAndOp);CHKERRQ(ierr); 376*40e23c03SJunchao Zhang if (UnpackAndOp) { 3771620da3bSToby Isaac typesize = link->unitbytes; 3781620da3bSToby Isaac } 3791620da3bSToby Isaac else { 3801620da3bSToby Isaac ierr = MPI_Type_size(unit,&typesize);CHKERRQ(ierr); 3811620da3bSToby Isaac } 38295fce210SBarry Smith for (i=0; i<nrootranks; i++) { 38395fce210SBarry Smith PetscMPIInt n = rootoffset[i+1] - rootoffset[i]; 384bf39f1bfSJed Brown char *packstart = (char *) link->root[i]; 38595fce210SBarry Smith 386*40e23c03SJunchao Zhang if (UnpackAndOp) { 387*40e23c03SJunchao Zhang (*UnpackAndOp)(n,link->bs,rootloc+rootoffset[i],i,bas->rootpackopt,rootdata,(const void *)packstart); 38895fce210SBarry Smith } 38962795ce3SStefano Zampini #if defined(PETSC_HAVE_MPI_REDUCE_LOCAL) 39078ec4774SToby Isaac else if (n) { /* the op should be defined to operate on the whole datatype, so we ignore link->bs */ 39178ec4774SToby Isaac PetscInt j; 3921620da3bSToby Isaac 3931620da3bSToby Isaac for (j = 0; j < n; j++) { 39478ec4774SToby Isaac ierr = MPI_Reduce_local(packstart+j*typesize,((char *) rootdata)+(rootloc[rootoffset[i]+j])*typesize,1,unit,op);CHKERRQ(ierr); 3951620da3bSToby Isaac } 3961620da3bSToby Isaac } 397a9ec7e9eSToby Isaac #else 398d2a25953SJunchao Zhang else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No unpacking reduction operation for this MPI_Op"); 399a9ec7e9eSToby Isaac #endif 4001620da3bSToby Isaac } 401*40e23c03SJunchao Zhang ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr); 40295fce210SBarry Smith PetscFunctionReturn(0); 40395fce210SBarry Smith } 40495fce210SBarry Smith 405*40e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 40695fce210SBarry Smith { 40795fce210SBarry Smith PetscErrorCode ierr; 40895fce210SBarry Smith 40995fce210SBarry Smith PetscFunctionBegin; 410*40e23c03SJunchao Zhang ierr = PetscSFReduceBegin(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr); 41195fce210SBarry Smith PetscFunctionReturn(0); 41295fce210SBarry Smith } 41395fce210SBarry Smith 41495fce210SBarry Smith static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 41595fce210SBarry Smith { 416*40e23c03SJunchao Zhang PetscErrorCode (*FetchAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*); 41795fce210SBarry Smith PetscErrorCode ierr; 418*40e23c03SJunchao Zhang PetscSFPack_Basic link; 419c943f53fSJed Brown PetscInt i,nrootranks,ndrootranks,nleafranks,ndleafranks; 42095fce210SBarry Smith const PetscInt *rootoffset,*leafoffset,*rootloc,*leafloc; 42195fce210SBarry Smith const PetscMPIInt *rootranks,*leafranks; 42295fce210SBarry Smith MPI_Request *rootreqs,*leafreqs; 42330e38525SJunchao Zhang PetscMPIInt n; 424*40e23c03SJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 42595fce210SBarry Smith 42695fce210SBarry Smith PetscFunctionBegin; 427*40e23c03SJunchao Zhang ierr = PetscSFPackGetInUse(sf,unit,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr); 42895fce210SBarry Smith /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */ 429*40e23c03SJunchao Zhang ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr); 430*40e23c03SJunchao Zhang ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr); 431*40e23c03SJunchao Zhang ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc,NULL);CHKERRQ(ierr); 432*40e23c03SJunchao Zhang 433*40e23c03SJunchao Zhang ierr = PetscSFPackGetReqs_Basic(sf,unit,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);CHKERRQ(ierr); 43495fce210SBarry Smith /* Post leaf receives */ 43530e38525SJunchao Zhang ierr = PetscMPIIntCast(leafoffset[nleafranks]-leafoffset[ndleafranks],&n);CHKERRQ(ierr); 436*40e23c03SJunchao Zhang ierr = MPI_Startall_irecv(n,unit,nleafranks-ndleafranks,leafreqs+ndleafranks);CHKERRQ(ierr); 43730e38525SJunchao Zhang 43895fce210SBarry Smith /* Process local fetch-and-op, post root sends */ 439*40e23c03SJunchao Zhang ierr = PetscSFPackGetFetchAndOp(sf,(PetscSFPack)link,op,&FetchAndOp);CHKERRQ(ierr); 44095fce210SBarry Smith for (i=0; i<nrootranks; i++) { 441bf39f1bfSJed Brown void *packstart = link->root[i]; 44230e38525SJunchao Zhang ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr); 443*40e23c03SJunchao Zhang (*FetchAndOp)(n,link->bs,rootloc+rootoffset[i],i,bas->rootpackopt,rootdata,packstart); 444c943f53fSJed Brown if (i < ndrootranks) continue; /* shared memory */ 445*40e23c03SJunchao Zhang ierr = MPI_Start_isend(n,unit,&rootreqs[i]);CHKERRQ(ierr); 44695fce210SBarry Smith } 447*40e23c03SJunchao Zhang ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr); 44895fce210SBarry Smith for (i=0; i<nleafranks; i++) { 449bf39f1bfSJed Brown const void *packstart = link->leaf[i]; 45030e38525SJunchao Zhang ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr); 451*40e23c03SJunchao Zhang (*link->UnpackAndInsert)(n,link->bs,leafloc+leafoffset[i],i,sf->leafpackopt,leafupdate,packstart); 45295fce210SBarry Smith } 453*40e23c03SJunchao Zhang ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr); 45495fce210SBarry Smith PetscFunctionReturn(0); 45595fce210SBarry Smith } 45695fce210SBarry Smith 457*40e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc) 4588750ddebSJunchao Zhang { 4598750ddebSJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 4608750ddebSJunchao Zhang 4618750ddebSJunchao Zhang PetscFunctionBegin; 4628750ddebSJunchao Zhang if (niranks) *niranks = bas->niranks; 4638750ddebSJunchao Zhang if (iranks) *iranks = bas->iranks; 4648750ddebSJunchao Zhang if (ioffset) *ioffset = bas->ioffset; 4658750ddebSJunchao Zhang if (irootloc) *irootloc = bas->irootloc; 4668750ddebSJunchao Zhang PetscFunctionReturn(0); 4678750ddebSJunchao Zhang } 4688750ddebSJunchao Zhang 4698cc058d9SJed Brown PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf) 47095fce210SBarry Smith { 471*40e23c03SJunchao Zhang PetscSF_Basic *dat; 47295fce210SBarry Smith PetscErrorCode ierr; 47395fce210SBarry Smith 47495fce210SBarry Smith PetscFunctionBegin; 47595fce210SBarry Smith sf->ops->SetUp = PetscSFSetUp_Basic; 47695fce210SBarry Smith sf->ops->SetFromOptions = PetscSFSetFromOptions_Basic; 47795fce210SBarry Smith sf->ops->Reset = PetscSFReset_Basic; 47895fce210SBarry Smith sf->ops->Destroy = PetscSFDestroy_Basic; 47995fce210SBarry Smith sf->ops->View = PetscSFView_Basic; 4803482bfa8SJunchao Zhang sf->ops->BcastAndOpBegin = PetscSFBcastAndOpBegin_Basic; 4813482bfa8SJunchao Zhang sf->ops->BcastAndOpEnd = PetscSFBcastAndOpEnd_Basic; 48295fce210SBarry Smith sf->ops->ReduceBegin = PetscSFReduceBegin_Basic; 48395fce210SBarry Smith sf->ops->ReduceEnd = PetscSFReduceEnd_Basic; 48495fce210SBarry Smith sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Basic; 48595fce210SBarry Smith sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Basic; 4868750ddebSJunchao Zhang sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Basic; 48795fce210SBarry Smith 488*40e23c03SJunchao Zhang ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr); 489*40e23c03SJunchao Zhang sf->data = (void*)dat; 49095fce210SBarry Smith PetscFunctionReturn(0); 49195fce210SBarry Smith } 492