18cd53115SBarry Smith 240e23c03SJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfbasic.h> 395fce210SBarry Smith 440e23c03SJunchao Zhang /*===================================================================================*/ 540e23c03SJunchao Zhang /* Internal routines for PetscSFPack_Basic */ 640e23c03SJunchao Zhang /*===================================================================================*/ 795fce210SBarry Smith 840e23c03SJunchao Zhang /* Return root and leaf MPI requests for communication in the given direction. If the requests have not been 940e23c03SJunchao Zhang initialized (since we use persistent requests), then initialize them. 1095fce210SBarry Smith */ 1140e23c03SJunchao Zhang static PetscErrorCode PetscSFPackGetReqs_Basic(PetscSF sf,MPI_Datatype unit,PetscSFPack_Basic link,PetscSFDirection direction,MPI_Request **rootreqs,MPI_Request **leafreqs) 1240e23c03SJunchao Zhang { 1340e23c03SJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 1440e23c03SJunchao Zhang MPI_Request *requests = (direction == PETSCSF_LEAF2ROOT_REDUCE)? link->requests : link->requests + link->half; 1540e23c03SJunchao Zhang PetscInt i,nrootranks,ndrootranks,nleafranks,ndleafranks; 1640e23c03SJunchao Zhang const PetscInt *rootoffset,*leafoffset; 1740e23c03SJunchao Zhang PetscMPIInt n; 1840e23c03SJunchao Zhang MPI_Comm comm = PetscObjectComm((PetscObject)sf); 1940e23c03SJunchao Zhang PetscErrorCode ierr; 2095fce210SBarry Smith 2140e23c03SJunchao Zhang PetscFunctionBegin; 2295fce210SBarry Smith 2340e23c03SJunchao Zhang if (!link->initialized[direction]) { 2440e23c03SJunchao Zhang ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr); 2540e23c03SJunchao Zhang ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr); 2640e23c03SJunchao Zhang if (direction == PETSCSF_LEAF2ROOT_REDUCE) { 2740e23c03SJunchao Zhang for (i=0; i<nrootranks; i++) { 2840e23c03SJunchao Zhang if (i >= ndrootranks) { 2940e23c03SJunchao Zhang ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr); 3040e23c03SJunchao Zhang ierr = MPI_Recv_init(link->root[i],n,unit,bas->iranks[i],link->tag,comm,&requests[i]);CHKERRQ(ierr); 3140e23c03SJunchao Zhang } 3240e23c03SJunchao Zhang } 3340e23c03SJunchao Zhang for (i=0; i<nleafranks; i++) { 3440e23c03SJunchao Zhang if (i >= ndleafranks) { 3540e23c03SJunchao Zhang ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr); 3640e23c03SJunchao Zhang ierr = MPI_Send_init(link->leaf[i],n,unit,sf->ranks[i],link->tag,comm,&requests[nrootranks+i]);CHKERRQ(ierr); 3740e23c03SJunchao Zhang } 3840e23c03SJunchao Zhang } 3940e23c03SJunchao Zhang } else if (direction == PETSCSF_ROOT2LEAF_BCAST) { 4040e23c03SJunchao Zhang for (i=0; i<nrootranks; i++) { 4140e23c03SJunchao Zhang if (i >= ndrootranks) { 4240e23c03SJunchao Zhang ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr); 4340e23c03SJunchao Zhang ierr = MPI_Send_init(link->root[i],n,unit,bas->iranks[i],link->tag,comm,&requests[i]);CHKERRQ(ierr); 4440e23c03SJunchao Zhang } 4540e23c03SJunchao Zhang } 4640e23c03SJunchao Zhang for (i=0; i<nleafranks; i++) { 4740e23c03SJunchao Zhang if (i >= ndleafranks) { 4840e23c03SJunchao Zhang ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr); 4940e23c03SJunchao Zhang ierr = MPI_Recv_init(link->leaf[i],n,unit,sf->ranks[i],link->tag,comm,&requests[nrootranks+i]);CHKERRQ(ierr); 5040e23c03SJunchao Zhang } 5140e23c03SJunchao Zhang } 5240e23c03SJunchao Zhang } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Out-of-range PetscSFDirection = %D\n",direction); 5340e23c03SJunchao Zhang 5440e23c03SJunchao Zhang link->initialized[direction] = PETSC_TRUE; 5595fce210SBarry Smith } 5695fce210SBarry Smith 5740e23c03SJunchao Zhang if (rootreqs) *rootreqs = requests; 5840e23c03SJunchao Zhang if (leafreqs) *leafreqs = requests + bas->niranks; 5940e23c03SJunchao Zhang PetscFunctionReturn(0); 6095fce210SBarry Smith } 6195fce210SBarry Smith 6240e23c03SJunchao Zhang static PetscErrorCode PetscSFPackGet_Basic(PetscSF sf,MPI_Datatype unit,const void *key,PetscSFDirection direction,PetscSFPack_Basic *mylink) 6340e23c03SJunchao Zhang { 6440e23c03SJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 6540e23c03SJunchao Zhang PetscErrorCode ierr; 6640e23c03SJunchao Zhang PetscInt i,nrootranks,ndrootranks,nleafranks,ndleafranks; 6740e23c03SJunchao Zhang const PetscInt *rootoffset,*leafoffset; 6840e23c03SJunchao Zhang PetscSFPack *p; 6940e23c03SJunchao Zhang PetscSFPack_Basic link; 7040e23c03SJunchao Zhang 7140e23c03SJunchao Zhang PetscFunctionBegin; 7240e23c03SJunchao Zhang /* Look for types in cache */ 7340e23c03SJunchao Zhang for (p=&bas->avail; (link=(PetscSFPack_Basic)*p); p=&link->next) { 7440e23c03SJunchao Zhang PetscBool match; 7540e23c03SJunchao Zhang ierr = MPIPetsc_Type_compare(unit,link->unit,&match);CHKERRQ(ierr); 7640e23c03SJunchao Zhang if (match) { 7740e23c03SJunchao Zhang *p = link->next; /* Remove from available list */ 7840e23c03SJunchao Zhang goto found; 7940e23c03SJunchao Zhang } 8053deab39SPeter Brune } 8153deab39SPeter Brune 8240e23c03SJunchao Zhang ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr); 8340e23c03SJunchao Zhang ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr); 8440e23c03SJunchao Zhang ierr = PetscNew(&link);CHKERRQ(ierr); 8540e23c03SJunchao Zhang ierr = PetscSFPackSetupType((PetscSFPack)link,unit);CHKERRQ(ierr); 8640e23c03SJunchao Zhang ierr = PetscMalloc2(nrootranks,&link->root,nleafranks,&link->leaf);CHKERRQ(ierr); 8740e23c03SJunchao Zhang /* Double the requests. First half are used for reduce (leaf2root) communication, second half for bcast (root2leaf) communication */ 8840e23c03SJunchao Zhang link->half = nrootranks + nleafranks; 8940e23c03SJunchao Zhang ierr = PetscMalloc1(link->half*2,&link->requests);CHKERRQ(ierr); 9040e23c03SJunchao Zhang for (i=0; i<link->half*2; i++) link->requests[i] = MPI_REQUEST_NULL; /* Must be init'ed since some are unused but we call MPI_Waitall on them in whole */ 9140e23c03SJunchao Zhang /* One tag per link */ 9240e23c03SJunchao Zhang ierr = PetscCommGetNewTag(PetscObjectComm((PetscObject)sf),&link->tag);CHKERRQ(ierr); 9353deab39SPeter Brune 9440e23c03SJunchao Zhang /* Allocate root and leaf buffers */ 9540e23c03SJunchao Zhang for (i=0; i<nrootranks; i++) {ierr = PetscMalloc((rootoffset[i+1]-rootoffset[i])*link->unitbytes,&link->root[i]);CHKERRQ(ierr);} 9640e23c03SJunchao Zhang for (i=0; i<nleafranks; i++) { 9740e23c03SJunchao Zhang if (i < ndleafranks) { /* Leaf buffers for distinguished ranks are pointers directly into root buffers */ 9840e23c03SJunchao Zhang if (ndrootranks != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot match distinguished ranks"); 9940e23c03SJunchao Zhang link->leaf[i] = link->root[0]; 10040e23c03SJunchao Zhang continue; 10140e23c03SJunchao Zhang } 10240e23c03SJunchao Zhang ierr = PetscMalloc((leafoffset[i+1]-leafoffset[i])*link->unitbytes,&link->leaf[i]);CHKERRQ(ierr); 10353deab39SPeter Brune } 10453deab39SPeter Brune 10540e23c03SJunchao Zhang found: 10640e23c03SJunchao Zhang link->key = key; 10740e23c03SJunchao Zhang link->next = bas->inuse; 10840e23c03SJunchao Zhang bas->inuse = (PetscSFPack)link; 10940e23c03SJunchao Zhang 11040e23c03SJunchao Zhang *mylink = link; 11140e23c03SJunchao Zhang PetscFunctionReturn(0); 11295fce210SBarry Smith } 11395fce210SBarry Smith 11440e23c03SJunchao Zhang /*===================================================================================*/ 11540e23c03SJunchao Zhang /* SF public interface implementations */ 11640e23c03SJunchao Zhang /*===================================================================================*/ 11740e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf) 11895fce210SBarry Smith { 11995fce210SBarry Smith PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 12095fce210SBarry Smith PetscErrorCode ierr; 12195fce210SBarry Smith PetscInt *rlengths,*ilengths,i; 12240e23c03SJunchao Zhang PetscMPIInt rank,niranks,*iranks,tag; 12395fce210SBarry Smith MPI_Comm comm; 124b5a8e515SJed Brown MPI_Group group; 12540e23c03SJunchao Zhang MPI_Request *rootreqs,*leafreqs; 12695fce210SBarry Smith 12795fce210SBarry Smith PetscFunctionBegin; 128b5a8e515SJed Brown ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRQ(ierr); 129b5a8e515SJed Brown ierr = PetscSFSetUpRanks(sf,group);CHKERRQ(ierr); 130b5a8e515SJed Brown ierr = MPI_Group_free(&group);CHKERRQ(ierr); 13195fce210SBarry Smith ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 13240e23c03SJunchao Zhang ierr = PetscObjectGetNewTag((PetscObject)sf,&tag);CHKERRQ(ierr); 133c943f53fSJed Brown ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 13495fce210SBarry Smith /* 13595fce210SBarry Smith * Inform roots about how many leaves and from which ranks 13695fce210SBarry Smith */ 137785e854fSJed Brown ierr = PetscMalloc1(sf->nranks,&rlengths);CHKERRQ(ierr); 13895fce210SBarry Smith /* Determine number, sending ranks, and length of incoming */ 13995fce210SBarry Smith for (i=0; i<sf->nranks; i++) { 14095fce210SBarry Smith rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */ 14195fce210SBarry Smith } 14240e23c03SJunchao Zhang ierr = PetscCommBuildTwoSided(comm,1,MPIU_INT,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths);CHKERRQ(ierr); 143c943f53fSJed Brown 1440b899082SJunchao Zhang /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why. 1450b899082SJunchao Zhang We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is 1460b899082SJunchao Zhang small and the sorting is cheap. 1470b899082SJunchao Zhang */ 1480b899082SJunchao Zhang ierr = PetscSortMPIIntWithIntArray(niranks,iranks,ilengths);CHKERRQ(ierr); 1490b899082SJunchao Zhang 150c943f53fSJed Brown /* Partition into distinguished and non-distinguished incoming ranks */ 151c943f53fSJed Brown bas->ndiranks = sf->ndranks; 152c943f53fSJed Brown bas->niranks = bas->ndiranks + niranks; 153c943f53fSJed Brown ierr = PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset);CHKERRQ(ierr); 154c943f53fSJed Brown bas->ioffset[0] = 0; 155c943f53fSJed Brown for (i=0; i<bas->ndiranks; i++) { 156c943f53fSJed Brown bas->iranks[i] = sf->ranks[i]; 157c943f53fSJed Brown bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i]; 158c943f53fSJed Brown } 15940e23c03SJunchao Zhang if (bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks"); 16040e23c03SJunchao Zhang for ( ; i<bas->niranks; i++) { 161c943f53fSJed Brown bas->iranks[i] = iranks[i-bas->ndiranks]; 162c943f53fSJed Brown bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks]; 163c943f53fSJed Brown } 164c943f53fSJed Brown bas->itotal = bas->ioffset[i]; 16595fce210SBarry Smith ierr = PetscFree(rlengths);CHKERRQ(ierr); 166c943f53fSJed Brown ierr = PetscFree(iranks);CHKERRQ(ierr); 167c943f53fSJed Brown ierr = PetscFree(ilengths);CHKERRQ(ierr); 16895fce210SBarry Smith 16995fce210SBarry Smith /* Send leaf identities to roots */ 170c943f53fSJed Brown ierr = PetscMalloc1(bas->itotal,&bas->irootloc);CHKERRQ(ierr); 17140e23c03SJunchao Zhang ierr = PetscMalloc2(bas->niranks-bas->ndiranks,&rootreqs,sf->nranks-sf->ndranks,&leafreqs);CHKERRQ(ierr); 17240e23c03SJunchao Zhang for (i=bas->ndiranks; i<bas->niranks; i++) { 17340e23c03SJunchao Zhang ierr = MPI_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],tag,comm,&rootreqs[i-bas->ndiranks]);CHKERRQ(ierr); 17440e23c03SJunchao Zhang } 17540e23c03SJunchao Zhang for (i=0; i<sf->nranks; i++) { 17695fce210SBarry Smith PetscMPIInt npoints; 17795fce210SBarry Smith ierr = PetscMPIIntCast(sf->roffset[i+1] - sf->roffset[i],&npoints);CHKERRQ(ierr); 17840e23c03SJunchao Zhang if (i < sf->ndranks) { 17940e23c03SJunchao Zhang if (sf->ranks[i] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank"); 18040e23c03SJunchao Zhang if (bas->iranks[0] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank"); 18140e23c03SJunchao Zhang if (npoints != bas->ioffset[1]-bas->ioffset[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths"); 18240e23c03SJunchao Zhang ierr = PetscArraycpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints);CHKERRQ(ierr); 183c943f53fSJed Brown continue; 184c943f53fSJed Brown } 18540e23c03SJunchao Zhang ierr = MPI_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],tag,comm,&leafreqs[i-sf->ndranks]);CHKERRQ(ierr); 186bf39f1bfSJed Brown } 18740e23c03SJunchao Zhang ierr = MPI_Waitall(bas->niranks-bas->ndiranks,rootreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 18840e23c03SJunchao Zhang ierr = MPI_Waitall(sf->nranks-sf->ndranks,leafreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 18940e23c03SJunchao Zhang ierr = PetscFree2(rootreqs,leafreqs);CHKERRQ(ierr); 19095fce210SBarry Smith 19140e23c03SJunchao Zhang /* Setup packing optimization for root and leaf */ 19240e23c03SJunchao Zhang ierr = PetscSFPackSetupOptimization(sf->nranks,sf->roffset,sf->rmine,&sf->leafpackopt);CHKERRQ(ierr); 19340e23c03SJunchao Zhang ierr = PetscSFPackSetupOptimization(bas->niranks,bas->ioffset,bas->irootloc,&bas->rootpackopt);CHKERRQ(ierr); 19495fce210SBarry Smith PetscFunctionReturn(0); 19595fce210SBarry Smith } 19695fce210SBarry Smith 1974416b707SBarry Smith static PetscErrorCode PetscSFSetFromOptions_Basic(PetscOptionItems *PetscOptionsObject,PetscSF sf) 19895fce210SBarry Smith { 19995fce210SBarry Smith PetscErrorCode ierr; 20095fce210SBarry Smith 20195fce210SBarry Smith PetscFunctionBegin; 202e55864a3SBarry Smith ierr = PetscOptionsHead(PetscOptionsObject,"PetscSF Basic options");CHKERRQ(ierr); 20395fce210SBarry Smith ierr = PetscOptionsTail();CHKERRQ(ierr); 20495fce210SBarry Smith PetscFunctionReturn(0); 20595fce210SBarry Smith } 20695fce210SBarry Smith 20740e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf) 20895fce210SBarry Smith { 20995fce210SBarry Smith PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 21095fce210SBarry Smith PetscErrorCode ierr; 21140e23c03SJunchao Zhang PetscSFPack_Basic link,next; 21295fce210SBarry Smith 21395fce210SBarry Smith PetscFunctionBegin; 21429046d53SLisandro Dalcin if (bas->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed"); 215c943f53fSJed Brown ierr = PetscFree2(bas->iranks,bas->ioffset);CHKERRQ(ierr); 216c943f53fSJed Brown ierr = PetscFree(bas->irootloc);CHKERRQ(ierr); 21740e23c03SJunchao Zhang for (link=(PetscSFPack_Basic)bas->avail; link; link=next) { 218bf39f1bfSJed Brown PetscInt i; 21940e23c03SJunchao Zhang next = (PetscSFPack_Basic)link->next; 22077b7e48dSJunchao Zhang if (!link->isbuiltin) {ierr = MPI_Type_free(&link->unit);CHKERRQ(ierr);} 221bf39f1bfSJed Brown for (i=0; i<bas->niranks; i++) {ierr = PetscFree(link->root[i]);CHKERRQ(ierr);} 222c943f53fSJed Brown for (i=sf->ndranks; i<sf->nranks; i++) {ierr = PetscFree(link->leaf[i]);CHKERRQ(ierr);} /* Free only non-distinguished leaf buffers */ 22395fce210SBarry Smith ierr = PetscFree2(link->root,link->leaf);CHKERRQ(ierr); 22430e38525SJunchao Zhang /* Free persistent requests using MPI_Request_free */ 22540e23c03SJunchao Zhang for (i=0; i<link->half*2; i++) { 22640e23c03SJunchao Zhang if (link->requests[i] != MPI_REQUEST_NULL) {ierr = MPI_Request_free(&link->requests[i]);CHKERRQ(ierr);} 22730e38525SJunchao Zhang } 22895fce210SBarry Smith ierr = PetscFree(link->requests);CHKERRQ(ierr); 22995fce210SBarry Smith ierr = PetscFree(link);CHKERRQ(ierr); 23095fce210SBarry Smith } 23195fce210SBarry Smith bas->avail = NULL; 23240e23c03SJunchao Zhang ierr = PetscSFPackDestoryOptimization(&sf->leafpackopt);CHKERRQ(ierr); 23340e23c03SJunchao Zhang ierr = PetscSFPackDestoryOptimization(&bas->rootpackopt);CHKERRQ(ierr); 23495fce210SBarry Smith PetscFunctionReturn(0); 23595fce210SBarry Smith } 23695fce210SBarry Smith 23740e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf) 23895fce210SBarry Smith { 23995fce210SBarry Smith PetscErrorCode ierr; 24095fce210SBarry Smith 24195fce210SBarry Smith PetscFunctionBegin; 24240e23c03SJunchao Zhang ierr = PetscSFReset(sf);CHKERRQ(ierr); 24395fce210SBarry Smith ierr = PetscFree(sf->data);CHKERRQ(ierr); 24495fce210SBarry Smith PetscFunctionReturn(0); 24595fce210SBarry Smith } 24695fce210SBarry Smith 24740e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer) 24895fce210SBarry Smith { 24995fce210SBarry Smith PetscErrorCode ierr; 25095fce210SBarry Smith PetscBool iascii; 25195fce210SBarry Smith 25295fce210SBarry Smith PetscFunctionBegin; 25395fce210SBarry Smith ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 25495fce210SBarry Smith if (iascii) { 25595fce210SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," sort=%s\n",sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr); 25695fce210SBarry Smith } 25795fce210SBarry Smith PetscFunctionReturn(0); 25895fce210SBarry Smith } 25995fce210SBarry Smith 2603482bfa8SJunchao Zhang static PetscErrorCode PetscSFBcastAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 26195fce210SBarry Smith { 26295fce210SBarry Smith PetscErrorCode ierr; 26340e23c03SJunchao Zhang PetscSFPack_Basic link; 264c943f53fSJed Brown PetscInt i,nrootranks,ndrootranks,nleafranks,ndleafranks; 26595fce210SBarry Smith const PetscInt *rootoffset,*leafoffset,*rootloc,*leafloc; 26695fce210SBarry Smith const PetscMPIInt *rootranks,*leafranks; 26795fce210SBarry Smith MPI_Request *rootreqs,*leafreqs; 26830e38525SJunchao Zhang PetscMPIInt n; 26940e23c03SJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 27095fce210SBarry Smith 27195fce210SBarry Smith PetscFunctionBegin; 27240e23c03SJunchao Zhang ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr); 27340e23c03SJunchao Zhang ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc,NULL);CHKERRQ(ierr); 27440e23c03SJunchao Zhang ierr = PetscSFPackGet_Basic(sf,unit,rootdata,PETSCSF_ROOT2LEAF_BCAST,&link);CHKERRQ(ierr); 27595fce210SBarry Smith 27640e23c03SJunchao Zhang ierr = PetscSFPackGetReqs_Basic(sf,unit,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);CHKERRQ(ierr); 277c943f53fSJed Brown /* Eagerly post leaf receives, but only from non-distinguished ranks -- distinguished ranks will receive via shared memory */ 27830e38525SJunchao Zhang ierr = PetscMPIIntCast(leafoffset[nleafranks]-leafoffset[ndleafranks],&n);CHKERRQ(ierr); 27940e23c03SJunchao Zhang ierr = MPI_Startall_irecv(n,unit,nleafranks-ndleafranks,leafreqs+ndleafranks);CHKERRQ(ierr); /* One can wait but not start a null request */ 28030e38525SJunchao Zhang 28195fce210SBarry Smith /* Pack and send root data */ 28295fce210SBarry Smith for (i=0; i<nrootranks; i++) { 283bf39f1bfSJed Brown void *packstart = link->root[i]; 28430e38525SJunchao Zhang ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr); 28540e23c03SJunchao Zhang (*link->Pack)(n,link->bs,rootloc+rootoffset[i],i,bas->rootpackopt,rootdata,packstart); 286c943f53fSJed Brown if (i < ndrootranks) continue; /* shared memory */ 28740e23c03SJunchao Zhang ierr = MPI_Start_isend(n,unit,&rootreqs[i]);CHKERRQ(ierr); 28895fce210SBarry Smith } 28995fce210SBarry Smith PetscFunctionReturn(0); 29095fce210SBarry Smith } 29195fce210SBarry Smith 29240e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFBcastAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 29395fce210SBarry Smith { 29495fce210SBarry Smith PetscErrorCode ierr; 29540e23c03SJunchao Zhang PetscSFPack_Basic link; 296c943f53fSJed Brown PetscInt i,nleafranks,ndleafranks; 29795fce210SBarry Smith const PetscInt *leafoffset,*leafloc; 29840e23c03SJunchao Zhang PetscErrorCode (*UnpackAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*); 2993482bfa8SJunchao Zhang PetscMPIInt typesize = -1; 30095fce210SBarry Smith 30195fce210SBarry Smith PetscFunctionBegin; 30240e23c03SJunchao Zhang ierr = PetscSFPackGetInUse(sf,unit,rootdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr); 30340e23c03SJunchao Zhang ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr); 30440e23c03SJunchao Zhang ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,&leafloc,NULL);CHKERRQ(ierr); 30540e23c03SJunchao Zhang ierr = PetscSFPackGetUnpackAndOp(sf,(PetscSFPack)link,op,&UnpackAndOp);CHKERRQ(ierr); 3063482bfa8SJunchao Zhang 30740e23c03SJunchao Zhang if (UnpackAndOp) { typesize = link->unitbytes; } 3083482bfa8SJunchao Zhang else { ierr = MPI_Type_size(unit,&typesize);CHKERRQ(ierr); } 3093482bfa8SJunchao Zhang 31095fce210SBarry Smith for (i=0; i<nleafranks; i++) { 31195fce210SBarry Smith PetscMPIInt n = leafoffset[i+1] - leafoffset[i]; 3123482bfa8SJunchao Zhang char *packstart = (char *) link->leaf[i]; 31340e23c03SJunchao Zhang if (UnpackAndOp) { (*UnpackAndOp)(n,link->bs,leafloc+leafoffset[i],i,sf->leafpackopt,leafdata,(const void *)packstart); } 3143482bfa8SJunchao Zhang #if defined(PETSC_HAVE_MPI_REDUCE_LOCAL) 3153482bfa8SJunchao Zhang else if (n) { /* the op should be defined to operate on the whole datatype, so we ignore link->bs */ 3163482bfa8SJunchao Zhang PetscInt j; 3173482bfa8SJunchao Zhang for (j=0; j<n; j++) { ierr = MPI_Reduce_local(packstart+j*typesize,((char *) leafdata)+(leafloc[leafoffset[i]+j])*typesize,1,unit,op);CHKERRQ(ierr); } 31895fce210SBarry Smith } 3193482bfa8SJunchao Zhang #else 320d2a25953SJunchao Zhang else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No unpacking reduction operation for this MPI_Op"); 3213482bfa8SJunchao Zhang #endif 3223482bfa8SJunchao Zhang } 3233482bfa8SJunchao Zhang 32440e23c03SJunchao Zhang ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr); 32595fce210SBarry Smith PetscFunctionReturn(0); 32695fce210SBarry Smith } 32795fce210SBarry Smith 32895fce210SBarry Smith /* leaf -> root with reduction */ 32940e23c03SJunchao Zhang static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 33095fce210SBarry Smith { 33140e23c03SJunchao Zhang PetscSFPack_Basic link; 33295fce210SBarry Smith PetscErrorCode ierr; 333c943f53fSJed Brown PetscInt i,nrootranks,ndrootranks,nleafranks,ndleafranks; 33495fce210SBarry Smith const PetscInt *rootoffset,*leafoffset,*rootloc,*leafloc; 33595fce210SBarry Smith const PetscMPIInt *rootranks,*leafranks; 33695fce210SBarry Smith MPI_Request *rootreqs,*leafreqs; 33730e38525SJunchao Zhang PetscMPIInt n; 33895fce210SBarry Smith 33995fce210SBarry Smith PetscFunctionBegin; 34040e23c03SJunchao Zhang ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr); 34140e23c03SJunchao Zhang ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc,NULL);CHKERRQ(ierr); 34240e23c03SJunchao Zhang ierr = PetscSFPackGet_Basic(sf,unit,leafdata,PETSCSF_LEAF2ROOT_REDUCE,&link);CHKERRQ(ierr); 34395fce210SBarry Smith 34440e23c03SJunchao Zhang ierr = PetscSFPackGetReqs_Basic(sf,unit,link,PETSCSF_LEAF2ROOT_REDUCE,&rootreqs,&leafreqs);CHKERRQ(ierr); 345c943f53fSJed Brown /* Eagerly post root receives for non-distinguished ranks */ 34630e38525SJunchao Zhang ierr = PetscMPIIntCast(rootoffset[nrootranks]-rootoffset[ndrootranks],&n);CHKERRQ(ierr); 34740e23c03SJunchao Zhang ierr = MPI_Startall_irecv(n,unit,nrootranks-ndrootranks,rootreqs+ndrootranks);CHKERRQ(ierr); 34830e38525SJunchao Zhang 34995fce210SBarry Smith /* Pack and send leaf data */ 35095fce210SBarry Smith for (i=0; i<nleafranks; i++) { 351bf39f1bfSJed Brown void *packstart = link->leaf[i]; 35230e38525SJunchao Zhang ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr); 35340e23c03SJunchao Zhang (*link->Pack)(n,link->bs,leafloc+leafoffset[i],i,sf->leafpackopt,leafdata,packstart); 354c943f53fSJed Brown if (i < ndleafranks) continue; /* shared memory */ 35540e23c03SJunchao Zhang ierr = MPI_Start_isend(n,unit,&leafreqs[i]);CHKERRQ(ierr); 35695fce210SBarry Smith } 35795fce210SBarry Smith PetscFunctionReturn(0); 35895fce210SBarry Smith } 35995fce210SBarry Smith 36040e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 36195fce210SBarry Smith { 36240e23c03SJunchao Zhang PetscErrorCode (*UnpackAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*); 36395fce210SBarry Smith PetscErrorCode ierr; 36440e23c03SJunchao Zhang PetscSFPack_Basic link; 36595fce210SBarry Smith PetscInt i,nrootranks; 3661620da3bSToby Isaac PetscMPIInt typesize = -1; 36795fce210SBarry Smith const PetscInt *rootoffset,*rootloc; 36840e23c03SJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 36995fce210SBarry Smith 37095fce210SBarry Smith PetscFunctionBegin; 37140e23c03SJunchao Zhang ierr = PetscSFPackGetInUse(sf,unit,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr); 37295fce210SBarry Smith /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */ 37340e23c03SJunchao Zhang ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr); 37440e23c03SJunchao Zhang ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,NULL,NULL,&rootoffset,&rootloc);CHKERRQ(ierr); 37540e23c03SJunchao Zhang ierr = PetscSFPackGetUnpackAndOp(sf,(PetscSFPack)link,op,&UnpackAndOp);CHKERRQ(ierr); 37640e23c03SJunchao Zhang if (UnpackAndOp) { 3771620da3bSToby Isaac typesize = link->unitbytes; 3781620da3bSToby Isaac } 3791620da3bSToby Isaac else { 3801620da3bSToby Isaac ierr = MPI_Type_size(unit,&typesize);CHKERRQ(ierr); 3811620da3bSToby Isaac } 38295fce210SBarry Smith for (i=0; i<nrootranks; i++) { 38395fce210SBarry Smith PetscMPIInt n = rootoffset[i+1] - rootoffset[i]; 384bf39f1bfSJed Brown char *packstart = (char *) link->root[i]; 38595fce210SBarry Smith 38640e23c03SJunchao Zhang if (UnpackAndOp) { 38740e23c03SJunchao Zhang (*UnpackAndOp)(n,link->bs,rootloc+rootoffset[i],i,bas->rootpackopt,rootdata,(const void *)packstart); 38895fce210SBarry Smith } 38962795ce3SStefano Zampini #if defined(PETSC_HAVE_MPI_REDUCE_LOCAL) 39078ec4774SToby Isaac else if (n) { /* the op should be defined to operate on the whole datatype, so we ignore link->bs */ 39178ec4774SToby Isaac PetscInt j; 3921620da3bSToby Isaac 3931620da3bSToby Isaac for (j = 0; j < n; j++) { 39478ec4774SToby Isaac ierr = MPI_Reduce_local(packstart+j*typesize,((char *) rootdata)+(rootloc[rootoffset[i]+j])*typesize,1,unit,op);CHKERRQ(ierr); 3951620da3bSToby Isaac } 3961620da3bSToby Isaac } 397a9ec7e9eSToby Isaac #else 398d2a25953SJunchao Zhang else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No unpacking reduction operation for this MPI_Op"); 399a9ec7e9eSToby Isaac #endif 4001620da3bSToby Isaac } 40140e23c03SJunchao Zhang ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr); 40295fce210SBarry Smith PetscFunctionReturn(0); 40395fce210SBarry Smith } 40495fce210SBarry Smith 40540e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 40695fce210SBarry Smith { 40795fce210SBarry Smith PetscErrorCode ierr; 40895fce210SBarry Smith 40995fce210SBarry Smith PetscFunctionBegin; 41040e23c03SJunchao Zhang ierr = PetscSFReduceBegin(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr); 41195fce210SBarry Smith PetscFunctionReturn(0); 41295fce210SBarry Smith } 41395fce210SBarry Smith 41495fce210SBarry Smith static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 41595fce210SBarry Smith { 41640e23c03SJunchao Zhang PetscErrorCode (*FetchAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*); 41795fce210SBarry Smith PetscErrorCode ierr; 41840e23c03SJunchao Zhang PetscSFPack_Basic link; 419c943f53fSJed Brown PetscInt i,nrootranks,ndrootranks,nleafranks,ndleafranks; 42095fce210SBarry Smith const PetscInt *rootoffset,*leafoffset,*rootloc,*leafloc; 42195fce210SBarry Smith const PetscMPIInt *rootranks,*leafranks; 42295fce210SBarry Smith MPI_Request *rootreqs,*leafreqs; 42330e38525SJunchao Zhang PetscMPIInt n; 42440e23c03SJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 42595fce210SBarry Smith 42695fce210SBarry Smith PetscFunctionBegin; 42740e23c03SJunchao Zhang ierr = PetscSFPackGetInUse(sf,unit,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr); 42895fce210SBarry Smith /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */ 42940e23c03SJunchao Zhang ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr); 43040e23c03SJunchao Zhang ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr); 43140e23c03SJunchao Zhang ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc,NULL);CHKERRQ(ierr); 43240e23c03SJunchao Zhang 43340e23c03SJunchao Zhang ierr = PetscSFPackGetReqs_Basic(sf,unit,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);CHKERRQ(ierr); 43495fce210SBarry Smith /* Post leaf receives */ 43530e38525SJunchao Zhang ierr = PetscMPIIntCast(leafoffset[nleafranks]-leafoffset[ndleafranks],&n);CHKERRQ(ierr); 43640e23c03SJunchao Zhang ierr = MPI_Startall_irecv(n,unit,nleafranks-ndleafranks,leafreqs+ndleafranks);CHKERRQ(ierr); 43730e38525SJunchao Zhang 43895fce210SBarry Smith /* Process local fetch-and-op, post root sends */ 43940e23c03SJunchao Zhang ierr = PetscSFPackGetFetchAndOp(sf,(PetscSFPack)link,op,&FetchAndOp);CHKERRQ(ierr); 44095fce210SBarry Smith for (i=0; i<nrootranks; i++) { 441bf39f1bfSJed Brown void *packstart = link->root[i]; 44230e38525SJunchao Zhang ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr); 44340e23c03SJunchao Zhang (*FetchAndOp)(n,link->bs,rootloc+rootoffset[i],i,bas->rootpackopt,rootdata,packstart); 444c943f53fSJed Brown if (i < ndrootranks) continue; /* shared memory */ 44540e23c03SJunchao Zhang ierr = MPI_Start_isend(n,unit,&rootreqs[i]);CHKERRQ(ierr); 44695fce210SBarry Smith } 44740e23c03SJunchao Zhang ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr); 44895fce210SBarry Smith for (i=0; i<nleafranks; i++) { 449bf39f1bfSJed Brown const void *packstart = link->leaf[i]; 45030e38525SJunchao Zhang ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr); 45140e23c03SJunchao Zhang (*link->UnpackAndInsert)(n,link->bs,leafloc+leafoffset[i],i,sf->leafpackopt,leafupdate,packstart); 45295fce210SBarry Smith } 45340e23c03SJunchao Zhang ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr); 45495fce210SBarry Smith PetscFunctionReturn(0); 45595fce210SBarry Smith } 45695fce210SBarry Smith 45740e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc) 4588750ddebSJunchao Zhang { 4598750ddebSJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 4608750ddebSJunchao Zhang 4618750ddebSJunchao Zhang PetscFunctionBegin; 4628750ddebSJunchao Zhang if (niranks) *niranks = bas->niranks; 4638750ddebSJunchao Zhang if (iranks) *iranks = bas->iranks; 4648750ddebSJunchao Zhang if (ioffset) *ioffset = bas->ioffset; 4658750ddebSJunchao Zhang if (irootloc) *irootloc = bas->irootloc; 4668750ddebSJunchao Zhang PetscFunctionReturn(0); 4678750ddebSJunchao Zhang } 4688750ddebSJunchao Zhang 469*f659e5c7SJunchao Zhang /* An optimized PetscSFCreateEmbeddedSF. We aggresively make use of the established communication on sf. 470*f659e5c7SJunchao Zhang We need one bcast on sf, and no communication anymore to build the embedded sf. Note that selected[] 471*f659e5c7SJunchao Zhang was sorted before calling the routine. 472*f659e5c7SJunchao Zhang */ 473*f659e5c7SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 474*f659e5c7SJunchao Zhang { 475*f659e5c7SJunchao Zhang PetscSF esf; 476*f659e5c7SJunchao Zhang PetscInt esf_nranks,esf_ndranks,*esf_roffset,*esf_rmine,*esf_rremote; 477*f659e5c7SJunchao Zhang PetscInt i,j,k,p,q,nroots,*rootdata,*leafdata,*leafbuf,connected_leaves,*new_ilocal,nranks,ndranks,niranks,ndiranks,minleaf,maxleaf,maxlocal; 478*f659e5c7SJunchao Zhang PetscMPIInt *esf_ranks; 479*f659e5c7SJunchao Zhang const PetscMPIInt *ranks,*iranks; 480*f659e5c7SJunchao Zhang const PetscInt *roffset,*rmine,*rremote,*ioffset,*irootloc; 481*f659e5c7SJunchao Zhang PetscBool connected; 482*f659e5c7SJunchao Zhang PetscSFPack_Basic link; 483*f659e5c7SJunchao Zhang PetscSFNode *new_iremote; 484*f659e5c7SJunchao Zhang PetscSF_Basic *bas; 485*f659e5c7SJunchao Zhang PetscErrorCode ierr; 486*f659e5c7SJunchao Zhang 487*f659e5c7SJunchao Zhang PetscFunctionBegin; 488*f659e5c7SJunchao Zhang ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);CHKERRQ(ierr); 489*f659e5c7SJunchao Zhang ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */ 490*f659e5c7SJunchao Zhang 491*f659e5c7SJunchao Zhang /* Find out which leaves are still connected to roots in the embedded sf */ 492*f659e5c7SJunchao Zhang ierr = PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);CHKERRQ(ierr); 493*f659e5c7SJunchao Zhang ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr); 494*f659e5c7SJunchao Zhang /* We abused the term leafdata here, whose size is usually the number of leaf data items. Here its size is # of leaves (always >= # of leaf data items) */ 495*f659e5c7SJunchao Zhang maxlocal = (minleaf > maxleaf)? 0 : maxleaf-minleaf+1; /* maxleaf=-1 and minleaf=0 when nleaves=0 */ 496*f659e5c7SJunchao Zhang ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafdata);CHKERRQ(ierr); 497*f659e5c7SJunchao Zhang /* Tag selected roots */ 498*f659e5c7SJunchao Zhang for (i=0; i<nselected; ++i) rootdata[selected[i]] = 1; 499*f659e5c7SJunchao Zhang 500*f659e5c7SJunchao Zhang /* Bcast from roots to leaves to tag connected leaves. We reuse the established bcast communication in 501*f659e5c7SJunchao Zhang sf but do not do unpacking (from leaf buffer to leafdata). The raw data in leaf buffer is what we are 502*f659e5c7SJunchao Zhang interested in since it tells which leaves are connected to which ranks. 503*f659e5c7SJunchao Zhang */ 504*f659e5c7SJunchao Zhang ierr = PetscSFBcastAndOpBegin_Basic(sf,MPIU_INT,rootdata,leafdata-minleaf,MPIU_REPLACE);CHKERRQ(ierr); /* Need to give leafdata but we won't use it */ 505*f659e5c7SJunchao Zhang ierr = PetscSFPackGetInUse(sf,MPIU_INT,rootdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr); 506*f659e5c7SJunchao Zhang ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr); 507*f659e5c7SJunchao Zhang ierr = PetscSFGetLeafInfo_Basic(sf,&nranks,&ndranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr); /* Get send info */ 508*f659e5c7SJunchao Zhang esf_nranks = esf_ndranks = connected_leaves = 0; 509*f659e5c7SJunchao Zhang for (i=0; i<nranks; i++) { /* Scan leaf data to calculate some counts */ 510*f659e5c7SJunchao Zhang leafbuf = (PetscInt*)link->leaf[i]; 511*f659e5c7SJunchao Zhang connected = PETSC_FALSE; /* Is the current process still connected to this remote root rank? */ 512*f659e5c7SJunchao Zhang for (j=roffset[i],k=0; j<roffset[i+1]; j++,k++) { 513*f659e5c7SJunchao Zhang if (leafbuf[k]) { 514*f659e5c7SJunchao Zhang connected_leaves++; 515*f659e5c7SJunchao Zhang connected = PETSC_TRUE; 516*f659e5c7SJunchao Zhang } 517*f659e5c7SJunchao Zhang } 518*f659e5c7SJunchao Zhang if (connected) {esf_nranks++; if (i<ndranks) esf_ndranks++;} 519*f659e5c7SJunchao Zhang } 520*f659e5c7SJunchao Zhang 521*f659e5c7SJunchao Zhang /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */ 522*f659e5c7SJunchao Zhang ierr = PetscMalloc1(connected_leaves,&new_ilocal);CHKERRQ(ierr); 523*f659e5c7SJunchao Zhang ierr = PetscMalloc1(connected_leaves,&new_iremote);CHKERRQ(ierr); 524*f659e5c7SJunchao Zhang ierr = PetscMalloc4(esf_nranks,&esf_ranks,esf_nranks+1,&esf_roffset,connected_leaves,&esf_rmine,connected_leaves,&esf_rremote);CHKERRQ(ierr); 525*f659e5c7SJunchao Zhang p = 0; /* Counter for connected root ranks */ 526*f659e5c7SJunchao Zhang q = 0; /* Counter for connected leaves */ 527*f659e5c7SJunchao Zhang esf_roffset[0] = 0; 528*f659e5c7SJunchao Zhang for (i=0; i<nranks; i++) { /* Scan leaf data again to fill esf arrays */ 529*f659e5c7SJunchao Zhang leafbuf = (PetscInt*)link->leaf[i]; 530*f659e5c7SJunchao Zhang connected = PETSC_FALSE; 531*f659e5c7SJunchao Zhang for (j=roffset[i],k=0; j<roffset[i+1]; j++,k++) { 532*f659e5c7SJunchao Zhang if (leafbuf[k]) { 533*f659e5c7SJunchao Zhang esf_rmine[q] = new_ilocal[q] = rmine[j]; 534*f659e5c7SJunchao Zhang esf_rremote[q] = rremote[j]; 535*f659e5c7SJunchao Zhang new_iremote[q].index = rremote[j]; 536*f659e5c7SJunchao Zhang new_iremote[q].rank = ranks[i]; 537*f659e5c7SJunchao Zhang connected = PETSC_TRUE; 538*f659e5c7SJunchao Zhang q++; 539*f659e5c7SJunchao Zhang } 540*f659e5c7SJunchao Zhang } 541*f659e5c7SJunchao Zhang if (connected) { 542*f659e5c7SJunchao Zhang esf_ranks[p] = ranks[i]; 543*f659e5c7SJunchao Zhang esf_roffset[p+1] = q; 544*f659e5c7SJunchao Zhang p++; 545*f659e5c7SJunchao Zhang } 546*f659e5c7SJunchao Zhang } 547*f659e5c7SJunchao Zhang 548*f659e5c7SJunchao Zhang ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr); 549*f659e5c7SJunchao Zhang 550*f659e5c7SJunchao Zhang /* SetGraph internally resets the SF, so we only set its fields after the call */ 551*f659e5c7SJunchao Zhang ierr = PetscSFSetGraph(esf,nroots,connected_leaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 552*f659e5c7SJunchao Zhang esf->nranks = esf_nranks; 553*f659e5c7SJunchao Zhang esf->ndranks = esf_ndranks; 554*f659e5c7SJunchao Zhang esf->ranks = esf_ranks; 555*f659e5c7SJunchao Zhang esf->roffset = esf_roffset; 556*f659e5c7SJunchao Zhang esf->rmine = esf_rmine; 557*f659e5c7SJunchao Zhang esf->rremote = esf_rremote; 558*f659e5c7SJunchao Zhang 559*f659e5c7SJunchao Zhang /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */ 560*f659e5c7SJunchao Zhang bas = (PetscSF_Basic*)esf->data; 561*f659e5c7SJunchao Zhang ierr = PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr); /* Get recv info */ 562*f659e5c7SJunchao Zhang /* Embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here. But we 563*f659e5c7SJunchao Zhang expect these arrays are usually short, so we do not care. The benefit is we can fill these arrays by just parsing irootloc once. 564*f659e5c7SJunchao Zhang */ 565*f659e5c7SJunchao Zhang ierr = PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);CHKERRQ(ierr); 566*f659e5c7SJunchao Zhang ierr = PetscMalloc1(ioffset[niranks],&bas->irootloc);CHKERRQ(ierr); 567*f659e5c7SJunchao Zhang bas->niranks = bas->ndiranks = bas->ioffset[0] = 0; 568*f659e5c7SJunchao Zhang p = 0; /* Counter for connected leaf ranks */ 569*f659e5c7SJunchao Zhang q = 0; /* Counter for connected roots */ 570*f659e5c7SJunchao Zhang for (i=0; i<niranks; i++) { 571*f659e5c7SJunchao Zhang connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */ 572*f659e5c7SJunchao Zhang for (j=ioffset[i]; j<ioffset[i+1]; j++) { 573*f659e5c7SJunchao Zhang PetscInt loc; 574*f659e5c7SJunchao Zhang ierr = PetscFindInt(irootloc[j],nselected,selected,&loc);CHKERRQ(ierr); 575*f659e5c7SJunchao Zhang if (loc >= 0) { /* Found in selected this root is connected */ 576*f659e5c7SJunchao Zhang bas->irootloc[q++] = irootloc[j]; 577*f659e5c7SJunchao Zhang connected = PETSC_TRUE; 578*f659e5c7SJunchao Zhang } 579*f659e5c7SJunchao Zhang } 580*f659e5c7SJunchao Zhang if (connected) { 581*f659e5c7SJunchao Zhang bas->niranks++; 582*f659e5c7SJunchao Zhang if (i<ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */ 583*f659e5c7SJunchao Zhang bas->iranks[p] = iranks[i]; 584*f659e5c7SJunchao Zhang bas->ioffset[p+1] = q; 585*f659e5c7SJunchao Zhang p++; 586*f659e5c7SJunchao Zhang } 587*f659e5c7SJunchao Zhang } 588*f659e5c7SJunchao Zhang bas->itotal = q; 589*f659e5c7SJunchao Zhang 590*f659e5c7SJunchao Zhang /* Setup packing optimizations */ 591*f659e5c7SJunchao Zhang ierr = PetscSFPackSetupOptimization(esf->nranks,esf->roffset,esf->rmine,&esf->leafpackopt);CHKERRQ(ierr); 592*f659e5c7SJunchao Zhang ierr = PetscSFPackSetupOptimization(bas->niranks,bas->ioffset,bas->irootloc,&bas->rootpackopt);CHKERRQ(ierr); 593*f659e5c7SJunchao Zhang esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */ 594*f659e5c7SJunchao Zhang 595*f659e5c7SJunchao Zhang ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr); 596*f659e5c7SJunchao Zhang *newsf = esf; 597*f659e5c7SJunchao Zhang PetscFunctionReturn(0); 598*f659e5c7SJunchao Zhang } 599*f659e5c7SJunchao Zhang 600*f659e5c7SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedLeafSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 601*f659e5c7SJunchao Zhang { 602*f659e5c7SJunchao Zhang PetscSF esf; 603*f659e5c7SJunchao Zhang PetscInt i,j,k,p,q,nroots,*rootdata,*leafdata,*new_ilocal,niranks,ndiranks,minleaf,maxleaf,maxlocal; 604*f659e5c7SJunchao Zhang const PetscInt *ilocal,*ioffset,*irootloc; 605*f659e5c7SJunchao Zhang const PetscMPIInt *iranks; 606*f659e5c7SJunchao Zhang PetscSFPack_Basic link; 607*f659e5c7SJunchao Zhang PetscSFNode *new_iremote; 608*f659e5c7SJunchao Zhang const PetscSFNode *iremote; 609*f659e5c7SJunchao Zhang PetscSF_Basic *bas; 610*f659e5c7SJunchao Zhang MPI_Group group; 611*f659e5c7SJunchao Zhang PetscErrorCode ierr; 612*f659e5c7SJunchao Zhang 613*f659e5c7SJunchao Zhang PetscFunctionBegin; 614*f659e5c7SJunchao Zhang ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);CHKERRQ(ierr); 615*f659e5c7SJunchao Zhang ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */ 616*f659e5c7SJunchao Zhang 617*f659e5c7SJunchao Zhang /* Set the graph of esf, which is easy for CreateEmbeddedLeafSF */ 618*f659e5c7SJunchao Zhang ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr); 619*f659e5c7SJunchao Zhang ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr); 620*f659e5c7SJunchao Zhang ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr); 621*f659e5c7SJunchao Zhang ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr); 622*f659e5c7SJunchao Zhang for (i=0; i<nselected; i++) { 623*f659e5c7SJunchao Zhang const PetscInt l = selected[i]; 624*f659e5c7SJunchao Zhang new_ilocal[i] = ilocal ? ilocal[l] : l; 625*f659e5c7SJunchao Zhang new_iremote[i].rank = iremote[l].rank; 626*f659e5c7SJunchao Zhang new_iremote[i].index = iremote[l].index; 627*f659e5c7SJunchao Zhang } 628*f659e5c7SJunchao Zhang 629*f659e5c7SJunchao Zhang /* Tag selected leaves before PetscSFSetGraph since new_ilocal might turn into NULL since we use PETSC_OWN_POINTER below */ 630*f659e5c7SJunchao Zhang maxlocal = (minleaf > maxleaf)? 0 : maxleaf-minleaf+1; /* maxleaf=-1 and minleaf=0 when nleaves=0 */ 631*f659e5c7SJunchao Zhang ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafdata);CHKERRQ(ierr); 632*f659e5c7SJunchao Zhang for (i=0; i<nselected; i++) leafdata[new_ilocal[i]-minleaf] = 1; /* -minleaf to adjust indices according to minleaf */ 633*f659e5c7SJunchao Zhang 634*f659e5c7SJunchao Zhang ierr = PetscSFSetGraph(esf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 635*f659e5c7SJunchao Zhang 636*f659e5c7SJunchao Zhang /* Set up the outgoing communication (i.e., send info). We can not reuse rmine etc in sf since there is no way to 637*f659e5c7SJunchao Zhang map rmine[i] (ilocal of leaves) back to selected[j] (leaf indices). 638*f659e5c7SJunchao Zhang */ 639*f659e5c7SJunchao Zhang ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRQ(ierr); 640*f659e5c7SJunchao Zhang ierr = PetscSFSetUpRanks(esf,group);CHKERRQ(ierr); 641*f659e5c7SJunchao Zhang ierr = MPI_Group_free(&group);CHKERRQ(ierr); 642*f659e5c7SJunchao Zhang 643*f659e5c7SJunchao Zhang /* Set up the incoming communication (i.e., recv info) */ 644*f659e5c7SJunchao Zhang ierr = PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr); 645*f659e5c7SJunchao Zhang bas = (PetscSF_Basic*)esf->data; 646*f659e5c7SJunchao Zhang ierr = PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);CHKERRQ(ierr); 647*f659e5c7SJunchao Zhang ierr = PetscMalloc1(ioffset[niranks],&bas->irootloc);CHKERRQ(ierr); 648*f659e5c7SJunchao Zhang 649*f659e5c7SJunchao Zhang /* Pass info about selected leaves to root buffer */ 650*f659e5c7SJunchao Zhang ierr = PetscSFReduceBegin_Basic(sf,MPIU_INT,leafdata-minleaf,rootdata,MPIU_REPLACE);CHKERRQ(ierr); /* -minleaf to re-adjust start address of leafdata */ 651*f659e5c7SJunchao Zhang ierr = PetscSFPackGetInUse(sf,MPIU_INT,leafdata-minleaf,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr); 652*f659e5c7SJunchao Zhang ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr); 653*f659e5c7SJunchao Zhang 654*f659e5c7SJunchao Zhang bas->niranks = bas->ndiranks = bas->ioffset[0] = 0; 655*f659e5c7SJunchao Zhang p = 0; /* Counter for connected leaf ranks */ 656*f659e5c7SJunchao Zhang q = 0; /* Counter for connected roots */ 657*f659e5c7SJunchao Zhang for (i=0; i<niranks; i++) { 658*f659e5c7SJunchao Zhang PetscInt *rootbuf = (PetscInt*)link->root[i]; 659*f659e5c7SJunchao Zhang PetscBool connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */ 660*f659e5c7SJunchao Zhang for (j=ioffset[i],k=0; j<ioffset[i+1]; j++,k++) { 661*f659e5c7SJunchao Zhang if (rootbuf[k]) {bas->irootloc[q++] = irootloc[j]; connected = PETSC_TRUE;} 662*f659e5c7SJunchao Zhang } 663*f659e5c7SJunchao Zhang if (connected) { 664*f659e5c7SJunchao Zhang bas->niranks++; 665*f659e5c7SJunchao Zhang if (i<ndiranks) bas->ndiranks++; 666*f659e5c7SJunchao Zhang bas->iranks[p] = iranks[i]; 667*f659e5c7SJunchao Zhang bas->ioffset[p+1] = q; 668*f659e5c7SJunchao Zhang p++; 669*f659e5c7SJunchao Zhang } 670*f659e5c7SJunchao Zhang } 671*f659e5c7SJunchao Zhang bas->itotal = q; 672*f659e5c7SJunchao Zhang ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr); 673*f659e5c7SJunchao Zhang 674*f659e5c7SJunchao Zhang /* Setup packing optimizations */ 675*f659e5c7SJunchao Zhang ierr = PetscSFPackSetupOptimization(esf->nranks,esf->roffset,esf->rmine,&esf->leafpackopt);CHKERRQ(ierr); 676*f659e5c7SJunchao Zhang ierr = PetscSFPackSetupOptimization(bas->niranks,bas->ioffset,bas->irootloc,&bas->rootpackopt);CHKERRQ(ierr); 677*f659e5c7SJunchao Zhang esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */ 678*f659e5c7SJunchao Zhang 679*f659e5c7SJunchao Zhang ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr); 680*f659e5c7SJunchao Zhang *newsf = esf; 681*f659e5c7SJunchao Zhang PetscFunctionReturn(0); 682*f659e5c7SJunchao Zhang } 683*f659e5c7SJunchao Zhang 6848cc058d9SJed Brown PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf) 68595fce210SBarry Smith { 68640e23c03SJunchao Zhang PetscSF_Basic *dat; 68795fce210SBarry Smith PetscErrorCode ierr; 68895fce210SBarry Smith 68995fce210SBarry Smith PetscFunctionBegin; 69095fce210SBarry Smith sf->ops->SetUp = PetscSFSetUp_Basic; 69195fce210SBarry Smith sf->ops->SetFromOptions = PetscSFSetFromOptions_Basic; 69295fce210SBarry Smith sf->ops->Reset = PetscSFReset_Basic; 69395fce210SBarry Smith sf->ops->Destroy = PetscSFDestroy_Basic; 69495fce210SBarry Smith sf->ops->View = PetscSFView_Basic; 6953482bfa8SJunchao Zhang sf->ops->BcastAndOpBegin = PetscSFBcastAndOpBegin_Basic; 6963482bfa8SJunchao Zhang sf->ops->BcastAndOpEnd = PetscSFBcastAndOpEnd_Basic; 69795fce210SBarry Smith sf->ops->ReduceBegin = PetscSFReduceBegin_Basic; 69895fce210SBarry Smith sf->ops->ReduceEnd = PetscSFReduceEnd_Basic; 69995fce210SBarry Smith sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Basic; 70095fce210SBarry Smith sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Basic; 7018750ddebSJunchao Zhang sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Basic; 702*f659e5c7SJunchao Zhang sf->ops->CreateEmbeddedSF = PetscSFCreateEmbeddedSF_Basic; 703*f659e5c7SJunchao Zhang sf->ops->CreateEmbeddedLeafSF = PetscSFCreateEmbeddedLeafSF_Basic; 70495fce210SBarry Smith 70540e23c03SJunchao Zhang ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr); 70640e23c03SJunchao Zhang sf->data = (void*)dat; 70795fce210SBarry Smith PetscFunctionReturn(0); 70895fce210SBarry Smith } 709