18cd53115SBarry Smith 240e23c03SJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfbasic.h> 395fce210SBarry Smith 440e23c03SJunchao Zhang /*===================================================================================*/ 540e23c03SJunchao Zhang /* Internal routines for PetscSFPack_Basic */ 640e23c03SJunchao Zhang /*===================================================================================*/ 795fce210SBarry Smith 840e23c03SJunchao Zhang /* Return root and leaf MPI requests for communication in the given direction. If the requests have not been 940e23c03SJunchao Zhang initialized (since we use persistent requests), then initialize them. 1095fce210SBarry Smith */ 1140e23c03SJunchao Zhang static PetscErrorCode PetscSFPackGetReqs_Basic(PetscSF sf,MPI_Datatype unit,PetscSFPack_Basic link,PetscSFDirection direction,MPI_Request **rootreqs,MPI_Request **leafreqs) 1240e23c03SJunchao Zhang { 1340e23c03SJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 1440e23c03SJunchao Zhang MPI_Request *requests = (direction == PETSCSF_LEAF2ROOT_REDUCE)? link->requests : link->requests + link->half; 1540e23c03SJunchao Zhang PetscInt i,nrootranks,ndrootranks,nleafranks,ndleafranks; 1640e23c03SJunchao Zhang const PetscInt *rootoffset,*leafoffset; 1740e23c03SJunchao Zhang PetscMPIInt n; 1840e23c03SJunchao Zhang MPI_Comm comm = PetscObjectComm((PetscObject)sf); 1940e23c03SJunchao Zhang PetscErrorCode ierr; 2095fce210SBarry Smith 2140e23c03SJunchao Zhang PetscFunctionBegin; 2295fce210SBarry Smith 2340e23c03SJunchao Zhang if (!link->initialized[direction]) { 2440e23c03SJunchao Zhang ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr); 2540e23c03SJunchao Zhang ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr); 2640e23c03SJunchao Zhang if (direction == PETSCSF_LEAF2ROOT_REDUCE) { 2740e23c03SJunchao Zhang for (i=0; i<nrootranks; i++) { 2840e23c03SJunchao Zhang if (i >= ndrootranks) { 2940e23c03SJunchao Zhang ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr); 3040e23c03SJunchao Zhang ierr = MPI_Recv_init(link->root[i],n,unit,bas->iranks[i],link->tag,comm,&requests[i]);CHKERRQ(ierr); 3140e23c03SJunchao Zhang } 3240e23c03SJunchao Zhang } 3340e23c03SJunchao Zhang for (i=0; i<nleafranks; i++) { 3440e23c03SJunchao Zhang if (i >= ndleafranks) { 3540e23c03SJunchao Zhang ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr); 3640e23c03SJunchao Zhang ierr = MPI_Send_init(link->leaf[i],n,unit,sf->ranks[i],link->tag,comm,&requests[nrootranks+i]);CHKERRQ(ierr); 3740e23c03SJunchao Zhang } 3840e23c03SJunchao Zhang } 3940e23c03SJunchao Zhang } else if (direction == PETSCSF_ROOT2LEAF_BCAST) { 4040e23c03SJunchao Zhang for (i=0; i<nrootranks; i++) { 4140e23c03SJunchao Zhang if (i >= ndrootranks) { 4240e23c03SJunchao Zhang ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr); 4340e23c03SJunchao Zhang ierr = MPI_Send_init(link->root[i],n,unit,bas->iranks[i],link->tag,comm,&requests[i]);CHKERRQ(ierr); 4440e23c03SJunchao Zhang } 4540e23c03SJunchao Zhang } 4640e23c03SJunchao Zhang for (i=0; i<nleafranks; i++) { 4740e23c03SJunchao Zhang if (i >= ndleafranks) { 4840e23c03SJunchao Zhang ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr); 4940e23c03SJunchao Zhang ierr = MPI_Recv_init(link->leaf[i],n,unit,sf->ranks[i],link->tag,comm,&requests[nrootranks+i]);CHKERRQ(ierr); 5040e23c03SJunchao Zhang } 5140e23c03SJunchao Zhang } 5240e23c03SJunchao Zhang } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Out-of-range PetscSFDirection = %D\n",direction); 5340e23c03SJunchao Zhang 5440e23c03SJunchao Zhang link->initialized[direction] = PETSC_TRUE; 5595fce210SBarry Smith } 5695fce210SBarry Smith 5740e23c03SJunchao Zhang if (rootreqs) *rootreqs = requests; 5840e23c03SJunchao Zhang if (leafreqs) *leafreqs = requests + bas->niranks; 5940e23c03SJunchao Zhang PetscFunctionReturn(0); 6095fce210SBarry Smith } 6195fce210SBarry Smith 62*9d1c8addSJunchao Zhang static PetscErrorCode PetscSFPackGet_Basic(PetscSF sf,MPI_Datatype unit,const void *rkey,const void *lkey,PetscSFDirection direction,PetscSFPack_Basic *mylink) 6340e23c03SJunchao Zhang { 6440e23c03SJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 6540e23c03SJunchao Zhang PetscErrorCode ierr; 6640e23c03SJunchao Zhang PetscInt i,nrootranks,ndrootranks,nleafranks,ndleafranks; 6740e23c03SJunchao Zhang const PetscInt *rootoffset,*leafoffset; 6840e23c03SJunchao Zhang PetscSFPack *p; 6940e23c03SJunchao Zhang PetscSFPack_Basic link; 7040e23c03SJunchao Zhang 7140e23c03SJunchao Zhang PetscFunctionBegin; 72*9d1c8addSJunchao Zhang ierr = PetscSFPackSetErrorOnUnsupportedOverlap(sf,unit,rkey,lkey);CHKERRQ(ierr); 73*9d1c8addSJunchao Zhang 7440e23c03SJunchao Zhang /* Look for types in cache */ 7540e23c03SJunchao Zhang for (p=&bas->avail; (link=(PetscSFPack_Basic)*p); p=&link->next) { 7640e23c03SJunchao Zhang PetscBool match; 7740e23c03SJunchao Zhang ierr = MPIPetsc_Type_compare(unit,link->unit,&match);CHKERRQ(ierr); 7840e23c03SJunchao Zhang if (match) { 7940e23c03SJunchao Zhang *p = link->next; /* Remove from available list */ 8040e23c03SJunchao Zhang goto found; 8140e23c03SJunchao Zhang } 8253deab39SPeter Brune } 8353deab39SPeter Brune 8440e23c03SJunchao Zhang ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr); 8540e23c03SJunchao Zhang ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL,NULL);CHKERRQ(ierr); 8640e23c03SJunchao Zhang ierr = PetscNew(&link);CHKERRQ(ierr); 8740e23c03SJunchao Zhang ierr = PetscSFPackSetupType((PetscSFPack)link,unit);CHKERRQ(ierr); 8840e23c03SJunchao Zhang ierr = PetscMalloc2(nrootranks,&link->root,nleafranks,&link->leaf);CHKERRQ(ierr); 8940e23c03SJunchao Zhang /* Double the requests. First half are used for reduce (leaf2root) communication, second half for bcast (root2leaf) communication */ 9040e23c03SJunchao Zhang link->half = nrootranks + nleafranks; 9140e23c03SJunchao Zhang ierr = PetscMalloc1(link->half*2,&link->requests);CHKERRQ(ierr); 9240e23c03SJunchao Zhang for (i=0; i<link->half*2; i++) link->requests[i] = MPI_REQUEST_NULL; /* Must be init'ed since some are unused but we call MPI_Waitall on them in whole */ 9340e23c03SJunchao Zhang /* One tag per link */ 9440e23c03SJunchao Zhang ierr = PetscCommGetNewTag(PetscObjectComm((PetscObject)sf),&link->tag);CHKERRQ(ierr); 9553deab39SPeter Brune 9640e23c03SJunchao Zhang /* Allocate root and leaf buffers */ 9740e23c03SJunchao Zhang for (i=0; i<nrootranks; i++) {ierr = PetscMalloc((rootoffset[i+1]-rootoffset[i])*link->unitbytes,&link->root[i]);CHKERRQ(ierr);} 9840e23c03SJunchao Zhang for (i=0; i<nleafranks; i++) { 9940e23c03SJunchao Zhang if (i < ndleafranks) { /* Leaf buffers for distinguished ranks are pointers directly into root buffers */ 10040e23c03SJunchao Zhang if (ndrootranks != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot match distinguished ranks"); 10140e23c03SJunchao Zhang link->leaf[i] = link->root[0]; 10240e23c03SJunchao Zhang continue; 10340e23c03SJunchao Zhang } 10440e23c03SJunchao Zhang ierr = PetscMalloc((leafoffset[i+1]-leafoffset[i])*link->unitbytes,&link->leaf[i]);CHKERRQ(ierr); 10553deab39SPeter Brune } 10653deab39SPeter Brune 10740e23c03SJunchao Zhang found: 108*9d1c8addSJunchao Zhang link->rkey = rkey; 109*9d1c8addSJunchao Zhang link->lkey = lkey; 11040e23c03SJunchao Zhang link->next = bas->inuse; 11140e23c03SJunchao Zhang bas->inuse = (PetscSFPack)link; 11240e23c03SJunchao Zhang 11340e23c03SJunchao Zhang *mylink = link; 11440e23c03SJunchao Zhang PetscFunctionReturn(0); 11595fce210SBarry Smith } 11695fce210SBarry Smith 11740e23c03SJunchao Zhang /*===================================================================================*/ 11840e23c03SJunchao Zhang /* SF public interface implementations */ 11940e23c03SJunchao Zhang /*===================================================================================*/ 12040e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf) 12195fce210SBarry Smith { 12295fce210SBarry Smith PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 12395fce210SBarry Smith PetscErrorCode ierr; 12495fce210SBarry Smith PetscInt *rlengths,*ilengths,i; 12540e23c03SJunchao Zhang PetscMPIInt rank,niranks,*iranks,tag; 12695fce210SBarry Smith MPI_Comm comm; 127b5a8e515SJed Brown MPI_Group group; 12840e23c03SJunchao Zhang MPI_Request *rootreqs,*leafreqs; 12995fce210SBarry Smith 13095fce210SBarry Smith PetscFunctionBegin; 131b5a8e515SJed Brown ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRQ(ierr); 132b5a8e515SJed Brown ierr = PetscSFSetUpRanks(sf,group);CHKERRQ(ierr); 133b5a8e515SJed Brown ierr = MPI_Group_free(&group);CHKERRQ(ierr); 13495fce210SBarry Smith ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 13540e23c03SJunchao Zhang ierr = PetscObjectGetNewTag((PetscObject)sf,&tag);CHKERRQ(ierr); 136c943f53fSJed Brown ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 13795fce210SBarry Smith /* 13895fce210SBarry Smith * Inform roots about how many leaves and from which ranks 13995fce210SBarry Smith */ 140785e854fSJed Brown ierr = PetscMalloc1(sf->nranks,&rlengths);CHKERRQ(ierr); 14195fce210SBarry Smith /* Determine number, sending ranks, and length of incoming */ 14295fce210SBarry Smith for (i=0; i<sf->nranks; i++) { 14395fce210SBarry Smith rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */ 14495fce210SBarry Smith } 14540e23c03SJunchao Zhang ierr = PetscCommBuildTwoSided(comm,1,MPIU_INT,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths);CHKERRQ(ierr); 146c943f53fSJed Brown 1470b899082SJunchao Zhang /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why. 1480b899082SJunchao Zhang We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is 1490b899082SJunchao Zhang small and the sorting is cheap. 1500b899082SJunchao Zhang */ 1510b899082SJunchao Zhang ierr = PetscSortMPIIntWithIntArray(niranks,iranks,ilengths);CHKERRQ(ierr); 1520b899082SJunchao Zhang 153c943f53fSJed Brown /* Partition into distinguished and non-distinguished incoming ranks */ 154c943f53fSJed Brown bas->ndiranks = sf->ndranks; 155c943f53fSJed Brown bas->niranks = bas->ndiranks + niranks; 156c943f53fSJed Brown ierr = PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset);CHKERRQ(ierr); 157c943f53fSJed Brown bas->ioffset[0] = 0; 158c943f53fSJed Brown for (i=0; i<bas->ndiranks; i++) { 159c943f53fSJed Brown bas->iranks[i] = sf->ranks[i]; 160c943f53fSJed Brown bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i]; 161c943f53fSJed Brown } 16240e23c03SJunchao Zhang if (bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks"); 16340e23c03SJunchao Zhang for ( ; i<bas->niranks; i++) { 164c943f53fSJed Brown bas->iranks[i] = iranks[i-bas->ndiranks]; 165c943f53fSJed Brown bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks]; 166c943f53fSJed Brown } 167c943f53fSJed Brown bas->itotal = bas->ioffset[i]; 16895fce210SBarry Smith ierr = PetscFree(rlengths);CHKERRQ(ierr); 169c943f53fSJed Brown ierr = PetscFree(iranks);CHKERRQ(ierr); 170c943f53fSJed Brown ierr = PetscFree(ilengths);CHKERRQ(ierr); 17195fce210SBarry Smith 17295fce210SBarry Smith /* Send leaf identities to roots */ 173c943f53fSJed Brown ierr = PetscMalloc1(bas->itotal,&bas->irootloc);CHKERRQ(ierr); 17440e23c03SJunchao Zhang ierr = PetscMalloc2(bas->niranks-bas->ndiranks,&rootreqs,sf->nranks-sf->ndranks,&leafreqs);CHKERRQ(ierr); 17540e23c03SJunchao Zhang for (i=bas->ndiranks; i<bas->niranks; i++) { 17640e23c03SJunchao Zhang ierr = MPI_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],tag,comm,&rootreqs[i-bas->ndiranks]);CHKERRQ(ierr); 17740e23c03SJunchao Zhang } 17840e23c03SJunchao Zhang for (i=0; i<sf->nranks; i++) { 17995fce210SBarry Smith PetscMPIInt npoints; 18095fce210SBarry Smith ierr = PetscMPIIntCast(sf->roffset[i+1] - sf->roffset[i],&npoints);CHKERRQ(ierr); 18140e23c03SJunchao Zhang if (i < sf->ndranks) { 18240e23c03SJunchao Zhang if (sf->ranks[i] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank"); 18340e23c03SJunchao Zhang if (bas->iranks[0] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank"); 18440e23c03SJunchao Zhang if (npoints != bas->ioffset[1]-bas->ioffset[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths"); 18540e23c03SJunchao Zhang ierr = PetscArraycpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints);CHKERRQ(ierr); 186c943f53fSJed Brown continue; 187c943f53fSJed Brown } 18840e23c03SJunchao Zhang ierr = MPI_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],tag,comm,&leafreqs[i-sf->ndranks]);CHKERRQ(ierr); 189bf39f1bfSJed Brown } 19040e23c03SJunchao Zhang ierr = MPI_Waitall(bas->niranks-bas->ndiranks,rootreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 19140e23c03SJunchao Zhang ierr = MPI_Waitall(sf->nranks-sf->ndranks,leafreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 19240e23c03SJunchao Zhang ierr = PetscFree2(rootreqs,leafreqs);CHKERRQ(ierr); 19395fce210SBarry Smith 19440e23c03SJunchao Zhang /* Setup packing optimization for root and leaf */ 19540e23c03SJunchao Zhang ierr = PetscSFPackSetupOptimization(sf->nranks,sf->roffset,sf->rmine,&sf->leafpackopt);CHKERRQ(ierr); 19640e23c03SJunchao Zhang ierr = PetscSFPackSetupOptimization(bas->niranks,bas->ioffset,bas->irootloc,&bas->rootpackopt);CHKERRQ(ierr); 19795fce210SBarry Smith PetscFunctionReturn(0); 19895fce210SBarry Smith } 19995fce210SBarry Smith 2004416b707SBarry Smith static PetscErrorCode PetscSFSetFromOptions_Basic(PetscOptionItems *PetscOptionsObject,PetscSF sf) 20195fce210SBarry Smith { 20295fce210SBarry Smith PetscErrorCode ierr; 20395fce210SBarry Smith 20495fce210SBarry Smith PetscFunctionBegin; 205e55864a3SBarry Smith ierr = PetscOptionsHead(PetscOptionsObject,"PetscSF Basic options");CHKERRQ(ierr); 20695fce210SBarry Smith ierr = PetscOptionsTail();CHKERRQ(ierr); 20795fce210SBarry Smith PetscFunctionReturn(0); 20895fce210SBarry Smith } 20995fce210SBarry Smith 21040e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf) 21195fce210SBarry Smith { 21295fce210SBarry Smith PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 21395fce210SBarry Smith PetscErrorCode ierr; 21440e23c03SJunchao Zhang PetscSFPack_Basic link,next; 21595fce210SBarry Smith 21695fce210SBarry Smith PetscFunctionBegin; 21729046d53SLisandro Dalcin if (bas->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed"); 218c943f53fSJed Brown ierr = PetscFree2(bas->iranks,bas->ioffset);CHKERRQ(ierr); 219c943f53fSJed Brown ierr = PetscFree(bas->irootloc);CHKERRQ(ierr); 22040e23c03SJunchao Zhang for (link=(PetscSFPack_Basic)bas->avail; link; link=next) { 221bf39f1bfSJed Brown PetscInt i; 22240e23c03SJunchao Zhang next = (PetscSFPack_Basic)link->next; 22377b7e48dSJunchao Zhang if (!link->isbuiltin) {ierr = MPI_Type_free(&link->unit);CHKERRQ(ierr);} 224bf39f1bfSJed Brown for (i=0; i<bas->niranks; i++) {ierr = PetscFree(link->root[i]);CHKERRQ(ierr);} 225c943f53fSJed Brown for (i=sf->ndranks; i<sf->nranks; i++) {ierr = PetscFree(link->leaf[i]);CHKERRQ(ierr);} /* Free only non-distinguished leaf buffers */ 22695fce210SBarry Smith ierr = PetscFree2(link->root,link->leaf);CHKERRQ(ierr); 22730e38525SJunchao Zhang /* Free persistent requests using MPI_Request_free */ 22840e23c03SJunchao Zhang for (i=0; i<link->half*2; i++) { 22940e23c03SJunchao Zhang if (link->requests[i] != MPI_REQUEST_NULL) {ierr = MPI_Request_free(&link->requests[i]);CHKERRQ(ierr);} 23030e38525SJunchao Zhang } 23195fce210SBarry Smith ierr = PetscFree(link->requests);CHKERRQ(ierr); 23295fce210SBarry Smith ierr = PetscFree(link);CHKERRQ(ierr); 23395fce210SBarry Smith } 23495fce210SBarry Smith bas->avail = NULL; 23540e23c03SJunchao Zhang ierr = PetscSFPackDestoryOptimization(&sf->leafpackopt);CHKERRQ(ierr); 23640e23c03SJunchao Zhang ierr = PetscSFPackDestoryOptimization(&bas->rootpackopt);CHKERRQ(ierr); 23795fce210SBarry Smith PetscFunctionReturn(0); 23895fce210SBarry Smith } 23995fce210SBarry Smith 24040e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf) 24195fce210SBarry Smith { 24295fce210SBarry Smith PetscErrorCode ierr; 24395fce210SBarry Smith 24495fce210SBarry Smith PetscFunctionBegin; 24540e23c03SJunchao Zhang ierr = PetscSFReset(sf);CHKERRQ(ierr); 24695fce210SBarry Smith ierr = PetscFree(sf->data);CHKERRQ(ierr); 24795fce210SBarry Smith PetscFunctionReturn(0); 24895fce210SBarry Smith } 24995fce210SBarry Smith 25040e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer) 25195fce210SBarry Smith { 25295fce210SBarry Smith PetscErrorCode ierr; 25395fce210SBarry Smith PetscBool iascii; 25495fce210SBarry Smith 25595fce210SBarry Smith PetscFunctionBegin; 25695fce210SBarry Smith ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 25795fce210SBarry Smith if (iascii) { 25895fce210SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," sort=%s\n",sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr); 25995fce210SBarry Smith } 26095fce210SBarry Smith PetscFunctionReturn(0); 26195fce210SBarry Smith } 26295fce210SBarry Smith 2633482bfa8SJunchao Zhang static PetscErrorCode PetscSFBcastAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 26495fce210SBarry Smith { 26595fce210SBarry Smith PetscErrorCode ierr; 26640e23c03SJunchao Zhang PetscSFPack_Basic link; 267c943f53fSJed Brown PetscInt i,nrootranks,ndrootranks,nleafranks,ndleafranks; 26895fce210SBarry Smith const PetscInt *rootoffset,*leafoffset,*rootloc,*leafloc; 26995fce210SBarry Smith const PetscMPIInt *rootranks,*leafranks; 27095fce210SBarry Smith MPI_Request *rootreqs,*leafreqs; 27130e38525SJunchao Zhang PetscMPIInt n; 27240e23c03SJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 27395fce210SBarry Smith 27495fce210SBarry Smith PetscFunctionBegin; 27540e23c03SJunchao Zhang ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr); 27640e23c03SJunchao Zhang ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc,NULL);CHKERRQ(ierr); 277*9d1c8addSJunchao Zhang ierr = PetscSFPackGet_Basic(sf,unit,rootdata,leafdata,PETSCSF_ROOT2LEAF_BCAST,&link);CHKERRQ(ierr); 27895fce210SBarry Smith 27940e23c03SJunchao Zhang ierr = PetscSFPackGetReqs_Basic(sf,unit,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);CHKERRQ(ierr); 280c943f53fSJed Brown /* Eagerly post leaf receives, but only from non-distinguished ranks -- distinguished ranks will receive via shared memory */ 28130e38525SJunchao Zhang ierr = PetscMPIIntCast(leafoffset[nleafranks]-leafoffset[ndleafranks],&n);CHKERRQ(ierr); 28240e23c03SJunchao Zhang ierr = MPI_Startall_irecv(n,unit,nleafranks-ndleafranks,leafreqs+ndleafranks);CHKERRQ(ierr); /* One can wait but not start a null request */ 28330e38525SJunchao Zhang 28495fce210SBarry Smith /* Pack and send root data */ 28595fce210SBarry Smith for (i=0; i<nrootranks; i++) { 286bf39f1bfSJed Brown void *packstart = link->root[i]; 28730e38525SJunchao Zhang ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr); 28840e23c03SJunchao Zhang (*link->Pack)(n,link->bs,rootloc+rootoffset[i],i,bas->rootpackopt,rootdata,packstart); 289c943f53fSJed Brown if (i < ndrootranks) continue; /* shared memory */ 29040e23c03SJunchao Zhang ierr = MPI_Start_isend(n,unit,&rootreqs[i]);CHKERRQ(ierr); 29195fce210SBarry Smith } 29295fce210SBarry Smith PetscFunctionReturn(0); 29395fce210SBarry Smith } 29495fce210SBarry Smith 29540e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFBcastAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 29695fce210SBarry Smith { 29795fce210SBarry Smith PetscErrorCode ierr; 29840e23c03SJunchao Zhang PetscSFPack_Basic link; 299c943f53fSJed Brown PetscInt i,nleafranks,ndleafranks; 30095fce210SBarry Smith const PetscInt *leafoffset,*leafloc; 30140e23c03SJunchao Zhang PetscErrorCode (*UnpackAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*); 3023482bfa8SJunchao Zhang PetscMPIInt typesize = -1; 30395fce210SBarry Smith 30495fce210SBarry Smith PetscFunctionBegin; 305*9d1c8addSJunchao Zhang ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr); 30640e23c03SJunchao Zhang ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr); 30740e23c03SJunchao Zhang ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,&leafloc,NULL);CHKERRQ(ierr); 30840e23c03SJunchao Zhang ierr = PetscSFPackGetUnpackAndOp(sf,(PetscSFPack)link,op,&UnpackAndOp);CHKERRQ(ierr); 3093482bfa8SJunchao Zhang 31040e23c03SJunchao Zhang if (UnpackAndOp) { typesize = link->unitbytes; } 3113482bfa8SJunchao Zhang else { ierr = MPI_Type_size(unit,&typesize);CHKERRQ(ierr); } 3123482bfa8SJunchao Zhang 31395fce210SBarry Smith for (i=0; i<nleafranks; i++) { 31495fce210SBarry Smith PetscMPIInt n = leafoffset[i+1] - leafoffset[i]; 3153482bfa8SJunchao Zhang char *packstart = (char *) link->leaf[i]; 31640e23c03SJunchao Zhang if (UnpackAndOp) { (*UnpackAndOp)(n,link->bs,leafloc+leafoffset[i],i,sf->leafpackopt,leafdata,(const void *)packstart); } 3173482bfa8SJunchao Zhang #if defined(PETSC_HAVE_MPI_REDUCE_LOCAL) 3183482bfa8SJunchao Zhang else if (n) { /* the op should be defined to operate on the whole datatype, so we ignore link->bs */ 3193482bfa8SJunchao Zhang PetscInt j; 3203482bfa8SJunchao Zhang for (j=0; j<n; j++) { ierr = MPI_Reduce_local(packstart+j*typesize,((char *) leafdata)+(leafloc[leafoffset[i]+j])*typesize,1,unit,op);CHKERRQ(ierr); } 32195fce210SBarry Smith } 3223482bfa8SJunchao Zhang #else 323d2a25953SJunchao Zhang else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No unpacking reduction operation for this MPI_Op"); 3243482bfa8SJunchao Zhang #endif 3253482bfa8SJunchao Zhang } 3263482bfa8SJunchao Zhang 32740e23c03SJunchao Zhang ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr); 32895fce210SBarry Smith PetscFunctionReturn(0); 32995fce210SBarry Smith } 33095fce210SBarry Smith 33195fce210SBarry Smith /* leaf -> root with reduction */ 33240e23c03SJunchao Zhang static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 33395fce210SBarry Smith { 33440e23c03SJunchao Zhang PetscSFPack_Basic link; 33595fce210SBarry Smith PetscErrorCode ierr; 336c943f53fSJed Brown PetscInt i,nrootranks,ndrootranks,nleafranks,ndleafranks; 33795fce210SBarry Smith const PetscInt *rootoffset,*leafoffset,*rootloc,*leafloc; 33895fce210SBarry Smith const PetscMPIInt *rootranks,*leafranks; 33995fce210SBarry Smith MPI_Request *rootreqs,*leafreqs; 34030e38525SJunchao Zhang PetscMPIInt n; 34195fce210SBarry Smith 34295fce210SBarry Smith PetscFunctionBegin; 34340e23c03SJunchao Zhang ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr); 34440e23c03SJunchao Zhang ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc,NULL);CHKERRQ(ierr); 345*9d1c8addSJunchao Zhang ierr = PetscSFPackGet_Basic(sf,unit,rootdata,leafdata,PETSCSF_LEAF2ROOT_REDUCE,&link);CHKERRQ(ierr); 34695fce210SBarry Smith 34740e23c03SJunchao Zhang ierr = PetscSFPackGetReqs_Basic(sf,unit,link,PETSCSF_LEAF2ROOT_REDUCE,&rootreqs,&leafreqs);CHKERRQ(ierr); 348c943f53fSJed Brown /* Eagerly post root receives for non-distinguished ranks */ 34930e38525SJunchao Zhang ierr = PetscMPIIntCast(rootoffset[nrootranks]-rootoffset[ndrootranks],&n);CHKERRQ(ierr); 35040e23c03SJunchao Zhang ierr = MPI_Startall_irecv(n,unit,nrootranks-ndrootranks,rootreqs+ndrootranks);CHKERRQ(ierr); 35130e38525SJunchao Zhang 35295fce210SBarry Smith /* Pack and send leaf data */ 35395fce210SBarry Smith for (i=0; i<nleafranks; i++) { 354bf39f1bfSJed Brown void *packstart = link->leaf[i]; 35530e38525SJunchao Zhang ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr); 35640e23c03SJunchao Zhang (*link->Pack)(n,link->bs,leafloc+leafoffset[i],i,sf->leafpackopt,leafdata,packstart); 357c943f53fSJed Brown if (i < ndleafranks) continue; /* shared memory */ 35840e23c03SJunchao Zhang ierr = MPI_Start_isend(n,unit,&leafreqs[i]);CHKERRQ(ierr); 35995fce210SBarry Smith } 36095fce210SBarry Smith PetscFunctionReturn(0); 36195fce210SBarry Smith } 36295fce210SBarry Smith 36340e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 36495fce210SBarry Smith { 36540e23c03SJunchao Zhang PetscErrorCode (*UnpackAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*); 36695fce210SBarry Smith PetscErrorCode ierr; 36740e23c03SJunchao Zhang PetscSFPack_Basic link; 36895fce210SBarry Smith PetscInt i,nrootranks; 3691620da3bSToby Isaac PetscMPIInt typesize = -1; 37095fce210SBarry Smith const PetscInt *rootoffset,*rootloc; 37140e23c03SJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 37295fce210SBarry Smith 37395fce210SBarry Smith PetscFunctionBegin; 374*9d1c8addSJunchao Zhang ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr); 37595fce210SBarry Smith /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */ 37640e23c03SJunchao Zhang ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr); 37740e23c03SJunchao Zhang ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,NULL,NULL,&rootoffset,&rootloc);CHKERRQ(ierr); 37840e23c03SJunchao Zhang ierr = PetscSFPackGetUnpackAndOp(sf,(PetscSFPack)link,op,&UnpackAndOp);CHKERRQ(ierr); 37940e23c03SJunchao Zhang if (UnpackAndOp) { 3801620da3bSToby Isaac typesize = link->unitbytes; 3811620da3bSToby Isaac } 3821620da3bSToby Isaac else { 3831620da3bSToby Isaac ierr = MPI_Type_size(unit,&typesize);CHKERRQ(ierr); 3841620da3bSToby Isaac } 38595fce210SBarry Smith for (i=0; i<nrootranks; i++) { 38695fce210SBarry Smith PetscMPIInt n = rootoffset[i+1] - rootoffset[i]; 387bf39f1bfSJed Brown char *packstart = (char *) link->root[i]; 38895fce210SBarry Smith 38940e23c03SJunchao Zhang if (UnpackAndOp) { 39040e23c03SJunchao Zhang (*UnpackAndOp)(n,link->bs,rootloc+rootoffset[i],i,bas->rootpackopt,rootdata,(const void *)packstart); 39195fce210SBarry Smith } 39262795ce3SStefano Zampini #if defined(PETSC_HAVE_MPI_REDUCE_LOCAL) 39378ec4774SToby Isaac else if (n) { /* the op should be defined to operate on the whole datatype, so we ignore link->bs */ 39478ec4774SToby Isaac PetscInt j; 3951620da3bSToby Isaac 3961620da3bSToby Isaac for (j = 0; j < n; j++) { 39778ec4774SToby Isaac ierr = MPI_Reduce_local(packstart+j*typesize,((char *) rootdata)+(rootloc[rootoffset[i]+j])*typesize,1,unit,op);CHKERRQ(ierr); 3981620da3bSToby Isaac } 3991620da3bSToby Isaac } 400a9ec7e9eSToby Isaac #else 401d2a25953SJunchao Zhang else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No unpacking reduction operation for this MPI_Op"); 402a9ec7e9eSToby Isaac #endif 4031620da3bSToby Isaac } 40440e23c03SJunchao Zhang ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr); 40595fce210SBarry Smith PetscFunctionReturn(0); 40695fce210SBarry Smith } 40795fce210SBarry Smith 40840e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 40995fce210SBarry Smith { 41095fce210SBarry Smith PetscErrorCode ierr; 41195fce210SBarry Smith 41295fce210SBarry Smith PetscFunctionBegin; 41340e23c03SJunchao Zhang ierr = PetscSFReduceBegin(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr); 41495fce210SBarry Smith PetscFunctionReturn(0); 41595fce210SBarry Smith } 41695fce210SBarry Smith 41795fce210SBarry Smith static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 41895fce210SBarry Smith { 41940e23c03SJunchao Zhang PetscErrorCode (*FetchAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*); 42095fce210SBarry Smith PetscErrorCode ierr; 42140e23c03SJunchao Zhang PetscSFPack_Basic link; 422c943f53fSJed Brown PetscInt i,nrootranks,ndrootranks,nleafranks,ndleafranks; 42395fce210SBarry Smith const PetscInt *rootoffset,*leafoffset,*rootloc,*leafloc; 42495fce210SBarry Smith const PetscMPIInt *rootranks,*leafranks; 42595fce210SBarry Smith MPI_Request *rootreqs,*leafreqs; 42630e38525SJunchao Zhang PetscMPIInt n; 42740e23c03SJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 42895fce210SBarry Smith 42995fce210SBarry Smith PetscFunctionBegin; 430*9d1c8addSJunchao Zhang ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr); 43195fce210SBarry Smith /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */ 43240e23c03SJunchao Zhang ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr); 43340e23c03SJunchao Zhang ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);CHKERRQ(ierr); 43440e23c03SJunchao Zhang ierr = PetscSFGetLeafInfo_Basic(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc,NULL);CHKERRQ(ierr); 43540e23c03SJunchao Zhang 43640e23c03SJunchao Zhang ierr = PetscSFPackGetReqs_Basic(sf,unit,link,PETSCSF_ROOT2LEAF_BCAST,&rootreqs,&leafreqs);CHKERRQ(ierr); 43795fce210SBarry Smith /* Post leaf receives */ 43830e38525SJunchao Zhang ierr = PetscMPIIntCast(leafoffset[nleafranks]-leafoffset[ndleafranks],&n);CHKERRQ(ierr); 43940e23c03SJunchao Zhang ierr = MPI_Startall_irecv(n,unit,nleafranks-ndleafranks,leafreqs+ndleafranks);CHKERRQ(ierr); 44030e38525SJunchao Zhang 44195fce210SBarry Smith /* Process local fetch-and-op, post root sends */ 44240e23c03SJunchao Zhang ierr = PetscSFPackGetFetchAndOp(sf,(PetscSFPack)link,op,&FetchAndOp);CHKERRQ(ierr); 44395fce210SBarry Smith for (i=0; i<nrootranks; i++) { 444bf39f1bfSJed Brown void *packstart = link->root[i]; 44530e38525SJunchao Zhang ierr = PetscMPIIntCast(rootoffset[i+1]-rootoffset[i],&n);CHKERRQ(ierr); 44640e23c03SJunchao Zhang (*FetchAndOp)(n,link->bs,rootloc+rootoffset[i],i,bas->rootpackopt,rootdata,packstart); 447c943f53fSJed Brown if (i < ndrootranks) continue; /* shared memory */ 44840e23c03SJunchao Zhang ierr = MPI_Start_isend(n,unit,&rootreqs[i]);CHKERRQ(ierr); 44995fce210SBarry Smith } 45040e23c03SJunchao Zhang ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr); 45195fce210SBarry Smith for (i=0; i<nleafranks; i++) { 452bf39f1bfSJed Brown const void *packstart = link->leaf[i]; 45330e38525SJunchao Zhang ierr = PetscMPIIntCast(leafoffset[i+1]-leafoffset[i],&n);CHKERRQ(ierr); 45440e23c03SJunchao Zhang (*link->UnpackAndInsert)(n,link->bs,leafloc+leafoffset[i],i,sf->leafpackopt,leafupdate,packstart); 45595fce210SBarry Smith } 45640e23c03SJunchao Zhang ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr); 45795fce210SBarry Smith PetscFunctionReturn(0); 45895fce210SBarry Smith } 45995fce210SBarry Smith 46040e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc) 4618750ddebSJunchao Zhang { 4628750ddebSJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 4638750ddebSJunchao Zhang 4648750ddebSJunchao Zhang PetscFunctionBegin; 4658750ddebSJunchao Zhang if (niranks) *niranks = bas->niranks; 4668750ddebSJunchao Zhang if (iranks) *iranks = bas->iranks; 4678750ddebSJunchao Zhang if (ioffset) *ioffset = bas->ioffset; 4688750ddebSJunchao Zhang if (irootloc) *irootloc = bas->irootloc; 4698750ddebSJunchao Zhang PetscFunctionReturn(0); 4708750ddebSJunchao Zhang } 4718750ddebSJunchao Zhang 472f659e5c7SJunchao Zhang /* An optimized PetscSFCreateEmbeddedSF. We aggresively make use of the established communication on sf. 473f659e5c7SJunchao Zhang We need one bcast on sf, and no communication anymore to build the embedded sf. Note that selected[] 474f659e5c7SJunchao Zhang was sorted before calling the routine. 475f659e5c7SJunchao Zhang */ 476f659e5c7SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 477f659e5c7SJunchao Zhang { 478f659e5c7SJunchao Zhang PetscSF esf; 479f659e5c7SJunchao Zhang PetscInt esf_nranks,esf_ndranks,*esf_roffset,*esf_rmine,*esf_rremote; 480f659e5c7SJunchao Zhang PetscInt i,j,k,p,q,nroots,*rootdata,*leafdata,*leafbuf,connected_leaves,*new_ilocal,nranks,ndranks,niranks,ndiranks,minleaf,maxleaf,maxlocal; 481f659e5c7SJunchao Zhang PetscMPIInt *esf_ranks; 482f659e5c7SJunchao Zhang const PetscMPIInt *ranks,*iranks; 483f659e5c7SJunchao Zhang const PetscInt *roffset,*rmine,*rremote,*ioffset,*irootloc; 484f659e5c7SJunchao Zhang PetscBool connected; 485f659e5c7SJunchao Zhang PetscSFPack_Basic link; 486f659e5c7SJunchao Zhang PetscSFNode *new_iremote; 487f659e5c7SJunchao Zhang PetscSF_Basic *bas; 488f659e5c7SJunchao Zhang PetscErrorCode ierr; 489f659e5c7SJunchao Zhang 490f659e5c7SJunchao Zhang PetscFunctionBegin; 491f659e5c7SJunchao Zhang ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);CHKERRQ(ierr); 492f659e5c7SJunchao Zhang ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */ 493f659e5c7SJunchao Zhang 494f659e5c7SJunchao Zhang /* Find out which leaves are still connected to roots in the embedded sf */ 495f659e5c7SJunchao Zhang ierr = PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);CHKERRQ(ierr); 496f659e5c7SJunchao Zhang ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr); 497f659e5c7SJunchao Zhang /* We abused the term leafdata here, whose size is usually the number of leaf data items. Here its size is # of leaves (always >= # of leaf data items) */ 498f659e5c7SJunchao Zhang maxlocal = (minleaf > maxleaf)? 0 : maxleaf-minleaf+1; /* maxleaf=-1 and minleaf=0 when nleaves=0 */ 499f659e5c7SJunchao Zhang ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafdata);CHKERRQ(ierr); 500f659e5c7SJunchao Zhang /* Tag selected roots */ 501f659e5c7SJunchao Zhang for (i=0; i<nselected; ++i) rootdata[selected[i]] = 1; 502f659e5c7SJunchao Zhang 503f659e5c7SJunchao Zhang /* Bcast from roots to leaves to tag connected leaves. We reuse the established bcast communication in 504f659e5c7SJunchao Zhang sf but do not do unpacking (from leaf buffer to leafdata). The raw data in leaf buffer is what we are 505f659e5c7SJunchao Zhang interested in since it tells which leaves are connected to which ranks. 506f659e5c7SJunchao Zhang */ 507f659e5c7SJunchao Zhang ierr = PetscSFBcastAndOpBegin_Basic(sf,MPIU_INT,rootdata,leafdata-minleaf,MPIU_REPLACE);CHKERRQ(ierr); /* Need to give leafdata but we won't use it */ 508*9d1c8addSJunchao Zhang ierr = PetscSFPackGetInUse(sf,MPIU_INT,rootdata,leafdata-minleaf,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr); 509f659e5c7SJunchao Zhang ierr = PetscSFPackWaitall_Basic(link,PETSCSF_ROOT2LEAF_BCAST);CHKERRQ(ierr); 510f659e5c7SJunchao Zhang ierr = PetscSFGetLeafInfo_Basic(sf,&nranks,&ndranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr); /* Get send info */ 511f659e5c7SJunchao Zhang esf_nranks = esf_ndranks = connected_leaves = 0; 512f659e5c7SJunchao Zhang for (i=0; i<nranks; i++) { /* Scan leaf data to calculate some counts */ 513f659e5c7SJunchao Zhang leafbuf = (PetscInt*)link->leaf[i]; 514f659e5c7SJunchao Zhang connected = PETSC_FALSE; /* Is the current process still connected to this remote root rank? */ 515f659e5c7SJunchao Zhang for (j=roffset[i],k=0; j<roffset[i+1]; j++,k++) { 516f659e5c7SJunchao Zhang if (leafbuf[k]) { 517f659e5c7SJunchao Zhang connected_leaves++; 518f659e5c7SJunchao Zhang connected = PETSC_TRUE; 519f659e5c7SJunchao Zhang } 520f659e5c7SJunchao Zhang } 521f659e5c7SJunchao Zhang if (connected) {esf_nranks++; if (i<ndranks) esf_ndranks++;} 522f659e5c7SJunchao Zhang } 523f659e5c7SJunchao Zhang 524f659e5c7SJunchao Zhang /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */ 525f659e5c7SJunchao Zhang ierr = PetscMalloc1(connected_leaves,&new_ilocal);CHKERRQ(ierr); 526f659e5c7SJunchao Zhang ierr = PetscMalloc1(connected_leaves,&new_iremote);CHKERRQ(ierr); 527f659e5c7SJunchao Zhang ierr = PetscMalloc4(esf_nranks,&esf_ranks,esf_nranks+1,&esf_roffset,connected_leaves,&esf_rmine,connected_leaves,&esf_rremote);CHKERRQ(ierr); 528f659e5c7SJunchao Zhang p = 0; /* Counter for connected root ranks */ 529f659e5c7SJunchao Zhang q = 0; /* Counter for connected leaves */ 530f659e5c7SJunchao Zhang esf_roffset[0] = 0; 531f659e5c7SJunchao Zhang for (i=0; i<nranks; i++) { /* Scan leaf data again to fill esf arrays */ 532f659e5c7SJunchao Zhang leafbuf = (PetscInt*)link->leaf[i]; 533f659e5c7SJunchao Zhang connected = PETSC_FALSE; 534f659e5c7SJunchao Zhang for (j=roffset[i],k=0; j<roffset[i+1]; j++,k++) { 535f659e5c7SJunchao Zhang if (leafbuf[k]) { 536f659e5c7SJunchao Zhang esf_rmine[q] = new_ilocal[q] = rmine[j]; 537f659e5c7SJunchao Zhang esf_rremote[q] = rremote[j]; 538f659e5c7SJunchao Zhang new_iremote[q].index = rremote[j]; 539f659e5c7SJunchao Zhang new_iremote[q].rank = ranks[i]; 540f659e5c7SJunchao Zhang connected = PETSC_TRUE; 541f659e5c7SJunchao Zhang q++; 542f659e5c7SJunchao Zhang } 543f659e5c7SJunchao Zhang } 544f659e5c7SJunchao Zhang if (connected) { 545f659e5c7SJunchao Zhang esf_ranks[p] = ranks[i]; 546f659e5c7SJunchao Zhang esf_roffset[p+1] = q; 547f659e5c7SJunchao Zhang p++; 548f659e5c7SJunchao Zhang } 549f659e5c7SJunchao Zhang } 550f659e5c7SJunchao Zhang 551f659e5c7SJunchao Zhang ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr); 552f659e5c7SJunchao Zhang 553f659e5c7SJunchao Zhang /* SetGraph internally resets the SF, so we only set its fields after the call */ 554f659e5c7SJunchao Zhang ierr = PetscSFSetGraph(esf,nroots,connected_leaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 555f659e5c7SJunchao Zhang esf->nranks = esf_nranks; 556f659e5c7SJunchao Zhang esf->ndranks = esf_ndranks; 557f659e5c7SJunchao Zhang esf->ranks = esf_ranks; 558f659e5c7SJunchao Zhang esf->roffset = esf_roffset; 559f659e5c7SJunchao Zhang esf->rmine = esf_rmine; 560f659e5c7SJunchao Zhang esf->rremote = esf_rremote; 561f659e5c7SJunchao Zhang 562f659e5c7SJunchao Zhang /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */ 563f659e5c7SJunchao Zhang bas = (PetscSF_Basic*)esf->data; 564f659e5c7SJunchao Zhang ierr = PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr); /* Get recv info */ 565f659e5c7SJunchao Zhang /* Embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here. But we 566f659e5c7SJunchao Zhang expect these arrays are usually short, so we do not care. The benefit is we can fill these arrays by just parsing irootloc once. 567f659e5c7SJunchao Zhang */ 568f659e5c7SJunchao Zhang ierr = PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);CHKERRQ(ierr); 569f659e5c7SJunchao Zhang ierr = PetscMalloc1(ioffset[niranks],&bas->irootloc);CHKERRQ(ierr); 570f659e5c7SJunchao Zhang bas->niranks = bas->ndiranks = bas->ioffset[0] = 0; 571f659e5c7SJunchao Zhang p = 0; /* Counter for connected leaf ranks */ 572f659e5c7SJunchao Zhang q = 0; /* Counter for connected roots */ 573f659e5c7SJunchao Zhang for (i=0; i<niranks; i++) { 574f659e5c7SJunchao Zhang connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */ 575f659e5c7SJunchao Zhang for (j=ioffset[i]; j<ioffset[i+1]; j++) { 576f659e5c7SJunchao Zhang PetscInt loc; 577f659e5c7SJunchao Zhang ierr = PetscFindInt(irootloc[j],nselected,selected,&loc);CHKERRQ(ierr); 578f659e5c7SJunchao Zhang if (loc >= 0) { /* Found in selected this root is connected */ 579f659e5c7SJunchao Zhang bas->irootloc[q++] = irootloc[j]; 580f659e5c7SJunchao Zhang connected = PETSC_TRUE; 581f659e5c7SJunchao Zhang } 582f659e5c7SJunchao Zhang } 583f659e5c7SJunchao Zhang if (connected) { 584f659e5c7SJunchao Zhang bas->niranks++; 585f659e5c7SJunchao Zhang if (i<ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */ 586f659e5c7SJunchao Zhang bas->iranks[p] = iranks[i]; 587f659e5c7SJunchao Zhang bas->ioffset[p+1] = q; 588f659e5c7SJunchao Zhang p++; 589f659e5c7SJunchao Zhang } 590f659e5c7SJunchao Zhang } 591f659e5c7SJunchao Zhang bas->itotal = q; 592f659e5c7SJunchao Zhang 593f659e5c7SJunchao Zhang /* Setup packing optimizations */ 594f659e5c7SJunchao Zhang ierr = PetscSFPackSetupOptimization(esf->nranks,esf->roffset,esf->rmine,&esf->leafpackopt);CHKERRQ(ierr); 595f659e5c7SJunchao Zhang ierr = PetscSFPackSetupOptimization(bas->niranks,bas->ioffset,bas->irootloc,&bas->rootpackopt);CHKERRQ(ierr); 596f659e5c7SJunchao Zhang esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */ 597f659e5c7SJunchao Zhang 598f659e5c7SJunchao Zhang ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr); 599f659e5c7SJunchao Zhang *newsf = esf; 600f659e5c7SJunchao Zhang PetscFunctionReturn(0); 601f659e5c7SJunchao Zhang } 602f659e5c7SJunchao Zhang 603f659e5c7SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedLeafSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 604f659e5c7SJunchao Zhang { 605f659e5c7SJunchao Zhang PetscSF esf; 606f659e5c7SJunchao Zhang PetscInt i,j,k,p,q,nroots,*rootdata,*leafdata,*new_ilocal,niranks,ndiranks,minleaf,maxleaf,maxlocal; 607f659e5c7SJunchao Zhang const PetscInt *ilocal,*ioffset,*irootloc; 608f659e5c7SJunchao Zhang const PetscMPIInt *iranks; 609f659e5c7SJunchao Zhang PetscSFPack_Basic link; 610f659e5c7SJunchao Zhang PetscSFNode *new_iremote; 611f659e5c7SJunchao Zhang const PetscSFNode *iremote; 612f659e5c7SJunchao Zhang PetscSF_Basic *bas; 613f659e5c7SJunchao Zhang MPI_Group group; 614f659e5c7SJunchao Zhang PetscErrorCode ierr; 615f659e5c7SJunchao Zhang 616f659e5c7SJunchao Zhang PetscFunctionBegin; 617f659e5c7SJunchao Zhang ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);CHKERRQ(ierr); 618f659e5c7SJunchao Zhang ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */ 619f659e5c7SJunchao Zhang 620f659e5c7SJunchao Zhang /* Set the graph of esf, which is easy for CreateEmbeddedLeafSF */ 621f659e5c7SJunchao Zhang ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr); 622f659e5c7SJunchao Zhang ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr); 623f659e5c7SJunchao Zhang ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr); 624f659e5c7SJunchao Zhang ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr); 625f659e5c7SJunchao Zhang for (i=0; i<nselected; i++) { 626f659e5c7SJunchao Zhang const PetscInt l = selected[i]; 627f659e5c7SJunchao Zhang new_ilocal[i] = ilocal ? ilocal[l] : l; 628f659e5c7SJunchao Zhang new_iremote[i].rank = iremote[l].rank; 629f659e5c7SJunchao Zhang new_iremote[i].index = iremote[l].index; 630f659e5c7SJunchao Zhang } 631f659e5c7SJunchao Zhang 632f659e5c7SJunchao Zhang /* Tag selected leaves before PetscSFSetGraph since new_ilocal might turn into NULL since we use PETSC_OWN_POINTER below */ 633f659e5c7SJunchao Zhang maxlocal = (minleaf > maxleaf)? 0 : maxleaf-minleaf+1; /* maxleaf=-1 and minleaf=0 when nleaves=0 */ 634f659e5c7SJunchao Zhang ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafdata);CHKERRQ(ierr); 635f659e5c7SJunchao Zhang for (i=0; i<nselected; i++) leafdata[new_ilocal[i]-minleaf] = 1; /* -minleaf to adjust indices according to minleaf */ 636f659e5c7SJunchao Zhang 637f659e5c7SJunchao Zhang ierr = PetscSFSetGraph(esf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 638f659e5c7SJunchao Zhang 639f659e5c7SJunchao Zhang /* Set up the outgoing communication (i.e., send info). We can not reuse rmine etc in sf since there is no way to 640f659e5c7SJunchao Zhang map rmine[i] (ilocal of leaves) back to selected[j] (leaf indices). 641f659e5c7SJunchao Zhang */ 642f659e5c7SJunchao Zhang ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRQ(ierr); 643f659e5c7SJunchao Zhang ierr = PetscSFSetUpRanks(esf,group);CHKERRQ(ierr); 644f659e5c7SJunchao Zhang ierr = MPI_Group_free(&group);CHKERRQ(ierr); 645f659e5c7SJunchao Zhang 646f659e5c7SJunchao Zhang /* Set up the incoming communication (i.e., recv info) */ 647f659e5c7SJunchao Zhang ierr = PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr); 648f659e5c7SJunchao Zhang bas = (PetscSF_Basic*)esf->data; 649f659e5c7SJunchao Zhang ierr = PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);CHKERRQ(ierr); 650f659e5c7SJunchao Zhang ierr = PetscMalloc1(ioffset[niranks],&bas->irootloc);CHKERRQ(ierr); 651f659e5c7SJunchao Zhang 652f659e5c7SJunchao Zhang /* Pass info about selected leaves to root buffer */ 653f659e5c7SJunchao Zhang ierr = PetscSFReduceBegin_Basic(sf,MPIU_INT,leafdata-minleaf,rootdata,MPIU_REPLACE);CHKERRQ(ierr); /* -minleaf to re-adjust start address of leafdata */ 654*9d1c8addSJunchao Zhang ierr = PetscSFPackGetInUse(sf,MPIU_INT,rootdata,leafdata-minleaf,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr); 655f659e5c7SJunchao Zhang ierr = PetscSFPackWaitall_Basic(link,PETSCSF_LEAF2ROOT_REDUCE);CHKERRQ(ierr); 656f659e5c7SJunchao Zhang 657f659e5c7SJunchao Zhang bas->niranks = bas->ndiranks = bas->ioffset[0] = 0; 658f659e5c7SJunchao Zhang p = 0; /* Counter for connected leaf ranks */ 659f659e5c7SJunchao Zhang q = 0; /* Counter for connected roots */ 660f659e5c7SJunchao Zhang for (i=0; i<niranks; i++) { 661f659e5c7SJunchao Zhang PetscInt *rootbuf = (PetscInt*)link->root[i]; 662f659e5c7SJunchao Zhang PetscBool connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */ 663f659e5c7SJunchao Zhang for (j=ioffset[i],k=0; j<ioffset[i+1]; j++,k++) { 664f659e5c7SJunchao Zhang if (rootbuf[k]) {bas->irootloc[q++] = irootloc[j]; connected = PETSC_TRUE;} 665f659e5c7SJunchao Zhang } 666f659e5c7SJunchao Zhang if (connected) { 667f659e5c7SJunchao Zhang bas->niranks++; 668f659e5c7SJunchao Zhang if (i<ndiranks) bas->ndiranks++; 669f659e5c7SJunchao Zhang bas->iranks[p] = iranks[i]; 670f659e5c7SJunchao Zhang bas->ioffset[p+1] = q; 671f659e5c7SJunchao Zhang p++; 672f659e5c7SJunchao Zhang } 673f659e5c7SJunchao Zhang } 674f659e5c7SJunchao Zhang bas->itotal = q; 675f659e5c7SJunchao Zhang ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr); 676f659e5c7SJunchao Zhang 677f659e5c7SJunchao Zhang /* Setup packing optimizations */ 678f659e5c7SJunchao Zhang ierr = PetscSFPackSetupOptimization(esf->nranks,esf->roffset,esf->rmine,&esf->leafpackopt);CHKERRQ(ierr); 679f659e5c7SJunchao Zhang ierr = PetscSFPackSetupOptimization(bas->niranks,bas->ioffset,bas->irootloc,&bas->rootpackopt);CHKERRQ(ierr); 680f659e5c7SJunchao Zhang esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */ 681f659e5c7SJunchao Zhang 682f659e5c7SJunchao Zhang ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr); 683f659e5c7SJunchao Zhang *newsf = esf; 684f659e5c7SJunchao Zhang PetscFunctionReturn(0); 685f659e5c7SJunchao Zhang } 686f659e5c7SJunchao Zhang 6878cc058d9SJed Brown PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf) 68895fce210SBarry Smith { 68940e23c03SJunchao Zhang PetscSF_Basic *dat; 69095fce210SBarry Smith PetscErrorCode ierr; 69195fce210SBarry Smith 69295fce210SBarry Smith PetscFunctionBegin; 69395fce210SBarry Smith sf->ops->SetUp = PetscSFSetUp_Basic; 69495fce210SBarry Smith sf->ops->SetFromOptions = PetscSFSetFromOptions_Basic; 69595fce210SBarry Smith sf->ops->Reset = PetscSFReset_Basic; 69695fce210SBarry Smith sf->ops->Destroy = PetscSFDestroy_Basic; 69795fce210SBarry Smith sf->ops->View = PetscSFView_Basic; 6983482bfa8SJunchao Zhang sf->ops->BcastAndOpBegin = PetscSFBcastAndOpBegin_Basic; 6993482bfa8SJunchao Zhang sf->ops->BcastAndOpEnd = PetscSFBcastAndOpEnd_Basic; 70095fce210SBarry Smith sf->ops->ReduceBegin = PetscSFReduceBegin_Basic; 70195fce210SBarry Smith sf->ops->ReduceEnd = PetscSFReduceEnd_Basic; 70295fce210SBarry Smith sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Basic; 70395fce210SBarry Smith sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Basic; 7048750ddebSJunchao Zhang sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Basic; 705f659e5c7SJunchao Zhang sf->ops->CreateEmbeddedSF = PetscSFCreateEmbeddedSF_Basic; 706f659e5c7SJunchao Zhang sf->ops->CreateEmbeddedLeafSF = PetscSFCreateEmbeddedLeafSF_Basic; 70795fce210SBarry Smith 70840e23c03SJunchao Zhang ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr); 70940e23c03SJunchao Zhang sf->data = (void*)dat; 71095fce210SBarry Smith PetscFunctionReturn(0); 71195fce210SBarry Smith } 712