1 2 #include <../src/vec/is/sf/impls/basic/gatherv/sfgatherv.h> 3 4 #define PetscSFPackGet_Gatherv PetscSFPackGet_Allgatherv 5 6 /* Reuse the type. The difference is some fields (displs, recvcounts) are only significant 7 on rank 0 in Gatherv. On other ranks they are harmless NULL. 8 */ 9 typedef PetscSF_Allgatherv PetscSF_Gatherv; 10 11 PETSC_INTERN PetscErrorCode PetscSFBcastAndOpBegin_Gatherv(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op) 12 { 13 PetscErrorCode ierr; 14 PetscSFPack link; 15 PetscMPIInt sendcount; 16 MPI_Comm comm; 17 PetscSF_Gatherv *dat = (PetscSF_Gatherv*)sf->data; 18 const void *rootbuf_mpi; /* buffer used by MPI */ 19 void *leafbuf_mpi; 20 PetscMemType rootmtype_mpi,leafmtype_mpi; 21 22 PetscFunctionBegin; 23 ierr = PetscSFPackGet_Gatherv(sf,unit,rootmtype,rootdata,leafmtype,leafdata,&link);CHKERRQ(ierr); 24 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 25 ierr = PetscMPIIntCast(sf->nroots,&sendcount);CHKERRQ(ierr); 26 ierr = PetscSFBcastPrepareMPIBuffers_Allgatherv(sf,link,op,&rootmtype_mpi,&rootbuf_mpi,&leafmtype_mpi,&leafbuf_mpi);CHKERRQ(ierr); 27 ierr = MPIU_Igatherv(rootbuf_mpi,sendcount,unit,leafbuf_mpi,dat->recvcounts,dat->displs,unit,0/*rank 0*/,comm,link->rootreqs[PETSCSF_ROOT2LEAF_BCAST][rootmtype_mpi]);CHKERRQ(ierr); 28 PetscFunctionReturn(0); 29 } 30 31 /* 32 Prepare the rootbuf, leafbuf etc used by MPI in PetscSFReduceBegin. 33 34 Input Arguments: 35 + sf - the start forest 36 . link - the link PetscSFReduceBegin is currently using 37 - op - the reduction op 38 39 Output Arguments: 40 +rootmtype_mpi - memtype of rootbuf_mpi 41 .rootbuf_mpi - root buffer used by MPI in the following MPI call 42 .leafmtype_mpi - memtype of leafbuf_mpi 43 -leafbuf_mpi - leaf buffer used by MPI in the following MPI call 44 */ 45 PETSC_INTERN PetscErrorCode PetscSFReducePrepareMPIBuffers_Gatherv(PetscSF sf,PetscSFPack link,MPI_Op op,PetscMemType *rootmtype_mpi,void **rootbuf_mpi,PetscMemType *leafmtype_mpi,const void **leafbuf_mpi) 46 { 47 PetscErrorCode ierr; 48 PetscMPIInt rank; 49 MPI_Comm comm; 50 51 PetscFunctionBegin; 52 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 53 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 54 55 if (link->leafmtype == PETSC_MEMTYPE_DEVICE && !use_gpu_aware_mpi) { /* Need to copy leafdata to leafbuf on every rank */ 56 if (!rank && !link->leafbuf[PETSC_MEMTYPE_HOST]) {ierr = PetscMallocWithMemType(PETSC_MEMTYPE_HOST,link->leafbuflen*link->unitbytes,(void**)&link->leafbuf[PETSC_MEMTYPE_HOST]);CHKERRQ(ierr);} 57 ierr = PetscMemcpyWithMemType(PETSC_MEMTYPE_HOST,PETSC_MEMTYPE_DEVICE,link->leafbuf[PETSC_MEMTYPE_HOST],link->leafdata,link->leafbuflen*link->unitbytes);CHKERRQ(ierr); 58 *leafmtype_mpi = PETSC_MEMTYPE_HOST; 59 *leafbuf_mpi = link->leafbuf[*leafmtype_mpi]; 60 } else { 61 *leafmtype_mpi = link->leafmtype; 62 *leafbuf_mpi = (char*)link->leafdata; 63 } 64 65 if (link->rootmtype == PETSC_MEMTYPE_DEVICE && !use_gpu_aware_mpi) { /* If rootdata is on device but no gpu-aware mpi, we need a rootbuf on host to receive reduced data */ 66 if (!link->rootbuf[PETSC_MEMTYPE_HOST]) {ierr = PetscMallocWithMemType(PETSC_MEMTYPE_HOST,link->rootbuflen*link->unitbytes,(void**)&link->rootbuf[PETSC_MEMTYPE_HOST]);CHKERRQ(ierr);} 67 *rootbuf_mpi = link->rootbuf[PETSC_MEMTYPE_HOST]; 68 *rootmtype_mpi = PETSC_MEMTYPE_HOST; 69 } else if (op == MPIU_REPLACE) { /* Directly use rootdata's memory to receive reduced data. No intermediate buffer needed. */ 70 *rootbuf_mpi = (char *)link->rootdata; 71 *rootmtype_mpi = link->rootmtype; 72 } else { /* op is a reduction. Have to allocate a buffer aside rootdata to apply it. The buffer is either on host or device, depending on where rootdata is. */ 73 if (!link->rootbuf[link->rootmtype]) {ierr = PetscMallocWithMemType(link->rootmtype,link->rootbuflen*link->unitbytes,(void**)&link->rootbuf[link->rootmtype]);CHKERRQ(ierr);} 74 *rootbuf_mpi = link->rootbuf[link->rootmtype]; 75 *rootmtype_mpi = link->rootmtype; 76 } 77 PetscFunctionReturn(0); 78 } 79 80 static PetscErrorCode PetscSFReduceBegin_Gatherv(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op) 81 { 82 PetscErrorCode ierr; 83 PetscSFPack link; 84 PetscMPIInt recvcount; 85 MPI_Comm comm; 86 PetscSF_Gatherv *dat = (PetscSF_Gatherv*)sf->data; 87 const void *leafbuf_mpi; 88 void *rootbuf_mpi; 89 PetscMemType leafmtype_mpi,rootmtype_mpi; 90 91 PetscFunctionBegin; 92 ierr = PetscSFPackGet_Gatherv(sf,unit,rootmtype,rootdata,leafmtype,leafdata,&link);CHKERRQ(ierr); 93 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 94 ierr = PetscMPIIntCast(sf->nroots,&recvcount);CHKERRQ(ierr); 95 ierr = PetscSFReducePrepareMPIBuffers_Gatherv(sf,link,op,&rootmtype_mpi,&rootbuf_mpi,&leafmtype_mpi,&leafbuf_mpi);CHKERRQ(ierr); 96 ierr = MPIU_Iscatterv(leafbuf_mpi,dat->recvcounts,dat->displs,unit,rootbuf_mpi,recvcount,unit,0,comm,link->rootreqs[PETSCSF_LEAF2ROOT_REDUCE][rootmtype_mpi]);CHKERRQ(ierr); 97 PetscFunctionReturn(0); 98 } 99 100 PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Gatherv(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op) 101 { 102 PetscErrorCode ierr; 103 104 PetscFunctionBegin; 105 /* In Gatherv, each root only has one leaf. So we just need to bcast rootdata to leafupdate and then reduce leafdata to rootdata */ 106 ierr = PetscSFBcastAndOpBegin(sf,unit,rootdata,leafupdate,MPIU_REPLACE);CHKERRQ(ierr); 107 ierr = PetscSFBcastAndOpEnd(sf,unit,rootdata,leafupdate,MPIU_REPLACE);CHKERRQ(ierr); 108 ierr = PetscSFReduceBegin(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr); 109 PetscFunctionReturn(0); 110 } 111 112 PETSC_INTERN PetscErrorCode PetscSFCreate_Gatherv(PetscSF sf) 113 { 114 PetscErrorCode ierr; 115 PetscSF_Gatherv *dat = (PetscSF_Gatherv*)sf->data; 116 117 PetscFunctionBegin; 118 /* Inherit from Allgatherv */ 119 sf->ops->SetUp = PetscSFSetUp_Allgatherv; 120 sf->ops->Reset = PetscSFReset_Allgatherv; 121 sf->ops->Destroy = PetscSFDestroy_Allgatherv; 122 sf->ops->GetGraph = PetscSFGetGraph_Allgatherv; 123 sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Allgatherv; 124 sf->ops->GetRootRanks = PetscSFGetRootRanks_Allgatherv; 125 sf->ops->BcastAndOpEnd = PetscSFBcastAndOpEnd_Allgatherv; 126 sf->ops->ReduceEnd = PetscSFReduceEnd_Allgatherv; 127 sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Allgatherv; 128 sf->ops->CreateLocalSF = PetscSFCreateLocalSF_Allgatherv; 129 130 /* Gatherv stuff */ 131 sf->ops->BcastAndOpBegin = PetscSFBcastAndOpBegin_Gatherv; 132 sf->ops->ReduceBegin = PetscSFReduceBegin_Gatherv; 133 sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Gatherv; 134 135 ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr); 136 sf->data = (void*)dat; 137 PetscFunctionReturn(0); 138 } 139