#include <../src/vec/is/sf/impls/basic/sfbasic.h>
#include <../src/vec/is/sf/impls/basic/sfpack.h>
#include <petsc/private/viewerimpl.h>

// Init persistent MPI send/recv requests
static PetscErrorCode PetscSFLinkInitMPIRequests_Persistent_Basic(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
{
  PetscSF_Basic     *bas = (PetscSF_Basic *)sf->data;
  PetscInt           cnt;
  PetscMPIInt        nrootranks, ndrootranks, nleafranks, ndleafranks;
  const PetscInt    *rootoffset, *leafoffset;
  MPI_Aint           disp;
  MPI_Comm           comm          = PetscObjectComm((PetscObject)sf);
  MPI_Datatype       unit          = link->unit;
  const PetscMemType rootmtype_mpi = link->rootmtype_mpi, leafmtype_mpi = link->leafmtype_mpi; /* Used to select buffers passed to MPI */
  const PetscInt     rootdirect_mpi = link->rootdirect_mpi, leafdirect_mpi = link->leafdirect_mpi;

  PetscFunctionBegin;
  if (bas->rootbuflen[PETSCSF_REMOTE] && !link->rootreqsinited[direction][rootmtype_mpi][rootdirect_mpi]) {
    PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, NULL, &rootoffset, NULL));
    if (direction == PETSCSF_LEAF2ROOT) {
      for (PetscMPIInt i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
        disp = (rootoffset[i] - rootoffset[ndrootranks]) * link->unitbytes;
        cnt  = rootoffset[i + 1] - rootoffset[i];
        PetscCallMPI(MPIU_Recv_init(link->rootbuf[PETSCSF_REMOTE][rootmtype_mpi] + disp, cnt, unit, bas->iranks[i], link->tag, comm, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi] + j));
      }
    } else { /* PETSCSF_ROOT2LEAF */
      for (PetscMPIInt i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
        disp = (rootoffset[i] - rootoffset[ndrootranks]) * link->unitbytes;
        cnt  = rootoffset[i + 1] - rootoffset[i];
        PetscCallMPI(MPIU_Send_init(link->rootbuf[PETSCSF_REMOTE][rootmtype_mpi] + disp, cnt, unit, bas->iranks[i], link->tag, comm, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi] + j));
      }
    }
    link->rootreqsinited[direction][rootmtype_mpi][rootdirect_mpi] = PETSC_TRUE;
  }

  if (sf->leafbuflen[PETSCSF_REMOTE] && !link->leafreqsinited[direction][leafmtype_mpi][leafdirect_mpi]) {
    PetscCall(PetscSFGetLeafInfo_Basic(sf, &nleafranks, &ndleafranks, NULL, &leafoffset, NULL, NULL));
    if (direction == PETSCSF_LEAF2ROOT) {
      for (PetscMPIInt i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
        disp = (leafoffset[i] - leafoffset[ndleafranks]) * link->unitbytes;
        cnt  = leafoffset[i + 1] - leafoffset[i];
        PetscCallMPI(MPIU_Send_init(link->leafbuf[PETSCSF_REMOTE][leafmtype_mpi] + disp, cnt, unit, sf->ranks[i], link->tag, comm, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi] + j));
      }
    } else { /* PETSCSF_ROOT2LEAF */
      for (PetscMPIInt i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
        disp = (leafoffset[i] - leafoffset[ndleafranks]) * link->unitbytes;
        cnt  = leafoffset[i + 1] - leafoffset[i];
        PetscCallMPI(MPIU_Recv_init(link->leafbuf[PETSCSF_REMOTE][leafmtype_mpi] + disp, cnt, unit, sf->ranks[i], link->tag, comm, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi] + j));
      }
    }
    link->leafreqsinited[direction][leafmtype_mpi][leafdirect_mpi] = PETSC_TRUE;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
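/*
  The routine above only creates the persistent requests; the start/wait calls below drive them.
  As a minimal standalone sketch of the same standard-MPI pattern (N, nbr, tag, comm, and nits are
  hypothetical placeholders, not names from this file):

    MPI_Request reqs[2];
    double      sendbuf[N], recvbuf[N];
    MPI_Send_init(sendbuf, N, MPI_DOUBLE, nbr, tag, comm, &reqs[0]); // created once
    MPI_Recv_init(recvbuf, N, MPI_DOUBLE, nbr, tag, comm, &reqs[1]);
    for (int it = 0; it < nits; it++) {
      MPI_Startall(2, reqs); // restarted every iteration, cf. MPI_Startall_isend/irecv below
      MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
    }
    MPI_Request_free(&reqs[0]); // freed at teardown (PetscSFLinkDestroy does this for links)
    MPI_Request_free(&reqs[1]);
*/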
// Start MPI requests. If non-GPU-aware MPI is used, we may need to copy data from a device buffer to a host buffer first.
static PetscErrorCode PetscSFLinkStartCommunication_Persistent_Basic(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
{
  PetscMPIInt    nsreqs = 0, nrreqs = 0;
  MPI_Request   *sreqs = NULL, *rreqs = NULL;
  PetscSF_Basic *bas = (PetscSF_Basic *)sf->data;
  PetscInt       sbuflen, rbuflen;

  PetscFunctionBegin;
  rbuflen = (direction == PETSCSF_ROOT2LEAF) ? sf->leafbuflen[PETSCSF_REMOTE] : bas->rootbuflen[PETSCSF_REMOTE];
  if (rbuflen) {
    if (direction == PETSCSF_ROOT2LEAF) {
      nrreqs = sf->nleafreqs;
      PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, NULL, &rreqs));
    } else { /* leaf to root */
      nrreqs = bas->nrootreqs;
      PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, &rreqs, NULL));
    }
  }

  sbuflen = (direction == PETSCSF_ROOT2LEAF) ? bas->rootbuflen[PETSCSF_REMOTE] : sf->leafbuflen[PETSCSF_REMOTE];
  if (sbuflen) {
    if (direction == PETSCSF_ROOT2LEAF) {
      nsreqs = bas->nrootreqs;
      PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */));
      PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, &sreqs, NULL));
    } else { /* leaf to root */
      nsreqs = sf->nleafreqs;
      PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE));
      PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, NULL, &sreqs));
    }
  }
  PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link)); // need to sync the stream to make BOTH sendbuf and recvbuf ready
  if (rbuflen) PetscCallMPI(MPI_Startall_irecv(rbuflen, link->unit, nrreqs, rreqs));
  if (sbuflen) PetscCallMPI(MPI_Startall_isend(sbuflen, link->unit, nsreqs, sreqs));
  PetscFunctionReturn(PETSC_SUCCESS);
}
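/*
  The MPIX_Stream path below avoids the explicit stream synchronization above: sends/receives are
  enqueued onto the GPU stream itself. As a sketch of how such a stream communicator is typically
  obtained with MPICH's extension (variable names are hypothetical; the actual setup lives in
  PetscSF's stream-aware-MPI support, not in this file):

    MPIX_Stream mpi_stream;
    MPI_Comm    stream_comm;
    MPI_Info    info;
    MPI_Info_create(&info);
    MPI_Info_set(info, "type", "cudaStream_t");                      // attach an existing CUDA stream
    MPIX_Info_set_hex(info, "value", &cuda_stream, sizeof(cuda_stream));
    MPIX_Stream_create(info, &mpi_stream);
    MPI_Info_free(&info);
    MPIX_Stream_comm_create(comm, mpi_stream, &stream_comm);         // used by MPIX_Isend/Irecv_enqueue()
*/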
#if defined(PETSC_HAVE_MPIX_STREAM)
// issue MPIX_Isend/Irecv_enqueue()
static PetscErrorCode PetscSFLinkStartCommunication_MPIX_Stream(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
{
  PetscSF_Basic     *bas = (PetscSF_Basic *)sf->data;
  PetscInt           i, j;
  PetscMPIInt        nrootranks, ndrootranks, nleafranks, ndleafranks, cnt;
  const PetscInt    *rootoffset, *leafoffset;
  MPI_Aint           disp;
  MPI_Comm           stream_comm   = sf->stream_comm;
  MPI_Datatype       unit          = link->unit;
  const PetscMemType rootmtype_mpi = link->rootmtype_mpi, leafmtype_mpi = link->leafmtype_mpi; /* Used to select buffers passed to MPI */
  const PetscInt     rootdirect_mpi = link->rootdirect_mpi, leafdirect_mpi = link->leafdirect_mpi;

  PetscFunctionBegin;
  if (bas->rootbuflen[PETSCSF_REMOTE]) {
    PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, NULL, &rootoffset, NULL));
    if (direction == PETSCSF_LEAF2ROOT) {
      for (i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
        disp = (rootoffset[i] - rootoffset[ndrootranks]) * link->unitbytes;
        cnt  = (PetscMPIInt)(rootoffset[i + 1] - rootoffset[i]);
        PetscCallMPI(MPIX_Irecv_enqueue(link->rootbuf[PETSCSF_REMOTE][rootmtype_mpi] + disp, cnt, unit, bas->iranks[i], link->tag, stream_comm, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi] + j));
      }
    } else { // PETSCSF_ROOT2LEAF
      for (i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
        disp = (rootoffset[i] - rootoffset[ndrootranks]) * link->unitbytes;
        cnt  = (PetscMPIInt)(rootoffset[i + 1] - rootoffset[i]);
        // no need to sync the gpu stream!
        PetscCallMPI(MPIX_Isend_enqueue(link->rootbuf[PETSCSF_REMOTE][rootmtype_mpi] + disp, cnt, unit, bas->iranks[i], link->tag, stream_comm, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi] + j));
      }
    }
  }

  if (sf->leafbuflen[PETSCSF_REMOTE]) {
    PetscCall(PetscSFGetLeafInfo_Basic(sf, &nleafranks, &ndleafranks, NULL, &leafoffset, NULL, NULL));
    if (direction == PETSCSF_LEAF2ROOT) {
      for (i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
        disp = (leafoffset[i] - leafoffset[ndleafranks]) * link->unitbytes;
        cnt  = (PetscMPIInt)(leafoffset[i + 1] - leafoffset[i]);
        // no need to sync the gpu stream!
        PetscCallMPI(MPIX_Isend_enqueue(link->leafbuf[PETSCSF_REMOTE][leafmtype_mpi] + disp, cnt, unit, sf->ranks[i], link->tag, stream_comm, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi] + j));
      }
    } else { // PETSCSF_ROOT2LEAF
      for (i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
        disp = (leafoffset[i] - leafoffset[ndleafranks]) * link->unitbytes;
        cnt  = (PetscMPIInt)(leafoffset[i + 1] - leafoffset[i]);
        PetscCallMPI(MPIX_Irecv_enqueue(link->leafbuf[PETSCSF_REMOTE][leafmtype_mpi] + disp, cnt, unit, sf->ranks[i], link->tag, stream_comm, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi] + j));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PetscSFLinkFinishCommunication_MPIX_Stream(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
{
  PetscSF_Basic     *bas = (PetscSF_Basic *)sf->data;
  const PetscMemType rootmtype_mpi = link->rootmtype_mpi, leafmtype_mpi = link->leafmtype_mpi;
  const PetscInt     rootdirect_mpi = link->rootdirect_mpi, leafdirect_mpi = link->leafdirect_mpi;

  PetscFunctionBegin;
  PetscCallMPI(MPIX_Waitall_enqueue(bas->nrootreqs, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi], MPI_STATUSES_IGNORE));
  PetscCallMPI(MPIX_Waitall_enqueue(sf->nleafreqs, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi], MPI_STATUSES_IGNORE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

static PetscErrorCode PetscSFSetCommunicationOps_Basic(PetscSF sf, PetscSFLink link)
{
  PetscFunctionBegin;
  link->InitMPIRequests    = PetscSFLinkInitMPIRequests_Persistent_Basic;
  link->StartCommunication = PetscSFLinkStartCommunication_Persistent_Basic;
#if defined(PETSC_HAVE_MPIX_STREAM)
  const PetscMemType rootmtype_mpi = link->rootmtype_mpi, leafmtype_mpi = link->leafmtype_mpi;
  if (sf->use_stream_aware_mpi && (PetscMemTypeDevice(rootmtype_mpi) || PetscMemTypeDevice(leafmtype_mpi))) {
    link->StartCommunication  = PetscSFLinkStartCommunication_MPIX_Stream;
    link->FinishCommunication = PetscSFLinkFinishCommunication_MPIX_Stream;
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*===================================================================================*/
/*                       SF public interface implementations                         */
/*===================================================================================*/
PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
{
  PetscSF_Basic *bas = (PetscSF_Basic *)sf->data;
  PetscInt      *rlengths, *ilengths;
  PetscMPIInt    nRemoteRootRanks, nRemoteLeafRanks;
  PetscMPIInt    rank, niranks, *iranks, tag;
  MPI_Comm       comm;
  MPI_Group      group;
  MPI_Request   *rootreqs, *leafreqs;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_group(PETSC_COMM_SELF, &group));
  PetscCall(PetscSFSetUpRanks(sf, group));
  PetscCallMPI(MPI_Group_free(&group));
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCall(PetscObjectGetNewTag((PetscObject)sf, &tag));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  /*
   * Inform roots about how many leaves they have, and from which ranks
   */
  PetscCall(PetscMalloc1(sf->nranks, &rlengths));
  /* Determine the number, sending ranks, and lengths of incoming messages */
  for (PetscMPIInt i = 0; i < sf->nranks; i++) { rlengths[i] = sf->roffset[i + 1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */ }
  nRemoteRootRanks = sf->nranks - sf->ndranks;
  PetscCall(PetscCommBuildTwoSided(comm, 1, MPIU_INT, nRemoteRootRanks, PetscSafePointerPlusOffset(sf->ranks, sf->ndranks), PetscSafePointerPlusOffset(rlengths, sf->ndranks), &niranks, &iranks, (void **)&ilengths));

  /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why.
     We could sort the ranks there at the price of allocating extra working arrays. Presumably, niranks is
     small and the sorting is cheap.
   */
  PetscCall(PetscSortMPIIntWithIntArray(niranks, iranks, ilengths));

  /* Partition into distinguished and non-distinguished incoming ranks */
  bas->ndiranks = sf->ndranks;
  bas->niranks  = bas->ndiranks + niranks;
  PetscCall(PetscMalloc2(bas->niranks, &bas->iranks, bas->niranks + 1, &bas->ioffset));
  bas->ioffset[0] = 0;
  for (PetscMPIInt i = 0; i < bas->ndiranks; i++) {
    bas->iranks[i]      = sf->ranks[i];
    bas->ioffset[i + 1] = bas->ioffset[i] + rlengths[i];
  }
  PetscCheck(bas->ndiranks <= 1 && (bas->ndiranks != 1 || bas->iranks[0] == rank), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Broken setup for shared ranks");
  for (PetscMPIInt i = bas->ndiranks; i < bas->niranks; i++) {
    bas->iranks[i]      = iranks[i - bas->ndiranks];
    bas->ioffset[i + 1] = bas->ioffset[i] + ilengths[i - bas->ndiranks];
  }
  bas->itotal = bas->ioffset[bas->niranks];
  PetscCall(PetscFree(rlengths));
  PetscCall(PetscFree(iranks));
  PetscCall(PetscFree(ilengths));

  /* Send leaf identities to roots */
  nRemoteLeafRanks = bas->niranks - bas->ndiranks;
  PetscCall(PetscMalloc1(bas->itotal, &bas->irootloc));
  PetscCall(PetscMalloc2(nRemoteLeafRanks, &rootreqs, nRemoteRootRanks, &leafreqs));
  for (PetscMPIInt i = bas->ndiranks; i < bas->niranks; i++) PetscCallMPI(MPIU_Irecv(bas->irootloc + bas->ioffset[i], bas->ioffset[i + 1] - bas->ioffset[i], MPIU_INT, bas->iranks[i], tag, comm, &rootreqs[i - bas->ndiranks]));
  for (PetscMPIInt i = 0; i < sf->nranks; i++) {
    PetscInt npoints = sf->roffset[i + 1] - sf->roffset[i];
    if (i < sf->ndranks) {
      PetscCheck(sf->ranks[i] == rank, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot interpret distinguished leaf rank");
      PetscCheck(bas->iranks[0] == rank, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot interpret distinguished root rank");
      PetscCheck(npoints == bas->ioffset[1] - bas->ioffset[0], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Distinguished rank exchange has mismatched lengths");
      PetscCall(PetscArraycpy(bas->irootloc + bas->ioffset[0], sf->rremote + sf->roffset[i], npoints));
      continue;
    }
    PetscCallMPI(MPIU_Isend(sf->rremote + sf->roffset[i], npoints, MPIU_INT, sf->ranks[i], tag, comm, &leafreqs[i - sf->ndranks]));
  }
  PetscCallMPI(MPI_Waitall(nRemoteLeafRanks, rootreqs, MPI_STATUSES_IGNORE));
  PetscCallMPI(MPI_Waitall(nRemoteRootRanks, leafreqs, MPI_STATUSES_IGNORE));

  sf->nleafreqs  = nRemoteRootRanks;
  bas->nrootreqs = nRemoteLeafRanks;

  /* Set up fields related to packing, such as rootbuflen[] */
  PetscCall(PetscSFSetUpPackFields(sf));
  PetscCall(PetscFree2(rootreqs, leafreqs));
  PetscFunctionReturn(PETSC_SUCCESS);
}
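/*
  PetscCommBuildTwoSided() above performs the "who talks to me?" discovery: each rank knows whom it
  sends to, and the routine determines whom it receives from. As a minimal sketch of the semantics
  (a hypothetical ring with a one-PetscInt payload per destination, not code from this file):

    PetscMPIInt toranks[1] = {(rank + 1) % size}; // I send to one rank...
    PetscInt    todata[1]  = {42};                // ...with one PetscInt of payload
    PetscMPIInt nfrom, *fromranks;
    PetscInt   *fromdata;
    PetscCall(PetscCommBuildTwoSided(comm, 1, MPIU_INT, 1, toranks, todata, &nfrom, &fromranks, (void **)&fromdata));
    // here nfrom == 1, fromranks[0] == (rank + size - 1) % size, fromdata[0] == 42
    PetscCall(PetscFree(fromranks));
    PetscCall(PetscFree(fromdata));
*/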
PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf)
{
  PetscSF_Basic *bas  = (PetscSF_Basic *)sf->data;
  PetscSFLink    link = bas->avail, next;

  PetscFunctionBegin;
  PetscCheck(!bas->inuse, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Outstanding operation has not been completed");
  PetscCall(PetscFree2(bas->iranks, bas->ioffset));
  PetscCall(PetscFree(bas->irootloc));

#if defined(PETSC_HAVE_DEVICE)
  for (int i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, bas->irootloc_d[i]));
#endif

#if defined(PETSC_HAVE_NVSHMEM)
  PetscCall(PetscSFReset_Basic_NVSHMEM(sf));
#endif

  for (; link; link = next) {
    next = link->next;
    PetscCall(PetscSFLinkDestroy(sf, link));
  }
  bas->avail = NULL;
  PetscCall(PetscSFResetPackFields(sf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
{
  PetscFunctionBegin;
  PetscCall(PetscSFReset_Basic(sf));
  PetscCall(PetscFree(sf->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#if defined(PETSC_USE_SINGLE_LIBRARY)
  #include <petscmat.h>

PETSC_INTERN PetscErrorCode PetscSFView_Basic_PatternAndSizes(PetscSF sf, PetscViewer viewer)
{
  PetscSF_Basic     *bas = (PetscSF_Basic *)sf->data;
  PetscMPIInt        nrootranks, ndrootranks;
  const PetscInt    *rootoffset;
  PetscMPIInt        rank, size;
  const PetscMPIInt *rootranks;
  MPI_Comm           comm = PetscObjectComm((PetscObject)sf);
  PetscScalar        unitbytes;
  Mat                A;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  /* PetscSFView is most useful for the SF used in VecScatterBegin/End in MatMult etc, where we do
     PetscSFBcast, i.e., roots send data to leaves. We dump the communication pattern into a matrix
     from the senders' viewpoint: how many bytes I will send to my neighbors.

     Looking at a column of the matrix, one can also see how many bytes that rank will receive from others.

     If a PetscSFLink bas->inuse were available, we could use it to get the tree vertex size. But that would
     give different interpretations of the same SF for different data types. Since we care most about
     VecScatter, we uniformly treat each vertex as a PetscScalar.
   */
  unitbytes = (PetscScalar)sizeof(PetscScalar);

  PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, &rootranks, &rootoffset, NULL));
  PetscCall(MatCreateAIJ(comm, 1, 1, size, size, 1, NULL, nrootranks - ndrootranks, NULL, &A));
  PetscCall(MatSetOptionsPrefix(A, "__petsc_internal__")); /* To prevent the internal A from taking any command line options */
  for (PetscMPIInt i = 0; i < nrootranks; i++) PetscCall(MatSetValue(A, (PetscInt)rank, bas->iranks[i], (rootoffset[i + 1] - rootoffset[i]) * unitbytes, INSERT_VALUES));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatView(A, viewer));
  PetscCall(MatDestroy(&A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf, PetscViewer viewer)
{
  PetscBool isascii;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  if (isascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) PetscCall(PetscViewerASCIIPrintf(viewer, "  MultiSF sort=%s\n", sf->rankorder ? "rank-order" : "unordered"));
#if defined(PETSC_USE_SINGLE_LIBRARY)
  else {
    PetscBool isdraw, isbinary;
    PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
    PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
    if ((isascii && viewer->format == PETSC_VIEWER_ASCII_MATLAB) || isdraw || isbinary) PetscCall(PetscSFView_Basic_PatternAndSizes(sf, viewer));
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode PetscSFBcastBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  /* Create a communication link, which provides buffers, MPI requests etc (if MPI is used) */
  PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, PETSCSF_BCAST, &link));
  /* Pack rootdata to rootbuf for remote communication */
  PetscCall(PetscSFLinkPackRootData(sf, link, PETSCSF_REMOTE, rootdata));
  /* Start communication, e.g., post MPIU_Isend */
  PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_ROOT2LEAF));
  /* Do the local scatter (i.e., self-to-self communication), which overlaps with the remote communication above */
  PetscCall(PetscSFLinkScatterLocal(sf, link, PETSCSF_ROOT2LEAF, (void *)rootdata, leafdata, op));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode PetscSFBcastEnd_Basic(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  /* Retrieve the link used in XxxBegin() with root/leafdata as the key */
  PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link));
  /* Finish remote communication, e.g., post MPI_Waitall */
  PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_ROOT2LEAF));
  /* Unpack data in leafbuf to leafdata for remote communication */
  PetscCall(PetscSFLinkUnpackLeafData(sf, link, PETSCSF_REMOTE, leafdata, op));
  /* Recycle the link */
  PetscCall(PetscSFLinkReclaim(sf, &link));
  PetscFunctionReturn(PETSC_SUCCESS);
}
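/*
  From the user's side, the Begin/End pair above is reached through the public PetscSFBcastBegin/End
  API. As a minimal sketch (a hypothetical graph where every rank's leaf 0 pulls from root 0 on rank 0):

    PetscSF     sf;
    PetscSFNode remote      = {.rank = 0, .index = 0};
    PetscInt    rootdata[1] = {rank == 0 ? 7 : -1}, leafdata[1] = {-1};
    PetscCall(PetscSFCreate(comm, &sf));
    PetscCall(PetscSFSetGraph(sf, 1, 1, NULL, PETSC_COPY_VALUES, &remote, PETSC_COPY_VALUES));
    PetscCall(PetscSFBcastBegin(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE)); // leafdata[0] == 7 everywhere
    PetscCall(PetscSFDestroy(&sf));
*/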
/* Shared by ReduceBegin and FetchAndOpBegin */
static inline PetscErrorCode PetscSFLeafToRootBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op, PetscSFOperation sfop, PetscSFLink *out)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, sfop, &link));
  PetscCall(PetscSFLinkPackLeafData(sf, link, PETSCSF_REMOTE, leafdata));
  PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_LEAF2ROOT));
  *out = link;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* leaf -> root with reduction */
PETSC_INTERN PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLeafToRootBegin_Basic(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op, PETSCSF_REDUCE, &link));
  PetscCall(PetscSFLinkScatterLocal(sf, link, PETSCSF_LEAF2ROOT, rootdata, (void *)leafdata, op));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link));
  PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_LEAF2ROOT));
  PetscCall(PetscSFLinkUnpackRootData(sf, link, PETSCSF_REMOTE, rootdata, op));
  PetscCall(PetscSFLinkReclaim(sf, &link));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, void *leafupdate, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLeafToRootBegin_Basic(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op, PETSCSF_FETCH, &link));
  PetscCall(PetscSFLinkFetchAndOpLocal(sf, link, rootdata, leafdata, leafupdate, op));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link));
  /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
  PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_LEAF2ROOT));
  /* Do fetch-and-op; the (remote) update results are in rootbuf */
  PetscCall(PetscSFLinkFetchAndOpRemote(sf, link, rootdata, op));
  /* Bcast rootbuf to leafupdate */
  PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_ROOT2LEAF));
  PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_ROOT2LEAF));
  /* Unpack and insert the fetched data into leaves */
  PetscCall(PetscSFLinkUnpackLeafData(sf, link, PETSCSF_REMOTE, leafupdate, MPI_REPLACE));
  PetscCall(PetscSFLinkReclaim(sf, &link));
  PetscFunctionReturn(PETSC_SUCCESS);
}
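/*
  The reduce direction mirrors the bcast sketch above: leaves accumulate into roots. Reusing the same
  hypothetical one-root graph, every rank contributes its leaf value and rank 0 sees the sum:

    PetscInt leafdata[1] = {1}, rootdata[1] = {0};
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, leafdata, rootdata, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, leafdata, rootdata, MPI_SUM)); // rootdata[0] == size on rank 0
*/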
PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf, PetscMPIInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc)
{
  PetscSF_Basic *bas = (PetscSF_Basic *)sf->data;

  PetscFunctionBegin;
  if (niranks) *niranks = bas->niranks;
  if (iranks) *iranks = bas->iranks;
  if (ioffset) *ioffset = bas->ioffset;
  if (irootloc) *irootloc = bas->irootloc;
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* An optimized PetscSFCreateEmbeddedRootSF. We aggressively reuse the established communication on sf:
   one Bcast on sf suffices, and no further communication is needed to build the embedded sf. Note that
   selected[] was sorted before calling this routine.
 */
PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedRootSF_Basic(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
{
  PetscSF            esf;
  PetscInt          *esf_roffset, *esf_rmine, *esf_rremote;
  PetscInt           j, p, q, nroots, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
  char              *rootdata, *leafdata, *leafmem; /* Only stores 0 or 1, so we can save memory with char */
  PetscMPIInt       *esf_ranks, nranks, ndranks, niranks, esf_nranks, esf_ndranks, ndiranks;
  const PetscMPIInt *ranks, *iranks;
  const PetscInt    *roffset, *rmine, *rremote, *ioffset, *irootloc;
  PetscBool          connected;
  PetscSFNode       *new_iremote;
  PetscSF_Basic     *bas;

  PetscFunctionBegin;
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), &esf));
  PetscCall(PetscSFSetFromOptions(esf));
  PetscCall(PetscSFSetType(esf, PETSCSFBASIC)); /* This optimized routine can only create a basic sf */

  /* Find out which leaves are still connected to roots in the embedded sf by doing a Bcast */
  PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
  PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
  maxlocal = maxleaf - minleaf + 1;
  PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
  leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf);
  /* Tag selected roots */
  for (PetscInt i = 0; i < nselected; ++i) rootdata[selected[i]] = 1;

  PetscCall(PetscSFBcastBegin(sf, MPI_CHAR, rootdata, leafdata, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPI_CHAR, rootdata, leafdata, MPI_REPLACE));
  PetscCall(PetscSFGetLeafInfo_Basic(sf, &nranks, &ndranks, &ranks, &roffset, &rmine, &rremote)); /* Get send info */
  esf_nranks = esf_ndranks = esf_nleaves = 0;
  for (PetscMPIInt i = 0; i < nranks; i++) {
    connected = PETSC_FALSE; /* Is this process still connected to this remote root rank? */
    for (j = roffset[i]; j < roffset[i + 1]; j++) {
      if (leafdata[rmine[j]]) {
        esf_nleaves++;
        connected = PETSC_TRUE;
      }
    }
    if (connected) {
      esf_nranks++;
      if (i < ndranks) esf_ndranks++;
    }
  }

  /* Set the graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */
  PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
  PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
  PetscCall(PetscMalloc4(esf_nranks, &esf_ranks, esf_nranks + 1, &esf_roffset, esf_nleaves, &esf_rmine, esf_nleaves, &esf_rremote));
  p              = 0; /* Counter for connected root ranks */
  q              = 0; /* Counter for connected leaves */
  esf_roffset[0] = 0;
  for (PetscMPIInt i = 0; i < nranks; i++) { /* Scan leaf data again to fill esf arrays */
    connected = PETSC_FALSE;
    for (j = roffset[i]; j < roffset[i + 1]; j++) {
      if (leafdata[rmine[j]]) {
        esf_rmine[q] = new_ilocal[q] = rmine[j];
        esf_rremote[q]       = rremote[j];
        new_iremote[q].index = rremote[j];
        new_iremote[q].rank  = ranks[i];
        connected            = PETSC_TRUE;
        q++;
      }
    }
    if (connected) {
      esf_ranks[p]       = ranks[i];
      esf_roffset[p + 1] = q;
      p++;
    }
  }

  /* SetGraph internally resets the SF, so we only set its fields after the call */
  PetscCall(PetscSFSetGraph(esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
  esf->nranks    = esf_nranks;
  esf->ndranks   = esf_ndranks;
  esf->ranks     = esf_ranks;
  esf->roffset   = esf_roffset;
  esf->rmine     = esf_rmine;
  esf->rremote   = esf_rremote;
  esf->nleafreqs = esf_nranks - esf_ndranks;

  /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */
  bas = (PetscSF_Basic *)esf->data;
  PetscCall(PetscSFGetRootInfo_Basic(sf, &niranks, &ndiranks, &iranks, &ioffset, &irootloc)); /* Get recv info */
  /* The embedded sf always has simpler communication than the original one. We might allocate longer
     arrays than needed here, but we do not care since these arrays are usually short. The benefit is
     that we can fill these arrays by parsing irootloc just once.
   */
  PetscCall(PetscMalloc2(niranks, &bas->iranks, niranks + 1, &bas->ioffset));
  PetscCall(PetscMalloc1(ioffset[niranks], &bas->irootloc));
  bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
  p = 0; /* Counter for connected leaf ranks */
  q = 0; /* Counter for connected roots */
  for (PetscMPIInt i = 0; i < niranks; i++) {
    connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
    for (j = ioffset[i]; j < ioffset[i + 1]; j++) {
      if (rootdata[irootloc[j]]) {
        bas->irootloc[q++] = irootloc[j];
        connected          = PETSC_TRUE;
      }
    }
    if (connected) {
      bas->niranks++;
      if (i < ndiranks) bas->ndiranks++; /* Note that the order of ranks (including distinguished ranks) is kept */
      bas->iranks[p]      = iranks[i];
      bas->ioffset[p + 1] = q;
      p++;
    }
  }
  bas->itotal     = q;
  bas->nrootreqs  = bas->niranks - bas->ndiranks;
  esf->persistent = PETSC_TRUE;
  /* Set up packing-related fields */
  PetscCall(PetscSFSetUpPackFields(esf));

  /* Copied from PetscSFSetUp(), since this method wants to skip PetscSFSetUp(). */
#if defined(PETSC_HAVE_CUDA)
  if (esf->backend == PETSCSF_BACKEND_CUDA) {
    esf->ops->Malloc = PetscSFMalloc_CUDA;
    esf->ops->Free   = PetscSFFree_CUDA;
  }
#endif

#if defined(PETSC_HAVE_HIP)
  /* TODO: Needs debugging */
  if (esf->backend == PETSCSF_BACKEND_HIP) {
    esf->ops->Malloc = PetscSFMalloc_HIP;
    esf->ops->Free   = PetscSFFree_HIP;
  }
#endif

#if defined(PETSC_HAVE_KOKKOS)
  if (esf->backend == PETSCSF_BACKEND_KOKKOS) {
    esf->ops->Malloc = PetscSFMalloc_Kokkos;
    esf->ops->Free   = PetscSFFree_Kokkos;
  }
#endif
  esf->setupcalled = PETSC_TRUE; /* We have done the setup ourselves! */
  PetscCall(PetscFree2(rootdata, leafmem));
  *newsf = esf;
  PetscFunctionReturn(PETSC_SUCCESS);
}
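/*
  Callers reach the routine above through the public PetscSFCreateEmbeddedRootSF(). As a sketch
  (hypothetical: keep only root 0 of the sf built in the earlier Bcast example):

    PetscSF        esf;
    const PetscInt selected[1] = {0}; // must be sorted
    PetscCall(PetscSFCreateEmbeddedRootSF(sf, 1, selected, &esf));
    // esf communicates only the edges whose roots are in selected[]
    PetscCall(PetscSFDestroy(&esf));
*/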
PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
{
  PetscSF_Basic *dat;

  PetscFunctionBegin;
  sf->ops->SetUp                = PetscSFSetUp_Basic;
  sf->ops->Reset                = PetscSFReset_Basic;
  sf->ops->Destroy              = PetscSFDestroy_Basic;
  sf->ops->View                 = PetscSFView_Basic;
  sf->ops->BcastBegin           = PetscSFBcastBegin_Basic;
  sf->ops->BcastEnd             = PetscSFBcastEnd_Basic;
  sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
  sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
  sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
  sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
  sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
  sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic;
  sf->ops->SetCommunicationOps  = PetscSFSetCommunicationOps_Basic;

  sf->persistent = PETSC_TRUE; // currently SFBASIC always uses persistent send/recv
  sf->collective = PETSC_FALSE;

  PetscCall(PetscNew(&dat));
  sf->data = (void *)dat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
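/*
  PetscSFCreate_Basic() is the constructor registered for the PETSCSFBASIC type; users select it with
  PetscSFSetType() (or implicitly, since basic is a common default). As a sketch:

    PetscSF sf;
    PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
    PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); // dispatches to PetscSFCreate_Basic()
    // ... PetscSFSetGraph(), PetscSFBcast/Reduce ..., then:
    PetscCall(PetscSFDestroy(&sf));
*/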