#include <../src/vec/is/sf/impls/basic/sfbasic.h>
#include <../src/vec/is/sf/impls/basic/sfpack.h>
#include <petsc/private/viewerimpl.h>

/*===================================================================================*/
/*                         SF public interface implementations                       */
/*===================================================================================*/
PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
{
  PetscSF_Basic *bas = (PetscSF_Basic *)sf->data;
  PetscInt      *rlengths, *ilengths, i, nRemoteRootRanks, nRemoteLeafRanks;
  PetscMPIInt    rank, niranks, *iranks, tag;
  MPI_Comm       comm;
  MPI_Group      group;
  MPI_Request   *rootreqs, *leafreqs;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_group(PETSC_COMM_SELF, &group));
  PetscCall(PetscSFSetUpRanks(sf, group));
  PetscCallMPI(MPI_Group_free(&group));
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCall(PetscObjectGetNewTag((PetscObject)sf, &tag));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  /*
   * Inform roots about how many leaves reference them and from which ranks
   */
  PetscCall(PetscMalloc1(sf->nranks, &rlengths));
  /* Determine the number of incoming ranks, which ranks they are, and the length of each incoming message */
  for (i = 0; i < sf->nranks; i++) { rlengths[i] = sf->roffset[i + 1] - sf->roffset[i]; /* Number of roots referenced by my leaves on remote rank sf->ranks[i] */ }
  nRemoteRootRanks = sf->nranks - sf->ndranks;
  PetscCall(PetscCommBuildTwoSided(comm, 1, MPIU_INT, nRemoteRootRanks, sf->ranks + sf->ndranks, rlengths + sf->ndranks, &niranks, &iranks, (void **)&ilengths));

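  /* Illustrative example of the exchange above (hypothetical ranks and counts): if my leaves reference
     3 roots on rank 2 and 5 roots on rank 7, then after PetscCommBuildTwoSided() rank 2 learns it will
     receive 3 root indices from me and rank 7 learns it will receive 5. Symmetrically, niranks/iranks[]/
     ilengths[] here describe the ranks whose leaves reference my roots and how many indices each will send. */
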
  /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why.
     We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is
     small and the sorting is cheap.
   */
  PetscCall(PetscSortMPIIntWithIntArray(niranks, iranks, ilengths));

  /* Partition into distinguished and non-distinguished incoming ranks */
  bas->ndiranks = sf->ndranks;
  bas->niranks  = bas->ndiranks + niranks;
  PetscCall(PetscMalloc2(bas->niranks, &bas->iranks, bas->niranks + 1, &bas->ioffset));
  bas->ioffset[0] = 0;
  for (i = 0; i < bas->ndiranks; i++) {
    bas->iranks[i]      = sf->ranks[i];
    bas->ioffset[i + 1] = bas->ioffset[i] + rlengths[i];
  }
  PetscCheck(bas->ndiranks <= 1 && (bas->ndiranks != 1 || bas->iranks[0] == rank), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Broken setup for shared ranks");
  for (; i < bas->niranks; i++) {
    bas->iranks[i]      = iranks[i - bas->ndiranks];
    bas->ioffset[i + 1] = bas->ioffset[i] + ilengths[i - bas->ndiranks];
  }
  bas->itotal = bas->ioffset[i];
  PetscCall(PetscFree(rlengths));
  PetscCall(PetscFree(iranks));
  PetscCall(PetscFree(ilengths));

  /* Send leaf identities to roots */
  nRemoteLeafRanks = bas->niranks - bas->ndiranks;
  PetscCall(PetscMalloc1(bas->itotal, &bas->irootloc));
  PetscCall(PetscMalloc2(nRemoteLeafRanks, &rootreqs, nRemoteRootRanks, &leafreqs));
  for (i = bas->ndiranks; i < bas->niranks; i++) PetscCallMPI(MPIU_Irecv(bas->irootloc + bas->ioffset[i], bas->ioffset[i + 1] - bas->ioffset[i], MPIU_INT, bas->iranks[i], tag, comm, &rootreqs[i - bas->ndiranks]));
  for (i = 0; i < sf->nranks; i++) {
    PetscInt npoints = sf->roffset[i + 1] - sf->roffset[i];
    if (i < sf->ndranks) {
      PetscCheck(sf->ranks[i] == rank, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot interpret distinguished leaf rank");
      PetscCheck(bas->iranks[0] == rank, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot interpret distinguished root rank");
      PetscCheck(npoints == bas->ioffset[1] - bas->ioffset[0], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Distinguished rank exchange has mismatched lengths");
      PetscCall(PetscArraycpy(bas->irootloc + bas->ioffset[0], sf->rremote + sf->roffset[i], npoints));
      continue;
    }
    PetscCallMPI(MPIU_Isend(sf->rremote + sf->roffset[i], npoints, MPIU_INT, sf->ranks[i], tag, comm, &leafreqs[i - sf->ndranks]));
  }
  PetscCallMPI(MPI_Waitall(nRemoteLeafRanks, rootreqs, MPI_STATUSES_IGNORE));
  PetscCallMPI(MPI_Waitall(nRemoteRootRanks, leafreqs, MPI_STATUSES_IGNORE));

  sf->nleafreqs  = nRemoteRootRanks;
  bas->nrootreqs = nRemoteLeafRanks;
  sf->persistent = PETSC_TRUE;

  /* Setup fields related to packing, such as rootbuflen[] */
  PetscCall(PetscSFSetUpPackFields(sf));
  PetscCall(PetscFree2(rootreqs, leafreqs));
  PetscFunctionReturn(PETSC_SUCCESS);
}
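
/* Illustrative example of what the setup above produces (hypothetical two-rank graph, not from any test):
   suppose rank 0 owns roots {0,1,2}, rank 1 has two leaves referencing roots 1 and 2 on rank 0, and there
   are no local (self) edges. Then on rank 1, sf->ranks = {0} and sf->roffset = {0,2}, and the root indices
   {1,2} are sent to rank 0; on rank 0, bas->iranks = {1}, bas->ioffset = {0,2}, bas->irootloc = {1,2} and
   bas->itotal = 2, i.e. a CSR-like listing of which of my roots each remote leaf rank references. */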

PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf)
{
  PetscSF_Basic *bas  = (PetscSF_Basic *)sf->data;
  PetscSFLink    link = bas->avail, next;

  PetscFunctionBegin;
  PetscCheck(!bas->inuse, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Outstanding operation has not been completed");
  PetscCall(PetscFree2(bas->iranks, bas->ioffset));
  PetscCall(PetscFree(bas->irootloc));

#if defined(PETSC_HAVE_DEVICE)
  for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, bas->irootloc_d[i]));
#endif

#if defined(PETSC_HAVE_NVSHMEM)
  PetscCall(PetscSFReset_Basic_NVSHMEM(sf));
#endif

  for (; link; link = next) {
    next = link->next;
    PetscCall(PetscSFLinkDestroy(sf, link));
  }
  bas->avail = NULL;
  PetscCall(PetscSFResetPackFields(sf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
{
  PetscFunctionBegin;
  PetscCall(PetscSFReset_Basic(sf));
  PetscCall(PetscFree(sf->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if defined(PETSC_USE_SINGLE_LIBRARY)
#include <petscmat.h>

PETSC_INTERN PetscErrorCode PetscSFView_Basic_PatternAndSizes(PetscSF sf, PetscViewer viewer)
{
  PetscSF_Basic     *bas = (PetscSF_Basic *)sf->data;
  PetscInt           i, nrootranks, ndrootranks;
  const PetscInt    *rootoffset;
  PetscMPIInt        rank, size;
  const PetscMPIInt *rootranks;
  MPI_Comm           comm = PetscObjectComm((PetscObject)sf);
  PetscScalar        unitbytes;
  Mat                A;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  /* PetscSFView is most useful for the SF used in VecScatterBegin/End in MatMult etc, where we do
     PetscSFBcast, i.e., roots send data to leaves. We dump the communication pattern into a matrix
     from the senders' viewpoint: how many bytes I will send to my neighbors.

     Looking at a column of the matrix, one can also see how many bytes the rank will receive from others.

     If a PetscSFLink in bas->inuse were available, we could use it to get the tree vertex size. But that
     would give different interpretations of the same SF for different data types. Since we care most about
     VecScatter, we uniformly treat each vertex as a PetscScalar.
  */
  unitbytes = (PetscScalar)sizeof(PetscScalar);

  PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, &rootranks, &rootoffset, NULL));
  PetscCall(MatCreateAIJ(comm, 1, 1, size, size, 1, NULL, nrootranks - ndrootranks, NULL, &A));
  PetscCall(MatSetOptionsPrefix(A, "__petsc_internal__")); /* To prevent the internal A from taking any command line options */
  for (i = 0; i < nrootranks; i++) PetscCall(MatSetValue(A, (PetscInt)rank, bas->iranks[i], (rootoffset[i + 1] - rootoffset[i]) * unitbytes, INSERT_VALUES));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatView(A, viewer));
  PetscCall(MatDestroy(&A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf, PetscViewer viewer)
{
  PetscBool isascii;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  if (isascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) PetscCall(PetscViewerASCIIPrintf(viewer, " MultiSF sort=%s\n", sf->rankorder ? "rank-order" : "unordered"));
#if defined(PETSC_USE_SINGLE_LIBRARY)
  else {
    PetscBool isdraw, isbinary;
    PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
    PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
    if ((isascii && viewer->format == PETSC_VIEWER_ASCII_MATLAB) || isdraw || isbinary) PetscCall(PetscSFView_Basic_PatternAndSizes(sf, viewer));
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PetscSFBcastBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  /* Create a communication link, which provides buffers, MPI requests etc (if MPI is used) */
  PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, PETSCSF_BCAST, &link));
  /* Pack rootdata to rootbuf for remote communication */
  PetscCall(PetscSFLinkPackRootData(sf, link, PETSCSF_REMOTE, rootdata));
  /* Start communication, e.g., post MPI_Isend */
  PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_ROOT2LEAF));
  /* Do local scatter (i.e., self to self communication), which overlaps with the remote communication above */
  PetscCall(PetscSFLinkScatterLocal(sf, link, PETSCSF_ROOT2LEAF, (void *)rootdata, leafdata, op));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode PetscSFBcastEnd_Basic(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  /* Retrieve the link used in XxxBegin() with root/leafdata as key */
  PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link));
  /* Finish remote communication, e.g., post MPI_Waitall */
  PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_ROOT2LEAF));
  /* Unpack data in leafbuf to leafdata for remote communication */
  PetscCall(PetscSFLinkUnpackLeafData(sf, link, PETSCSF_REMOTE, leafdata, op));
  /* Recycle the link */
  PetscCall(PetscSFLinkReclaim(sf, &link));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Shared by ReduceBegin and FetchAndOpBegin */
static inline PetscErrorCode PetscSFLeafToRootBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op, PetscSFOperation sfop, PetscSFLink *out)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, sfop, &link));
  PetscCall(PetscSFLinkPackLeafData(sf, link, PETSCSF_REMOTE, leafdata));
  PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_LEAF2ROOT));
  *out = link;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* leaf -> root with reduction */
static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLeafToRootBegin_Basic(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op, PETSCSF_REDUCE, &link));
  PetscCall(PetscSFLinkScatterLocal(sf, link, PETSCSF_LEAF2ROOT, rootdata, (void *)leafdata, op));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link));
  PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_LEAF2ROOT));
  PetscCall(PetscSFLinkUnpackRootData(sf, link, PETSCSF_REMOTE, rootdata, op));
  PetscCall(PetscSFLinkReclaim(sf, &link));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, void *leafupdate, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLeafToRootBegin_Basic(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op, PETSCSF_FETCH, &link));
  PetscCall(PetscSFLinkFetchAndOpLocal(sf, link, rootdata, leafdata, leafupdate, op));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link));
  /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
  PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_LEAF2ROOT));
  /* Do fetch-and-op, the (remote) update results are in rootbuf */
  PetscCall(PetscSFLinkFetchAndOpRemote(sf, link, rootdata, op));
  /* Bcast rootbuf to leafupdate */
  PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_ROOT2LEAF));
  PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_ROOT2LEAF));
  /* Unpack and insert fetched data into leaves */
  PetscCall(PetscSFLinkUnpackLeafData(sf, link, PETSCSF_REMOTE, leafupdate, MPI_REPLACE));
  PetscCall(PetscSFLinkReclaim(sf, &link));
  PetscFunctionReturn(PETSC_SUCCESS);
}
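
/* Semantics sketch for fetch-and-op (illustrative numbers): with op = MPI_SUM, a root holding 10 and two
   leaves (possibly on other ranks) contributing 3 and 5, the root ends up holding 18, while each leaf's
   leafupdate receives the root value seen just before that leaf's own contribution was applied, e.g. 10
   and 13 (or 10 and 15); the order in which contributions are applied is not specified. */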

PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf, PetscInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc)
{
  PetscSF_Basic *bas = (PetscSF_Basic *)sf->data;

  PetscFunctionBegin;
  if (niranks) *niranks = bas->niranks;
  if (iranks) *iranks = bas->iranks;
  if (ioffset) *ioffset = bas->ioffset;
  if (irootloc) *irootloc = bas->irootloc;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* An optimized PetscSFCreateEmbeddedRootSF. We aggressively make use of the established communication on sf.
   We need one bcast on sf, and no further communication to build the embedded sf. Note that selected[]
   was sorted before calling the routine.
 */
PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedRootSF_Basic(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
{
  PetscSF            esf;
  PetscInt           esf_nranks, esf_ndranks, *esf_roffset, *esf_rmine, *esf_rremote;
  PetscInt           i, j, p, q, nroots, esf_nleaves, *new_ilocal, nranks, ndranks, niranks, ndiranks, minleaf, maxleaf, maxlocal;
  char              *rootdata, *leafdata = NULL, *leafmem; /* Only stores 0 or 1, so we can save memory with char */
  PetscMPIInt       *esf_ranks;
  const PetscMPIInt *ranks, *iranks;
  const PetscInt    *roffset, *rmine, *rremote, *ioffset, *irootloc;
  PetscBool          connected;
  PetscSFNode       *new_iremote;
  PetscSF_Basic     *bas;

  PetscFunctionBegin;
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), &esf));
  PetscCall(PetscSFSetFromOptions(esf));
  PetscCall(PetscSFSetType(esf, PETSCSFBASIC)); /* This optimized routine can only create a basic sf */

  /* Find out which leaves are still connected to roots in the embedded sf by doing a Bcast */
  PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
  PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
  maxlocal = maxleaf - minleaf + 1;
  PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
  if (leafmem) leafdata = leafmem - minleaf;
  /* Tag selected roots */
  for (i = 0; i < nselected; ++i) rootdata[selected[i]] = 1;

  PetscCall(PetscSFBcastBegin(sf, MPI_CHAR, rootdata, leafdata, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPI_CHAR, rootdata, leafdata, MPI_REPLACE));
  PetscCall(PetscSFGetLeafInfo_Basic(sf, &nranks, &ndranks, &ranks, &roffset, &rmine, &rremote)); /* Get send info */
  esf_nranks = esf_ndranks = esf_nleaves = 0;
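  /* Two-pass strategy: the loop below only counts the still-connected root ranks and leaves so that the
     esf_* arrays can be allocated with exact sizes; a second pass further down then fills them. */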
  for (i = 0; i < nranks; i++) {
    connected = PETSC_FALSE; /* Is this process still connected to this remote root rank? */
    for (j = roffset[i]; j < roffset[i + 1]; j++) {
      if (leafdata[rmine[j]]) {
        esf_nleaves++;
        connected = PETSC_TRUE;
      }
    }
    if (connected) {
      esf_nranks++;
      if (i < ndranks) esf_ndranks++;
    }
  }

  /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */
  PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
  PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
  PetscCall(PetscMalloc4(esf_nranks, &esf_ranks, esf_nranks + 1, &esf_roffset, esf_nleaves, &esf_rmine, esf_nleaves, &esf_rremote));
  p              = 0; /* Counter for connected root ranks */
  q              = 0; /* Counter for connected leaves */
  esf_roffset[0] = 0;
  for (i = 0; i < nranks; i++) { /* Scan leaf data again to fill esf arrays */
    connected = PETSC_FALSE;
    for (j = roffset[i]; j < roffset[i + 1]; j++) {
      if (leafdata[rmine[j]]) {
        esf_rmine[q] = new_ilocal[q] = rmine[j];
        esf_rremote[q]               = rremote[j];
        new_iremote[q].index         = rremote[j];
        new_iremote[q].rank          = ranks[i];
        connected                    = PETSC_TRUE;
        q++;
      }
    }
    if (connected) {
      esf_ranks[p]       = ranks[i];
      esf_roffset[p + 1] = q;
      p++;
    }
  }

  /* SetGraph internally resets the SF, so we only set its fields after the call */
  PetscCall(PetscSFSetGraph(esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
  esf->nranks    = esf_nranks;
  esf->ndranks   = esf_ndranks;
  esf->ranks     = esf_ranks;
  esf->roffset   = esf_roffset;
  esf->rmine     = esf_rmine;
  esf->rremote   = esf_rremote;
  esf->nleafreqs = esf_nranks - esf_ndranks;

  /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */
  bas = (PetscSF_Basic *)esf->data;
  PetscCall(PetscSFGetRootInfo_Basic(sf, &niranks, &ndiranks, &iranks, &ioffset, &irootloc)); /* Get recv info */
  /* Embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here,
     but we do not care since these arrays are usually short. The benefit is we can fill these arrays by just parsing irootloc once.
  */
  PetscCall(PetscMalloc2(niranks, &bas->iranks, niranks + 1, &bas->ioffset));
  PetscCall(PetscMalloc1(ioffset[niranks], &bas->irootloc));
  bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
  p = 0; /* Counter for connected leaf ranks */
  q = 0; /* Counter for connected roots */
  for (i = 0; i < niranks; i++) {
    connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
    for (j = ioffset[i]; j < ioffset[i + 1]; j++) {
      if (rootdata[irootloc[j]]) {
        bas->irootloc[q++] = irootloc[j];
        connected          = PETSC_TRUE;
      }
    }
    if (connected) {
      bas->niranks++;
      if (i < ndiranks) bas->ndiranks++; /* Note that the order of ranks (including distinguished ranks) is kept */
      bas->iranks[p]      = iranks[i];
      bas->ioffset[p + 1] = q;
      p++;
    }
  }
  bas->itotal     = q;
  bas->nrootreqs  = bas->niranks - bas->ndiranks;
  esf->persistent = PETSC_TRUE;
  /* Setup fields related to packing */
  PetscCall(PetscSFSetUpPackFields(esf));

  /* Copy from PetscSFSetUp(), since this method wants to skip PetscSFSetUp(). */
#if defined(PETSC_HAVE_CUDA)
  if (esf->backend == PETSCSF_BACKEND_CUDA) {
    esf->ops->Malloc = PetscSFMalloc_CUDA;
    esf->ops->Free   = PetscSFFree_CUDA;
  }
#endif

#if defined(PETSC_HAVE_HIP)
  /* TODO: Needs debugging */
  if (esf->backend == PETSCSF_BACKEND_HIP) {
    esf->ops->Malloc = PetscSFMalloc_HIP;
    esf->ops->Free   = PetscSFFree_HIP;
  }
#endif

#if defined(PETSC_HAVE_KOKKOS)
  if (esf->backend == PETSCSF_BACKEND_KOKKOS) {
    esf->ops->Malloc = PetscSFMalloc_Kokkos;
    esf->ops->Free   = PetscSFFree_Kokkos;
  }
#endif
  esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
  PetscCall(PetscFree2(rootdata, leafmem));
  *newsf = esf;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
{
  PetscSF_Basic *dat;

  PetscFunctionBegin;
  sf->ops->SetUp                = PetscSFSetUp_Basic;
  sf->ops->Reset                = PetscSFReset_Basic;
  sf->ops->Destroy              = PetscSFDestroy_Basic;
  sf->ops->View                 = PetscSFView_Basic;
  sf->ops->BcastBegin           = PetscSFBcastBegin_Basic;
  sf->ops->BcastEnd             = PetscSFBcastEnd_Basic;
  sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
  sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
  sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
  sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
  sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
  sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic;

  PetscCall(PetscNew(&dat));
  sf->data = (void *)dat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
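
/* A minimal caller-side sketch of how a basic SF is typically used (illustrative only; the graph shown is
   hypothetical, and error handling plus declarations of nroots/rootdata/leafdata are elided):

     PetscSF     sf;
     PetscSFNode iremote[1] = {{.rank = 0, .index = 3}}; // one leaf referencing root 3 owned by rank 0

     PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
     PetscCall(PetscSFSetType(sf, PETSCSFBASIC));
     PetscCall(PetscSFSetGraph(sf, nroots, 1, NULL, PETSC_COPY_VALUES, iremote, PETSC_COPY_VALUES));
     PetscCall(PetscSFSetUp(sf)); // dispatches to PetscSFSetUp_Basic() above
     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
     PetscCall(PetscSFDestroy(&sf));
*/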