#include <../src/vec/is/sf/impls/basic/allgatherv/sfallgatherv.h>

PETSC_INTERN PetscErrorCode PetscSFBcastAndOpBegin_Gatherv(PetscSF,MPI_Datatype,const void*,void*,MPI_Op);

/*===================================================================================*/
/*              Internal routines for PetscSFPack_Allgatherv                          */
/*===================================================================================*/
PETSC_INTERN PetscErrorCode PetscSFPackGet_Allgatherv(PetscSF sf,MPI_Datatype unit,const void *rkey,const void *lkey,PetscSFPack_Allgatherv *mylink)
{
  PetscSF_Allgatherv     *dat = (PetscSF_Allgatherv*)sf->data;
  PetscSFPack_Allgatherv link;
  PetscSFPack            *p;
  PetscErrorCode         ierr;

  PetscFunctionBegin;
  ierr = PetscSFPackSetErrorOnUnsupportedOverlap(sf,unit,rkey,lkey);CHKERRQ(ierr);
  /* Look for types in cache */
  for (p=&dat->avail; (link=(PetscSFPack_Allgatherv)*p); p=&link->next) {
    PetscBool match;
    ierr = MPIPetsc_Type_compare(unit,link->unit,&match);CHKERRQ(ierr);
    if (match) {
      *p = link->next; /* Remove from available list */
      goto found;
    }
  }

  ierr = PetscNew(&link);CHKERRQ(ierr);
  ierr = PetscSFPackSetupType((PetscSFPack)link,unit);CHKERRQ(ierr);
  /* Must be initialized to MPI_REQUEST_NULL so that we can safely wait on it even if it is never used */
  link->request = MPI_REQUEST_NULL;
  /* One tag per link */
  ierr = PetscCommGetNewTag(PetscObjectComm((PetscObject)sf),&link->tag);CHKERRQ(ierr);

  /* DO NOT allocate link->root/leaf here. We allocate lazily since these buffers are likely not needed */
found:
  link->rkey = rkey;
  link->lkey = lkey;
  link->next = dat->inuse;
  dat->inuse = (PetscSFPack)link;

  *mylink = link;
  PetscFunctionReturn(0);
}
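/*
   Lifecycle sketch (descriptive, based only on the routines in this file): a link obtained
   from PetscSFPackGet_Allgatherv() sits on dat->inuse until the matching End routine
   reclaims it, after which the cache lookup above can reuse it for the same MPI datatype:

     PetscSFPackGet_Allgatherv(sf,unit,rootdata,leafdata,&link);    - take from dat->avail, or create
     ... start a nonblocking collective that fills link->request ...
     PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);
     MPI_Wait(&link->request,MPI_STATUS_IGNORE);
     PetscSFPackReclaim(sf,(PetscSFPack*)&link);                    - return to dat->avail
*/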
/*===================================================================================*/
/*              Implementations of SF public APIs                                     */
/*===================================================================================*/

/* PetscSFGetGraph is non-collective. An implementation should not have collective calls */
PETSC_INTERN PetscErrorCode PetscSFGetGraph_Allgatherv(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
{
  PetscErrorCode ierr;
  PetscInt       i,j,k;
  const PetscInt *range;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  if (nroots)  *nroots  = sf->nroots;
  if (nleaves) *nleaves = sf->nleaves;
  if (ilocal)  *ilocal  = NULL; /* Contiguous leaves */
  if (iremote) {
    if (!sf->remote && sf->nleaves) { /* The && sf->nleaves makes sfgatherv able to inherit this routine */
      ierr = PetscLayoutGetRanges(sf->map,&range);CHKERRQ(ierr);
      ierr = PetscMalloc1(sf->nleaves,&sf->remote);CHKERRQ(ierr);
      sf->remote_alloc = sf->remote;
      for (i=0; i<size; i++) {
        for (j=range[i],k=0; j<range[i+1]; j++,k++) {
          sf->remote[j].rank  = i;
          sf->remote[j].index = k;
        }
      }
    }
    *iremote = sf->remote;
  }
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode PetscSFSetUp_Allgatherv(PetscSF sf)
{
  PetscErrorCode     ierr;
  PetscSF_Allgatherv *dat = (PetscSF_Allgatherv*)sf->data;
  PetscMPIInt        size;
  PetscInt           i;
  const PetscInt     *range;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  if (sf->nleaves) { /* This if (sf->nleaves) test makes sfgatherv able to inherit this routine */
    ierr = PetscMalloc1(size,&dat->recvcounts);CHKERRQ(ierr);
    ierr = PetscMalloc1(size,&dat->displs);CHKERRQ(ierr);
    ierr = PetscLayoutGetRanges(sf->map,&range);CHKERRQ(ierr);

    for (i=0; i<size; i++) {
      ierr = PetscMPIIntCast(range[i],&dat->displs[i]);CHKERRQ(ierr);
      ierr = PetscMPIIntCast(range[i+1]-range[i],&dat->recvcounts[i]);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}
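/*
   Worked example for the loop above (illustrative numbers): with 3 ranks and a layout whose
   ranges are {0,3,5,9}, i.e. the ranks own 3, 2 and 4 roots respectively, we get

     recvcounts = {3,2,4}
     displs     = {0,3,5}

   which are exactly the receive-count and displacement arguments MPI_Allgatherv expects.
*/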
PETSC_INTERN PetscErrorCode PetscSFReset_Allgatherv(PetscSF sf)
{
  PetscSF_Allgatherv     *dat = (PetscSF_Allgatherv*)sf->data;
  PetscSFPack_Allgatherv link,next;
  PetscErrorCode         ierr;

  PetscFunctionBegin;
  ierr = PetscFree(dat->iranks);CHKERRQ(ierr);
  ierr = PetscFree(dat->ioffset);CHKERRQ(ierr);
  ierr = PetscFree(dat->irootloc);CHKERRQ(ierr);
  ierr = PetscFree(dat->recvcounts);CHKERRQ(ierr);
  ierr = PetscFree(dat->displs);CHKERRQ(ierr);
  if (dat->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed");
  for (link=(PetscSFPack_Allgatherv)dat->avail; link; link=next) {
    next = (PetscSFPack_Allgatherv)link->next;
    if (!link->isbuiltin) {ierr = MPI_Type_free(&link->unit);CHKERRQ(ierr);}
    ierr = PetscFree(link->root);CHKERRQ(ierr);
    ierr = PetscFree(link->leaf);CHKERRQ(ierr);
    ierr = PetscFree(link);CHKERRQ(ierr);
  }
  dat->avail = NULL;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode PetscSFDestroy_Allgatherv(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscSFReset_Allgatherv(sf);CHKERRQ(ierr);
  ierr = PetscFree(sf->data);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PetscSFBcastAndOpBegin_Allgatherv(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode         ierr;
  PetscSFPack_Allgatherv link;
  PetscMPIInt            sendcount;
  MPI_Comm               comm;
  void                   *recvbuf;
  PetscSF_Allgatherv     *dat = (PetscSF_Allgatherv*)sf->data;

  PetscFunctionBegin;
  ierr = PetscSFPackGet_Allgatherv(sf,unit,rootdata,leafdata,&link);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = PetscMPIIntCast(sf->nroots,&sendcount);CHKERRQ(ierr);

  if (op == MPIU_REPLACE) {
    /* REPLACE wants the raw allgatherv result, so receive directly into leafdata */
    recvbuf = leafdata;
  } else {
    /* Other ops need a buffer that the End routine combines into leafdata */
    if (!link->leaf) {ierr = PetscMalloc(sf->nleaves*link->unitbytes,&link->leaf);CHKERRQ(ierr);}
    recvbuf = link->leaf;
  }
  ierr = MPIU_Iallgatherv(rootdata,sendcount,unit,recvbuf,dat->recvcounts,dat->displs,unit,comm,&link->request);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode PetscSFBcastAndOpEnd_Allgatherv(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode         ierr,(*UnpackAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);
  PetscSFPack_Allgatherv link;

  PetscFunctionBegin;
  ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr);
  ierr = PetscSFPackGetUnpackAndOp(sf,(PetscSFPack)link,op,&UnpackAndOp);CHKERRQ(ierr);
  ierr = MPI_Wait(&link->request,MPI_STATUS_IGNORE);CHKERRQ(ierr);

  if (op != MPIU_REPLACE) {
    if (UnpackAndOp) {ierr = (*UnpackAndOp)(sf->nleaves,link->bs,NULL,0,NULL,leafdata,link->leaf);CHKERRQ(ierr);}
#if defined(PETSC_HAVE_MPI_REDUCE_LOCAL)
    else {
      /* op might be user-defined */
      PetscMPIInt count;
      ierr = PetscMPIIntCast(sf->nleaves,&count);CHKERRQ(ierr);
      ierr = MPI_Reduce_local(link->leaf,leafdata,count,unit,op);CHKERRQ(ierr);
    }
#else
    else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No unpacking reduction operation for this MPI_Op");
#endif
  }
  ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
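/*
   Typical call sequence from user code (a sketch; PetscSFBcastAndOpBegin/End are the public
   entry points that dispatch to the two routines above for SFs of this type):

     PetscSFBcastAndOpBegin(sf,MPIU_SCALAR,rootdata,leafdata,MPIU_SUM);
     ... local work overlapping the MPI_Iallgatherv ...
     PetscSFBcastAndOpEnd(sf,MPIU_SCALAR,rootdata,leafdata,MPIU_SUM);

   With op = MPIU_REPLACE the allgatherv receives directly into leafdata and the End routine
   only waits; with any other op it receives into link->leaf and the End routine combines
   link->leaf into leafdata with UnpackAndOp or MPI_Reduce_local.
*/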
static PetscErrorCode PetscSFReduceBegin_Allgatherv(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode         ierr;
  PetscSFPack_Allgatherv link;
  PetscSF_Allgatherv     *dat = (PetscSF_Allgatherv*)sf->data;
  PetscInt               rstart;
  PetscMPIInt            rank,count,recvcount;
  MPI_Comm               comm;

  PetscFunctionBegin;
  ierr = PetscSFPackGet_Allgatherv(sf,unit,rootdata,leafdata,&link);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  if (op == MPIU_REPLACE) {
    /* REPLACE is only meaningful when all processes have the same leafdata to reduce,
       so copying from the local leafdata is fine */
    ierr = PetscLayoutGetRange(sf->map,&rstart,NULL);CHKERRQ(ierr);
    ierr = PetscMemcpy(rootdata,(const char*)leafdata+(size_t)rstart*link->unitbytes,(size_t)sf->nroots*link->unitbytes);CHKERRQ(ierr);
  } else {
    /* Reduce all leafdata on rank 0, scatter the result to each rank's root buffer, then
       reduce the root buffer into rootdata in ReduceEnd */
    if (!rank && !link->leaf) {ierr = PetscMalloc(sf->nleaves*link->unitbytes,&link->leaf);CHKERRQ(ierr);}
    ierr = PetscMPIIntCast(sf->nleaves*link->bs,&count);CHKERRQ(ierr);
    ierr = PetscMPIIntCast(sf->nroots,&recvcount);CHKERRQ(ierr);
    ierr = MPI_Reduce(leafdata,link->leaf,count,link->basicunit,op,0,comm);CHKERRQ(ierr); /* Must reduce with the MPI builtin datatype basicunit */
    if (!link->root) {ierr = PetscMalloc(sf->nroots*link->unitbytes,&link->root);CHKERRQ(ierr);}
    ierr = MPIU_Iscatterv(link->leaf,dat->recvcounts,dat->displs,unit,link->root,recvcount,unit,0,comm,&link->request);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Allgatherv(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode         ierr,(*UnpackAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);
  PetscSFPack_Allgatherv link;

  PetscFunctionBegin;
  ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr);
  ierr = MPI_Wait(&link->request,MPI_STATUS_IGNORE);CHKERRQ(ierr);
  if (op != MPIU_REPLACE) {
    ierr = PetscSFPackGetUnpackAndOp(sf,(PetscSFPack)link,op,&UnpackAndOp);CHKERRQ(ierr);
    if (UnpackAndOp) {ierr = (*UnpackAndOp)(sf->nroots,link->bs,NULL,0,NULL,rootdata,link->root);CHKERRQ(ierr);}
#if defined(PETSC_HAVE_MPI_REDUCE_LOCAL)
    else if (sf->nroots) {
      /* op might be user-defined */
      PetscMPIInt count;
      ierr = PetscMPIIntCast(sf->nroots,&count);CHKERRQ(ierr);
      ierr = MPI_Reduce_local(link->root,rootdata,count,unit,op);CHKERRQ(ierr);
    }
#else
    else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No unpacking reduction operation for this MPI_Op");
#endif
  }
  ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PetscSFBcastToZero_Allgatherv(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscErrorCode         ierr;
  PetscSFPack_Allgatherv link;

  PetscFunctionBegin;
  ierr = PetscSFBcastAndOpBegin_Gatherv(sf,unit,rootdata,leafdata,MPIU_REPLACE);CHKERRQ(ierr);
  /* A simplified PetscSFBcastAndOpEnd_Allgatherv */
  ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr);
  ierr = MPI_Wait(&link->request,MPI_STATUS_IGNORE);CHKERRQ(ierr);
  ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
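/*
   Worked example for the non-REPLACE reduce path above (illustrative numbers): 3 ranks, one
   root per rank (so nroots = 1 and nleaves = 3 everywhere), op = MPIU_SUM, and
   leafdata = {2,3,4} on every rank. MPI_Reduce leaves the elementwise sum {6,9,12} in
   link->leaf on rank 0; MPI_Iscatterv (recvcounts = {1,1,1}, displs = {0,1,2}) then delivers
   6, 9 and 12 into link->root on ranks 0, 1 and 2; finally PetscSFReduceEnd_Allgatherv folds
   link->root into rootdata with the same op.
*/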
/* This routine is tricky: fetch-and-op is rarely used with this kind of graph, so we provide a simple but not-optimal implementation.

   Suppose we have three ranks. Rank 0 has a root with value 1. Ranks 0,1,2 have one leaf each, with values 2,3,4 respectively. All
   leaves are connected to the root on rank 0. Suppose op=MPI_SUM and ranks 0,1,2 access the root in rank order. By definition of this
   routine, rank 0 sees 1 in the root, fetches it into its leafupdate, then updates the root to 1 + 2 = 3; rank 1 sees 3 in the root,
   fetches it into its leafupdate, then updates the root to 3 + 3 = 6; rank 2 sees 6 in the root, fetches it into its leafupdate, then
   updates the root to 6 + 4 = 10. At the end, leafupdate on ranks 0,1,2 is 1,3,6 respectively, and the root is 10.

   One optimized implementation could be: starting from the initial state:

                rank-0   rank-1   rank-2
   Root          1
   Leaf          2        3        4

   Shift leaves rightwards into leafupdate; rank 0 instead gathers the root value into its leafupdate. We have:

                rank-0   rank-1   rank-2
   Root          1
   Leaf          2        3        4
   Leafupdate    1        2        3

   Then, do MPI_Scan on leafupdate and get:

                rank-0   rank-1   rank-2
   Root          1
   Leaf          2        3        4
   Leafupdate    1        3        6

   Rank 2 sums its leaf and leafupdate, scatters the result to the root, and gets:

                rank-0   rank-1   rank-2
   Root         10
   Leaf          2        3        4
   Leafupdate    1        3        6

   We use a simpler implementation. From the same initial state, we copy leafdata to leafupdate:

                rank-0   rank-1   rank-2
   Root          1
   Leaf          2        3        4
   Leafupdate    2        3        4

   Do MPI_Exscan on leafupdate:

                rank-0   rank-1   rank-2
   Root          1
   Leaf          2        3        4
   Leafupdate    2        2        5

   BcastAndOp from root to leafupdate:

                rank-0   rank-1   rank-2
   Root          1
   Leaf          2        3        4
   Leafupdate    3        3        6

   Copy the root to leafupdate on rank 0:

                rank-0   rank-1   rank-2
   Root          1
   Leaf          2        3        4
   Leafupdate    1        3        6

   Reduce from leaf to root:

                rank-0   rank-1   rank-2
   Root         10
   Leaf          2        3        4
   Leafupdate    1        3        6
*/
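/*
   A minimal standalone sketch of the same scheme on plain ints (illustrative only, not part
   of this file; assumes 3 ranks in comm and mirrors the tables above):

     int leaf = 2 + rank, leafupdate = leaf, root = (rank == 0) ? 1 : 0, tmp;
     MPI_Exscan(MPI_IN_PLACE,&leafupdate,1,MPI_INT,MPI_SUM,comm);   - leafupdate = ?,2,5
     MPI_Bcast(&root,1,MPI_INT,0,comm);
     leafupdate += root;                                            - leafupdate = ?,3,6
     if (rank == 0) leafupdate = 1;                                 - leafupdate = 1,3,6
     MPI_Reduce(&leaf,&tmp,1,MPI_INT,MPI_SUM,0,comm);
     if (rank == 0) root += tmp;                                    - root = 10 on rank 0

   Note that the MPI standard leaves rank 0's receive buffer undefined after MPI_Exscan; that
   is harmless here (as in the routine below) because rank 0's leafupdate is overwritten by
   the copy from the root before it is ever read.
*/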
PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Allgatherv(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode         ierr;
  PetscSFPack_Allgatherv link;
  MPI_Comm               comm;
  PetscMPIInt            count;

  PetscFunctionBegin;
  /* Copy leafdata to leafupdate */
  ierr = PetscSFPackGet_Allgatherv(sf,unit,rootdata,leafdata,&link);CHKERRQ(ierr);
  ierr = PetscMemcpy(leafupdate,leafdata,sf->nleaves*link->unitbytes);CHKERRQ(ierr);
  ierr = PetscSFPackGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,(PetscSFPack*)&link);CHKERRQ(ierr);

  /* Exscan on leafupdate, then BcastAndOp rootdata to leafupdate */
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = PetscMPIIntCast(sf->nleaves,&count);CHKERRQ(ierr);
  if (op == MPIU_REPLACE) {
    /* REPLACE has no usable MPI_Op for MPI_Exscan, so shift leafupdate rightwards by one rank instead */
    PetscMPIInt size,rank,prev,next;
    ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    prev = rank ?            rank-1 : MPI_PROC_NULL;
    next = (rank < size-1) ? rank+1 : MPI_PROC_NULL;
    ierr = MPI_Sendrecv_replace(leafupdate,count,unit,next,link->tag,prev,link->tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr);
  } else {ierr = MPI_Exscan(MPI_IN_PLACE,leafupdate,count,unit,op,comm);CHKERRQ(ierr);}
  ierr = PetscSFPackReclaim(sf,(PetscSFPack*)&link);CHKERRQ(ierr);
  ierr = PetscSFBcastAndOpBegin(sf,unit,rootdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscSFBcastAndOpEnd(sf,unit,rootdata,leafupdate,op);CHKERRQ(ierr);

  /* Bcast roots to rank 0's leafupdate */
  ierr = PetscSFBcastToZero_Private(sf,unit,rootdata,leafupdate);CHKERRQ(ierr); /* Using this routine lets Allgather SFs inherit this implementation */

  /* Reduce leafdata to rootdata */
  ierr = PetscSFReduceBegin(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode PetscSFFetchAndOpEnd_Allgatherv(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscSFReduceEnd(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Get root ranks accessing my leaves */
PETSC_INTERN PetscErrorCode PetscSFGetRootRanks_Allgatherv(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
{
  PetscErrorCode ierr;
  PetscInt       i,j,k,size;
  const PetscInt *range;

  PetscFunctionBegin;
  /* Lazily construct these large arrays if users really need them for this type of SF. Very likely, they do not */
  if (sf->nranks && !sf->ranks) { /* On rank != 0, sf->nranks = 0. The sf->nranks test makes this routine also work for sfgatherv */
    size = sf->nranks;
    ierr = PetscLayoutGetRanges(sf->map,&range);CHKERRQ(ierr);
    ierr = PetscMalloc4(size,&sf->ranks,size+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
    for (i=0; i<size; i++) sf->ranks[i] = i;
    ierr = PetscArraycpy(sf->roffset,range,size+1);CHKERRQ(ierr);
    for (i=0; i<sf->nleaves; i++) sf->rmine[i] = i; /* rmine is never NULL even for contiguous leaves */
    for (i=0; i<size; i++) {
      for (j=range[i],k=0; j<range[i+1]; j++,k++) sf->rremote[j] = k;
    }
  }

  if (nranks)  *nranks  = sf->nranks;
  if (ranks)   *ranks   = sf->ranks;
  if (roffset) *roffset = sf->roffset;
  if (rmine)   *rmine   = sf->rmine;
  if (rremote) *rremote = sf->rremote;
  PetscFunctionReturn(0);
}
/* Get leaf ranks accessing my roots */
PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Allgatherv(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
{
  PetscErrorCode     ierr;
  PetscSF_Allgatherv *dat = (PetscSF_Allgatherv*)sf->data;
  MPI_Comm           comm;
  PetscMPIInt        size,rank;
  PetscInt           i,j;

  PetscFunctionBegin;
  /* Lazily construct these large arrays if users really need them for this type of SF. Very likely, they do not */
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (niranks) *niranks = size;

  /* PetscSF_Basic has distinguished incoming ranks. Here we do not need that, but we must put self as
     the first and sort the other ranks. See comments in PetscSFSetUp_Basic about MatGetBrowsOfAoCols_MPIAIJ on why.
  */
  if (iranks) {
    if (!dat->iranks) {
      ierr = PetscMalloc1(size,&dat->iranks);CHKERRQ(ierr);
      dat->iranks[0] = rank;
      for (i=0,j=1; i<size; i++) {if (i == rank) continue; dat->iranks[j++] = i;}
    }
    *iranks = dat->iranks; /* dat->iranks was initialized to NULL by PetscNewLog */
  }

  if (ioffset) {
    if (!dat->ioffset) {
      ierr = PetscMalloc1(size+1,&dat->ioffset);CHKERRQ(ierr);
      for (i=0; i<=size; i++) dat->ioffset[i] = i*sf->nroots;
    }
    *ioffset = dat->ioffset;
  }

  if (irootloc) {
    if (!dat->irootloc) {
      ierr = PetscMalloc1(sf->nleaves,&dat->irootloc);CHKERRQ(ierr);
      for (i=0; i<size; i++) {
        for (j=0; j<sf->nroots; j++) dat->irootloc[i*sf->nroots+j] = j;
      }
    }
    *irootloc = dat->irootloc;
  }
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode PetscSFCreateLocalSF_Allgatherv(PetscSF sf,PetscSF *out)
{
  PetscInt       i,nroots,nleaves,rstart,*ilocal;
  PetscSFNode    *iremote;
  PetscSF        lsf;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  nroots  = sf->nroots;
  nleaves = sf->nleaves ? sf->nroots : 0; /* The local SF connects each local root to itself */
  ierr = PetscMalloc1(nleaves,&ilocal);CHKERRQ(ierr);
  ierr = PetscMalloc1(nleaves,&iremote);CHKERRQ(ierr);
  ierr = PetscLayoutGetRange(sf->map,&rstart,NULL);CHKERRQ(ierr);

  for (i=0; i<nleaves; i++) {
    ilocal[i]        = rstart + i; /* lsf does not change leaf indices */
    iremote[i].rank  = 0;          /* rank in PETSC_COMM_SELF */
    iremote[i].index = i;          /* root index */
  }

  ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(lsf,nroots,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
  *out = lsf;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode PetscSFCreate_Allgatherv(PetscSF sf)
{
  PetscErrorCode     ierr;
  PetscSF_Allgatherv *dat = (PetscSF_Allgatherv*)sf->data;

  PetscFunctionBegin;
  sf->ops->SetUp           = PetscSFSetUp_Allgatherv;
  sf->ops->Reset           = PetscSFReset_Allgatherv;
  sf->ops->Destroy         = PetscSFDestroy_Allgatherv;
  sf->ops->GetRootRanks    = PetscSFGetRootRanks_Allgatherv;
  sf->ops->GetLeafRanks    = PetscSFGetLeafRanks_Allgatherv;
  sf->ops->GetGraph        = PetscSFGetGraph_Allgatherv;
  sf->ops->BcastAndOpBegin = PetscSFBcastAndOpBegin_Allgatherv;
  sf->ops->BcastAndOpEnd   = PetscSFBcastAndOpEnd_Allgatherv;
  sf->ops->ReduceBegin     = PetscSFReduceBegin_Allgatherv;
  sf->ops->ReduceEnd       = PetscSFReduceEnd_Allgatherv;
  sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Allgatherv;
  sf->ops->FetchAndOpEnd   = PetscSFFetchAndOpEnd_Allgatherv;
  sf->ops->CreateLocalSF   = PetscSFCreateLocalSF_Allgatherv;
  sf->ops->BcastToZero     = PetscSFBcastToZero_Allgatherv;

  ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr);
  sf->data = (void*)dat;
  PetscFunctionReturn(0);
}
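/*
   Usage sketch (hedged: the calls below are from the public PetscSF API, and the exact
   creation path may vary between PETSc versions): SFs of the allgather(v) family are
   normally obtained by giving an SF a pattern and a root layout rather than by calling
   PetscSFCreate_Allgatherv() directly, e.g.

     PetscSF     sf;
     PetscLayout map;                                              - layout of the roots
     PetscSFCreate(comm,&sf);
     PetscSFSetGraphWithPattern(sf,map,PETSCSF_PATTERN_ALLGATHER);
     PetscSFSetUp(sf);
     PetscSFBcastAndOpBegin(sf,MPIU_SCALAR,rootdata,leafdata,MPIU_REPLACE);
     PetscSFBcastAndOpEnd(sf,MPIU_SCALAR,rootdata,leafdata,MPIU_REPLACE);
     PetscSFDestroy(&sf);
*/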