1 2 #include <petsc/private/matimpl.h> 3 4 #define DEFAULT_STASH_SIZE 10000 5 6 static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*); 7 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*); 8 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*); 9 #if !defined(PETSC_HAVE_MPIUNI) 10 static PetscErrorCode MatStashScatterBegin_BTS(Mat,MatStash*,PetscInt*); 11 static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*); 12 static PetscErrorCode MatStashScatterEnd_BTS(MatStash*); 13 #endif 14 15 /* 16 MatStashCreate_Private - Creates a stash,currently used for all the parallel 17 matrix implementations. The stash is where elements of a matrix destined 18 to be stored on other processors are kept until matrix assembly is done. 19 20 This is a simple minded stash. Simply adds entries to end of stash. 21 22 Input Parameters: 23 comm - communicator, required for scatters. 24 bs - stash block size. used when stashing blocks of values 25 26 Output Parameters: 27 stash - the newly created stash 28 */ 29 PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash) 30 { 31 PetscErrorCode ierr; 32 PetscInt max,*opt,nopt,i; 33 PetscBool flg; 34 35 PetscFunctionBegin; 36 /* Require 2 tags,get the second using PetscCommGetNewTag() */ 37 stash->comm = comm; 38 39 ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr); 40 ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr); 41 ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRMPI(ierr); 42 ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRMPI(ierr); 43 ierr = PetscMalloc1(2*stash->size,&stash->flg_v);CHKERRQ(ierr); 44 for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 45 46 nopt = stash->size; 47 ierr = PetscMalloc1(nopt,&opt);CHKERRQ(ierr); 48 ierr = PetscOptionsGetIntArray(NULL,NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr); 49 if (flg) { 50 if (nopt == 1) max = opt[0]; 51 else if (nopt == stash->size) max = opt[stash->rank]; 52 else if (stash->rank < nopt) max = opt[stash->rank]; 53 else max = 0; /* Use default */ 54 stash->umax = max; 55 } else { 56 stash->umax = 0; 57 } 58 ierr = PetscFree(opt);CHKERRQ(ierr); 59 if (bs <= 0) bs = 1; 60 61 stash->bs = bs; 62 stash->nmax = 0; 63 stash->oldnmax = 0; 64 stash->n = 0; 65 stash->reallocs = -1; 66 stash->space_head = NULL; 67 stash->space = NULL; 68 69 stash->send_waits = NULL; 70 stash->recv_waits = NULL; 71 stash->send_status = NULL; 72 stash->nsends = 0; 73 stash->nrecvs = 0; 74 stash->svalues = NULL; 75 stash->rvalues = NULL; 76 stash->rindices = NULL; 77 stash->nprocessed = 0; 78 stash->reproduce = PETSC_FALSE; 79 stash->blocktype = MPI_DATATYPE_NULL; 80 81 ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_reproduce",&stash->reproduce,NULL);CHKERRQ(ierr); 82 #if !defined(PETSC_HAVE_MPIUNI) 83 flg = PETSC_FALSE; 84 ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_legacy",&flg,NULL);CHKERRQ(ierr); 85 if (!flg) { 86 stash->ScatterBegin = MatStashScatterBegin_BTS; 87 stash->ScatterGetMesg = MatStashScatterGetMesg_BTS; 88 stash->ScatterEnd = MatStashScatterEnd_BTS; 89 stash->ScatterDestroy = MatStashScatterDestroy_BTS; 90 } else { 91 #endif 92 stash->ScatterBegin = MatStashScatterBegin_Ref; 93 stash->ScatterGetMesg = MatStashScatterGetMesg_Ref; 94 stash->ScatterEnd = MatStashScatterEnd_Ref; 95 stash->ScatterDestroy = NULL; 96 #if !defined(PETSC_HAVE_MPIUNI) 97 } 98 #endif 99 PetscFunctionReturn(0); 100 } 101 102 /* 103 MatStashDestroy_Private - Destroy the stash 104 */ 105 PetscErrorCode MatStashDestroy_Private(MatStash *stash) 106 { 107 PetscErrorCode ierr; 108 109 PetscFunctionBegin; 110 ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr); 111 if (stash->ScatterDestroy) {ierr = (*stash->ScatterDestroy)(stash);CHKERRQ(ierr);} 112 113 stash->space = NULL; 114 115 ierr = PetscFree(stash->flg_v);CHKERRQ(ierr); 116 PetscFunctionReturn(0); 117 } 118 119 /* 120 MatStashScatterEnd_Private - This is called as the final stage of 121 scatter. The final stages of message passing is done here, and 122 all the memory used for message passing is cleaned up. This 123 routine also resets the stash, and deallocates the memory used 124 for the stash. It also keeps track of the current memory usage 125 so that the same value can be used the next time through. 126 */ 127 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash) 128 { 129 PetscErrorCode ierr; 130 131 PetscFunctionBegin; 132 ierr = (*stash->ScatterEnd)(stash);CHKERRQ(ierr); 133 PetscFunctionReturn(0); 134 } 135 136 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash) 137 { 138 PetscErrorCode ierr; 139 PetscInt nsends=stash->nsends,bs2,oldnmax,i; 140 MPI_Status *send_status; 141 142 PetscFunctionBegin; 143 for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 144 /* wait on sends */ 145 if (nsends) { 146 ierr = PetscMalloc1(2*nsends,&send_status);CHKERRQ(ierr); 147 ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRMPI(ierr); 148 ierr = PetscFree(send_status);CHKERRQ(ierr); 149 } 150 151 /* Now update nmaxold to be app 10% more than max n used, this way the 152 wastage of space is reduced the next time this stash is used. 153 Also update the oldmax, only if it increases */ 154 if (stash->n) { 155 bs2 = stash->bs*stash->bs; 156 oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 157 if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 158 } 159 160 stash->nmax = 0; 161 stash->n = 0; 162 stash->reallocs = -1; 163 stash->nprocessed = 0; 164 165 ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr); 166 167 stash->space = NULL; 168 169 ierr = PetscFree(stash->send_waits);CHKERRQ(ierr); 170 ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr); 171 ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr); 172 ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr); 173 ierr = PetscFree(stash->rvalues);CHKERRQ(ierr); 174 ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr); 175 ierr = PetscFree(stash->rindices);CHKERRQ(ierr); 176 PetscFunctionReturn(0); 177 } 178 179 /* 180 MatStashGetInfo_Private - Gets the relavant statistics of the stash 181 182 Input Parameters: 183 stash - the stash 184 nstash - the size of the stash. Indicates the number of values stored. 185 reallocs - the number of additional mallocs incurred. 186 187 */ 188 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs) 189 { 190 PetscInt bs2 = stash->bs*stash->bs; 191 192 PetscFunctionBegin; 193 if (nstash) *nstash = stash->n*bs2; 194 if (reallocs) { 195 if (stash->reallocs < 0) *reallocs = 0; 196 else *reallocs = stash->reallocs; 197 } 198 PetscFunctionReturn(0); 199 } 200 201 /* 202 MatStashSetInitialSize_Private - Sets the initial size of the stash 203 204 Input Parameters: 205 stash - the stash 206 max - the value that is used as the max size of the stash. 207 this value is used while allocating memory. 208 */ 209 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max) 210 { 211 PetscFunctionBegin; 212 stash->umax = max; 213 PetscFunctionReturn(0); 214 } 215 216 /* MatStashExpand_Private - Expand the stash. This function is called 217 when the space in the stash is not sufficient to add the new values 218 being inserted into the stash. 219 220 Input Parameters: 221 stash - the stash 222 incr - the minimum increase requested 223 224 Notes: 225 This routine doubles the currently used memory. 226 */ 227 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr) 228 { 229 PetscErrorCode ierr; 230 PetscInt newnmax,bs2= stash->bs*stash->bs; 231 232 PetscFunctionBegin; 233 /* allocate a larger stash */ 234 if (!stash->oldnmax && !stash->nmax) { /* new stash */ 235 if (stash->umax) newnmax = stash->umax/bs2; 236 else newnmax = DEFAULT_STASH_SIZE/bs2; 237 } else if (!stash->nmax) { /* resuing stash */ 238 if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2; 239 else newnmax = stash->oldnmax/bs2; 240 } else newnmax = stash->nmax*2; 241 if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 242 243 /* Get a MatStashSpace and attach it to stash */ 244 ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr); 245 if (!stash->space_head) { /* new stash or resuing stash->oldnmax */ 246 stash->space_head = stash->space; 247 } 248 249 stash->reallocs++; 250 stash->nmax = newnmax; 251 PetscFunctionReturn(0); 252 } 253 /* 254 MatStashValuesRow_Private - inserts values into the stash. This function 255 expects the values to be roworiented. Multiple columns belong to the same row 256 can be inserted with a single call to this function. 257 258 Input Parameters: 259 stash - the stash 260 row - the global row correspoiding to the values 261 n - the number of elements inserted. All elements belong to the above row. 262 idxn - the global column indices corresponding to each of the values. 263 values - the values inserted 264 */ 265 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries) 266 { 267 PetscErrorCode ierr; 268 PetscInt i,k,cnt = 0; 269 PetscMatStashSpace space=stash->space; 270 271 PetscFunctionBegin; 272 /* Check and see if we have sufficient memory */ 273 if (!space || space->local_remaining < n) { 274 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 275 } 276 space = stash->space; 277 k = space->local_used; 278 for (i=0; i<n; i++) { 279 if (ignorezeroentries && values && values[i] == 0.0) continue; 280 space->idx[k] = row; 281 space->idy[k] = idxn[i]; 282 space->val[k] = values ? values[i] : 0.0; 283 k++; 284 cnt++; 285 } 286 stash->n += cnt; 287 space->local_used += cnt; 288 space->local_remaining -= cnt; 289 PetscFunctionReturn(0); 290 } 291 292 /* 293 MatStashValuesCol_Private - inserts values into the stash. This function 294 expects the values to be columnoriented. Multiple columns belong to the same row 295 can be inserted with a single call to this function. 296 297 Input Parameters: 298 stash - the stash 299 row - the global row correspoiding to the values 300 n - the number of elements inserted. All elements belong to the above row. 301 idxn - the global column indices corresponding to each of the values. 302 values - the values inserted 303 stepval - the consecutive values are sepated by a distance of stepval. 304 this happens because the input is columnoriented. 305 */ 306 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries) 307 { 308 PetscErrorCode ierr; 309 PetscInt i,k,cnt = 0; 310 PetscMatStashSpace space=stash->space; 311 312 PetscFunctionBegin; 313 /* Check and see if we have sufficient memory */ 314 if (!space || space->local_remaining < n) { 315 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 316 } 317 space = stash->space; 318 k = space->local_used; 319 for (i=0; i<n; i++) { 320 if (ignorezeroentries && values && values[i*stepval] == 0.0) continue; 321 space->idx[k] = row; 322 space->idy[k] = idxn[i]; 323 space->val[k] = values ? values[i*stepval] : 0.0; 324 k++; 325 cnt++; 326 } 327 stash->n += cnt; 328 space->local_used += cnt; 329 space->local_remaining -= cnt; 330 PetscFunctionReturn(0); 331 } 332 333 /* 334 MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 335 This function expects the values to be roworiented. Multiple columns belong 336 to the same block-row can be inserted with a single call to this function. 337 This function extracts the sub-block of values based on the dimensions of 338 the original input block, and the row,col values corresponding to the blocks. 339 340 Input Parameters: 341 stash - the stash 342 row - the global block-row correspoiding to the values 343 n - the number of elements inserted. All elements belong to the above row. 344 idxn - the global block-column indices corresponding to each of the blocks of 345 values. Each block is of size bs*bs. 346 values - the values inserted 347 rmax - the number of block-rows in the original block. 348 cmax - the number of block-columsn on the original block. 349 idx - the index of the current block-row in the original block. 350 */ 351 PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 352 { 353 PetscErrorCode ierr; 354 PetscInt i,j,k,bs2,bs=stash->bs,l; 355 const PetscScalar *vals; 356 PetscScalar *array; 357 PetscMatStashSpace space=stash->space; 358 359 PetscFunctionBegin; 360 if (!space || space->local_remaining < n) { 361 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 362 } 363 space = stash->space; 364 l = space->local_used; 365 bs2 = bs*bs; 366 for (i=0; i<n; i++) { 367 space->idx[l] = row; 368 space->idy[l] = idxn[i]; 369 /* Now copy over the block of values. Store the values column oriented. 370 This enables inserting multiple blocks belonging to a row with a single 371 funtion call */ 372 array = space->val + bs2*l; 373 vals = values + idx*bs2*n + bs*i; 374 for (j=0; j<bs; j++) { 375 for (k=0; k<bs; k++) array[k*bs] = values ? vals[k] : 0.0; 376 array++; 377 vals += cmax*bs; 378 } 379 l++; 380 } 381 stash->n += n; 382 space->local_used += n; 383 space->local_remaining -= n; 384 PetscFunctionReturn(0); 385 } 386 387 /* 388 MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 389 This function expects the values to be roworiented. Multiple columns belong 390 to the same block-row can be inserted with a single call to this function. 391 This function extracts the sub-block of values based on the dimensions of 392 the original input block, and the row,col values corresponding to the blocks. 393 394 Input Parameters: 395 stash - the stash 396 row - the global block-row correspoiding to the values 397 n - the number of elements inserted. All elements belong to the above row. 398 idxn - the global block-column indices corresponding to each of the blocks of 399 values. Each block is of size bs*bs. 400 values - the values inserted 401 rmax - the number of block-rows in the original block. 402 cmax - the number of block-columsn on the original block. 403 idx - the index of the current block-row in the original block. 404 */ 405 PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 406 { 407 PetscErrorCode ierr; 408 PetscInt i,j,k,bs2,bs=stash->bs,l; 409 const PetscScalar *vals; 410 PetscScalar *array; 411 PetscMatStashSpace space=stash->space; 412 413 PetscFunctionBegin; 414 if (!space || space->local_remaining < n) { 415 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 416 } 417 space = stash->space; 418 l = space->local_used; 419 bs2 = bs*bs; 420 for (i=0; i<n; i++) { 421 space->idx[l] = row; 422 space->idy[l] = idxn[i]; 423 /* Now copy over the block of values. Store the values column oriented. 424 This enables inserting multiple blocks belonging to a row with a single 425 funtion call */ 426 array = space->val + bs2*l; 427 vals = values + idx*bs2*n + bs*i; 428 for (j=0; j<bs; j++) { 429 for (k=0; k<bs; k++) array[k] = values ? vals[k] : 0.0; 430 array += bs; 431 vals += rmax*bs; 432 } 433 l++; 434 } 435 stash->n += n; 436 space->local_used += n; 437 space->local_remaining -= n; 438 PetscFunctionReturn(0); 439 } 440 /* 441 MatStashScatterBegin_Private - Initiates the transfer of values to the 442 correct owners. This function goes through the stash, and check the 443 owners of each stashed value, and sends the values off to the owner 444 processors. 445 446 Input Parameters: 447 stash - the stash 448 owners - an array of size 'no-of-procs' which gives the ownership range 449 for each node. 450 451 Notes: 452 The 'owners' array in the cased of the blocked-stash has the 453 ranges specified blocked global indices, and for the regular stash in 454 the proper global indices. 455 */ 456 PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners) 457 { 458 PetscErrorCode ierr; 459 460 PetscFunctionBegin; 461 ierr = (*stash->ScatterBegin)(mat,stash,owners);CHKERRQ(ierr); 462 PetscFunctionReturn(0); 463 } 464 465 static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners) 466 { 467 PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 468 PetscInt size=stash->size,nsends; 469 PetscErrorCode ierr; 470 PetscInt count,*sindices,**rindices,i,j,idx,lastidx,l; 471 PetscScalar **rvalues,*svalues; 472 MPI_Comm comm = stash->comm; 473 MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 474 PetscMPIInt *sizes,*nlengths,nreceives; 475 PetscInt *sp_idx,*sp_idy; 476 PetscScalar *sp_val; 477 PetscMatStashSpace space,space_next; 478 479 PetscFunctionBegin; 480 { /* make sure all processors are either in INSERTMODE or ADDMODE */ 481 InsertMode addv; 482 ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 483 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 484 mat->insertmode = addv; /* in case this processor had no cache */ 485 } 486 487 bs2 = stash->bs*stash->bs; 488 489 /* first count number of contributors to each processor */ 490 ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr); 491 ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr); 492 493 i = j = 0; 494 lastidx = -1; 495 space = stash->space_head; 496 while (space) { 497 space_next = space->next; 498 sp_idx = space->idx; 499 for (l=0; l<space->local_used; l++) { 500 /* if indices are NOT locally sorted, need to start search at the beginning */ 501 if (lastidx > (idx = sp_idx[l])) j = 0; 502 lastidx = idx; 503 for (; j<size; j++) { 504 if (idx >= owners[j] && idx < owners[j+1]) { 505 nlengths[j]++; owner[i] = j; break; 506 } 507 } 508 i++; 509 } 510 space = space_next; 511 } 512 513 /* Now check what procs get messages - and compute nsends. */ 514 ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr); 515 for (i=0, nsends=0; i<size; i++) { 516 if (nlengths[i]) { 517 sizes[i] = 1; nsends++; 518 } 519 } 520 521 {PetscMPIInt *onodes,*olengths; 522 /* Determine the number of messages to expect, their lengths, from from-ids */ 523 ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr); 524 ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr); 525 /* since clubbing row,col - lengths are multiplied by 2 */ 526 for (i=0; i<nreceives; i++) olengths[i] *=2; 527 ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr); 528 /* values are size 'bs2' lengths (and remove earlier factor 2 */ 529 for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 530 ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr); 531 ierr = PetscFree(onodes);CHKERRQ(ierr); 532 ierr = PetscFree(olengths);CHKERRQ(ierr);} 533 534 /* do sends: 535 1) starts[i] gives the starting index in svalues for stuff going to 536 the ith processor 537 */ 538 ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr); 539 ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr); 540 ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr); 541 /* use 2 sends the first with all_a, the next with all_i and all_j */ 542 startv[0] = 0; starti[0] = 0; 543 for (i=1; i<size; i++) { 544 startv[i] = startv[i-1] + nlengths[i-1]; 545 starti[i] = starti[i-1] + 2*nlengths[i-1]; 546 } 547 548 i = 0; 549 space = stash->space_head; 550 while (space) { 551 space_next = space->next; 552 sp_idx = space->idx; 553 sp_idy = space->idy; 554 sp_val = space->val; 555 for (l=0; l<space->local_used; l++) { 556 j = owner[i]; 557 if (bs2 == 1) { 558 svalues[startv[j]] = sp_val[l]; 559 } else { 560 PetscInt k; 561 PetscScalar *buf1,*buf2; 562 buf1 = svalues+bs2*startv[j]; 563 buf2 = space->val + bs2*l; 564 for (k=0; k<bs2; k++) buf1[k] = buf2[k]; 565 } 566 sindices[starti[j]] = sp_idx[l]; 567 sindices[starti[j]+nlengths[j]] = sp_idy[l]; 568 startv[j]++; 569 starti[j]++; 570 i++; 571 } 572 space = space_next; 573 } 574 startv[0] = 0; 575 for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1]; 576 577 for (i=0,count=0; i<size; i++) { 578 if (sizes[i]) { 579 ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRMPI(ierr); 580 ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRMPI(ierr); 581 } 582 } 583 #if defined(PETSC_USE_INFO) 584 ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr); 585 for (i=0; i<size; i++) { 586 if (sizes[i]) { 587 ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 588 } 589 } 590 #endif 591 ierr = PetscFree(nlengths);CHKERRQ(ierr); 592 ierr = PetscFree(owner);CHKERRQ(ierr); 593 ierr = PetscFree2(startv,starti);CHKERRQ(ierr); 594 ierr = PetscFree(sizes);CHKERRQ(ierr); 595 596 /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 597 ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr); 598 599 for (i=0; i<nreceives; i++) { 600 recv_waits[2*i] = recv_waits1[i]; 601 recv_waits[2*i+1] = recv_waits2[i]; 602 } 603 stash->recv_waits = recv_waits; 604 605 ierr = PetscFree(recv_waits1);CHKERRQ(ierr); 606 ierr = PetscFree(recv_waits2);CHKERRQ(ierr); 607 608 stash->svalues = svalues; 609 stash->sindices = sindices; 610 stash->rvalues = rvalues; 611 stash->rindices = rindices; 612 stash->send_waits = send_waits; 613 stash->nsends = nsends; 614 stash->nrecvs = nreceives; 615 stash->reproduce_count = 0; 616 PetscFunctionReturn(0); 617 } 618 619 /* 620 MatStashScatterGetMesg_Private - This function waits on the receives posted 621 in the function MatStashScatterBegin_Private() and returns one message at 622 a time to the calling function. If no messages are left, it indicates this 623 by setting flg = 0, else it sets flg = 1. 624 625 Input Parameters: 626 stash - the stash 627 628 Output Parameters: 629 nvals - the number of entries in the current message. 630 rows - an array of row indices (or blocked indices) corresponding to the values 631 cols - an array of columnindices (or blocked indices) corresponding to the values 632 vals - the values 633 flg - 0 indicates no more message left, and the current call has no values associated. 634 1 indicates that the current call successfully received a message, and the 635 other output parameters nvals,rows,cols,vals are set appropriately. 636 */ 637 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg) 638 { 639 PetscErrorCode ierr; 640 641 PetscFunctionBegin; 642 ierr = (*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg);CHKERRQ(ierr); 643 PetscFunctionReturn(0); 644 } 645 646 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg) 647 { 648 PetscErrorCode ierr; 649 PetscMPIInt i,*flg_v = stash->flg_v,i1,i2; 650 PetscInt bs2; 651 MPI_Status recv_status; 652 PetscBool match_found = PETSC_FALSE; 653 654 PetscFunctionBegin; 655 *flg = 0; /* When a message is discovered this is reset to 1 */ 656 /* Return if no more messages to process */ 657 if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0); 658 659 bs2 = stash->bs*stash->bs; 660 /* If a matching pair of receives are found, process them, and return the data to 661 the calling function. Until then keep receiving messages */ 662 while (!match_found) { 663 if (stash->reproduce) { 664 i = stash->reproduce_count++; 665 ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRMPI(ierr); 666 } else { 667 ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRMPI(ierr); 668 } 669 if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!"); 670 671 /* Now pack the received message into a structure which is usable by others */ 672 if (i % 2) { 673 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRMPI(ierr); 674 675 flg_v[2*recv_status.MPI_SOURCE] = i/2; 676 677 *nvals = *nvals/bs2; 678 } else { 679 ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRMPI(ierr); 680 681 flg_v[2*recv_status.MPI_SOURCE+1] = i/2; 682 683 *nvals = *nvals/2; /* This message has both row indices and col indices */ 684 } 685 686 /* Check if we have both messages from this proc */ 687 i1 = flg_v[2*recv_status.MPI_SOURCE]; 688 i2 = flg_v[2*recv_status.MPI_SOURCE+1]; 689 if (i1 != -1 && i2 != -1) { 690 *rows = stash->rindices[i2]; 691 *cols = *rows + *nvals; 692 *vals = stash->rvalues[i1]; 693 *flg = 1; 694 stash->nprocessed++; 695 match_found = PETSC_TRUE; 696 } 697 } 698 PetscFunctionReturn(0); 699 } 700 701 #if !defined(PETSC_HAVE_MPIUNI) 702 typedef struct { 703 PetscInt row; 704 PetscInt col; 705 PetscScalar vals[1]; /* Actually an array of length bs2 */ 706 } MatStashBlock; 707 708 static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode) 709 { 710 PetscErrorCode ierr; 711 PetscMatStashSpace space; 712 PetscInt n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i; 713 PetscScalar **valptr; 714 715 PetscFunctionBegin; 716 ierr = PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);CHKERRQ(ierr); 717 for (space=stash->space_head,cnt=0; space; space=space->next) { 718 for (i=0; i<space->local_used; i++) { 719 row[cnt] = space->idx[i]; 720 col[cnt] = space->idy[i]; 721 valptr[cnt] = &space->val[i*bs2]; 722 perm[cnt] = cnt; /* Will tell us where to find valptr after sorting row[] and col[] */ 723 cnt++; 724 } 725 } 726 if (cnt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %D, but counted %D entries",n,cnt); 727 ierr = PetscSortIntWithArrayPair(n,row,col,perm);CHKERRQ(ierr); 728 /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */ 729 for (rowstart=0,cnt=0,i=1; i<=n; i++) { 730 if (i == n || row[i] != row[rowstart]) { /* Sort the last row. */ 731 PetscInt colstart; 732 ierr = PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);CHKERRQ(ierr); 733 for (colstart=rowstart; colstart<i;) { /* Compress multiple insertions to the same location */ 734 PetscInt j,l; 735 MatStashBlock *block; 736 ierr = PetscSegBufferGet(stash->segsendblocks,1,&block);CHKERRQ(ierr); 737 block->row = row[rowstart]; 738 block->col = col[colstart]; 739 ierr = PetscArraycpy(block->vals,valptr[perm[colstart]],bs2);CHKERRQ(ierr); 740 for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */ 741 if (insertmode == ADD_VALUES) { 742 for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l]; 743 } else { 744 ierr = PetscArraycpy(block->vals,valptr[perm[j]],bs2);CHKERRQ(ierr); 745 } 746 } 747 colstart = j; 748 } 749 rowstart = i; 750 } 751 } 752 ierr = PetscFree4(row,col,valptr,perm);CHKERRQ(ierr); 753 PetscFunctionReturn(0); 754 } 755 756 static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash) 757 { 758 PetscErrorCode ierr; 759 760 PetscFunctionBegin; 761 if (stash->blocktype == MPI_DATATYPE_NULL) { 762 PetscInt bs2 = PetscSqr(stash->bs); 763 PetscMPIInt blocklens[2]; 764 MPI_Aint displs[2]; 765 MPI_Datatype types[2],stype; 766 /* Note that DummyBlock is a type having standard layout, even when PetscScalar is C++ std::complex. 767 std::complex itself has standard layout, so does DummyBlock, recursively. 768 To be compatiable with C++ std::complex, complex implementations on GPUs must also have standard layout, 769 though they can have different alignment, e.g, 16 bytes for double complex, instead of 8 bytes as in GCC stdlibc++. 770 offsetof(type, member) only requires type to have standard layout. Ref. https://en.cppreference.com/w/cpp/types/offsetof. 771 772 We can test if std::complex has standard layout with the following code: 773 #include <iostream> 774 #include <type_traits> 775 #include <complex> 776 int main() { 777 std::cout << std::boolalpha; 778 std::cout << std::is_standard_layout<std::complex<double> >::value << '\n'; 779 } 780 Output: true 781 */ 782 struct DummyBlock {PetscInt row,col; PetscScalar vals;}; 783 784 stash->blocktype_size = offsetof(struct DummyBlock,vals) + bs2*sizeof(PetscScalar); 785 if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */ 786 stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt); 787 } 788 ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segsendblocks);CHKERRQ(ierr); 789 ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segrecvblocks);CHKERRQ(ierr); 790 ierr = PetscSegBufferCreate(sizeof(MatStashFrame),1,&stash->segrecvframe);CHKERRQ(ierr); 791 blocklens[0] = 2; 792 blocklens[1] = bs2; 793 displs[0] = offsetof(struct DummyBlock,row); 794 displs[1] = offsetof(struct DummyBlock,vals); 795 types[0] = MPIU_INT; 796 types[1] = MPIU_SCALAR; 797 ierr = MPI_Type_create_struct(2,blocklens,displs,types,&stype);CHKERRMPI(ierr); 798 ierr = MPI_Type_commit(&stype);CHKERRMPI(ierr); 799 ierr = MPI_Type_create_resized(stype,0,stash->blocktype_size,&stash->blocktype);CHKERRMPI(ierr); 800 ierr = MPI_Type_commit(&stash->blocktype);CHKERRMPI(ierr); 801 ierr = MPI_Type_free(&stype);CHKERRMPI(ierr); 802 } 803 PetscFunctionReturn(0); 804 } 805 806 /* Callback invoked after target rank has initiatied receive of rendezvous message. 807 * Here we post the main sends. 808 */ 809 static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void *sdata,MPI_Request req[],void *ctx) 810 { 811 MatStash *stash = (MatStash*)ctx; 812 MatStashHeader *hdr = (MatStashHeader*)sdata; 813 PetscErrorCode ierr; 814 815 PetscFunctionBegin; 816 if (rank != stash->sendranks[rankid]) SETERRQ3(comm,PETSC_ERR_PLIB,"BTS Send rank %d does not match sendranks[%d] %d",rank,rankid,stash->sendranks[rankid]); 817 ierr = MPI_Isend(stash->sendframes[rankid].buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRMPI(ierr); 818 stash->sendframes[rankid].count = hdr->count; 819 stash->sendframes[rankid].pending = 1; 820 PetscFunctionReturn(0); 821 } 822 823 /* Callback invoked by target after receiving rendezvous message. 824 * Here we post the main recvs. 825 */ 826 static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *rdata,MPI_Request req[],void *ctx) 827 { 828 MatStash *stash = (MatStash*)ctx; 829 MatStashHeader *hdr = (MatStashHeader*)rdata; 830 MatStashFrame *frame; 831 PetscErrorCode ierr; 832 833 PetscFunctionBegin; 834 ierr = PetscSegBufferGet(stash->segrecvframe,1,&frame);CHKERRQ(ierr); 835 ierr = PetscSegBufferGet(stash->segrecvblocks,hdr->count,&frame->buffer);CHKERRQ(ierr); 836 ierr = MPI_Irecv(frame->buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRMPI(ierr); 837 frame->count = hdr->count; 838 frame->pending = 1; 839 PetscFunctionReturn(0); 840 } 841 842 /* 843 * owners[] contains the ownership ranges; may be indexed by either blocks or scalars 844 */ 845 static PetscErrorCode MatStashScatterBegin_BTS(Mat mat,MatStash *stash,PetscInt owners[]) 846 { 847 PetscErrorCode ierr; 848 size_t nblocks; 849 char *sendblocks; 850 851 PetscFunctionBegin; 852 if (PetscDefined(USE_DEBUG)) { /* make sure all processors are either in INSERTMODE or ADDMODE */ 853 InsertMode addv; 854 ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 855 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 856 } 857 858 ierr = MatStashBlockTypeSetUp(stash);CHKERRQ(ierr); 859 ierr = MatStashSortCompress_Private(stash,mat->insertmode);CHKERRQ(ierr); 860 ierr = PetscSegBufferGetSize(stash->segsendblocks,&nblocks);CHKERRQ(ierr); 861 ierr = PetscSegBufferExtractInPlace(stash->segsendblocks,&sendblocks);CHKERRQ(ierr); 862 if (stash->first_assembly_done) { /* Set up sendhdrs and sendframes for each rank that we sent before */ 863 PetscInt i; 864 size_t b; 865 for (i=0,b=0; i<stash->nsendranks; i++) { 866 stash->sendframes[i].buffer = &sendblocks[b*stash->blocktype_size]; 867 /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */ 868 stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */ 869 for (; b<nblocks; b++) { 870 MatStashBlock *sendblock_b = (MatStashBlock*)&sendblocks[b*stash->blocktype_size]; 871 if (PetscUnlikely(sendblock_b->row < owners[stash->sendranks[i]])) SETERRQ2(stash->comm,PETSC_ERR_ARG_WRONG,"MAT_SUBSET_OFF_PROC_ENTRIES set, but row %D owned by %d not communicated in initial assembly",sendblock_b->row,stash->sendranks[i]); 872 if (sendblock_b->row >= owners[stash->sendranks[i]+1]) break; 873 stash->sendhdr[i].count++; 874 } 875 } 876 } else { /* Dynamically count and pack (first time) */ 877 PetscInt sendno; 878 size_t i,rowstart; 879 880 /* Count number of send ranks and allocate for sends */ 881 stash->nsendranks = 0; 882 for (rowstart=0; rowstart<nblocks;) { 883 PetscInt owner; 884 MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size]; 885 ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr); 886 if (owner < 0) owner = -(owner+2); 887 for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */ 888 MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size]; 889 if (sendblock_i->row >= owners[owner+1]) break; 890 } 891 stash->nsendranks++; 892 rowstart = i; 893 } 894 ierr = PetscMalloc3(stash->nsendranks,&stash->sendranks,stash->nsendranks,&stash->sendhdr,stash->nsendranks,&stash->sendframes);CHKERRQ(ierr); 895 896 /* Set up sendhdrs and sendframes */ 897 sendno = 0; 898 for (rowstart=0; rowstart<nblocks;) { 899 PetscInt owner; 900 MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size]; 901 ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr); 902 if (owner < 0) owner = -(owner+2); 903 stash->sendranks[sendno] = owner; 904 for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */ 905 MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size]; 906 if (sendblock_i->row >= owners[owner+1]) break; 907 } 908 stash->sendframes[sendno].buffer = sendblock_rowstart; 909 stash->sendframes[sendno].pending = 0; 910 stash->sendhdr[sendno].count = i - rowstart; 911 sendno++; 912 rowstart = i; 913 } 914 if (sendno != stash->nsendranks) SETERRQ2(stash->comm,PETSC_ERR_PLIB,"BTS counted %D sendranks, but %D sends",stash->nsendranks,sendno); 915 } 916 917 /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new 918 * message or a dummy entry of some sort. */ 919 if (mat->insertmode == INSERT_VALUES) { 920 size_t i; 921 for (i=0; i<nblocks; i++) { 922 MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size]; 923 sendblock_i->row = -(sendblock_i->row+1); 924 } 925 } 926 927 if (stash->first_assembly_done) { 928 PetscMPIInt i,tag; 929 ierr = PetscCommGetNewTag(stash->comm,&tag);CHKERRQ(ierr); 930 for (i=0; i<stash->nrecvranks; i++) { 931 ierr = MatStashBTSRecv_Private(stash->comm,&tag,stash->recvranks[i],&stash->recvhdr[i],&stash->recvreqs[i],stash);CHKERRQ(ierr); 932 } 933 for (i=0; i<stash->nsendranks; i++) { 934 ierr = MatStashBTSSend_Private(stash->comm,&tag,i,stash->sendranks[i],&stash->sendhdr[i],&stash->sendreqs[i],stash);CHKERRQ(ierr); 935 } 936 stash->use_status = PETSC_TRUE; /* Use count from message status. */ 937 } else { 938 ierr = PetscCommBuildTwoSidedFReq(stash->comm,1,MPIU_INT,stash->nsendranks,stash->sendranks,(PetscInt*)stash->sendhdr, 939 &stash->nrecvranks,&stash->recvranks,(PetscInt*)&stash->recvhdr,1,&stash->sendreqs,&stash->recvreqs, 940 MatStashBTSSend_Private,MatStashBTSRecv_Private,stash);CHKERRQ(ierr); 941 ierr = PetscMalloc2(stash->nrecvranks,&stash->some_indices,stash->nrecvranks,&stash->some_statuses);CHKERRQ(ierr); 942 stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */ 943 } 944 945 ierr = PetscSegBufferExtractInPlace(stash->segrecvframe,&stash->recvframes);CHKERRQ(ierr); 946 stash->recvframe_active = NULL; 947 stash->recvframe_i = 0; 948 stash->some_i = 0; 949 stash->some_count = 0; 950 stash->recvcount = 0; 951 stash->first_assembly_done = mat->assembly_subset; /* See the same logic in VecAssemblyBegin_MPI_BTS */ 952 stash->insertmode = &mat->insertmode; 953 PetscFunctionReturn(0); 954 } 955 956 static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash,PetscMPIInt *n,PetscInt **row,PetscInt **col,PetscScalar **val,PetscInt *flg) 957 { 958 PetscErrorCode ierr; 959 MatStashBlock *block; 960 961 PetscFunctionBegin; 962 *flg = 0; 963 while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) { 964 if (stash->some_i == stash->some_count) { 965 if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(0); /* Done */ 966 ierr = MPI_Waitsome(stash->nrecvranks,stash->recvreqs,&stash->some_count,stash->some_indices,stash->use_status?stash->some_statuses:MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 967 stash->some_i = 0; 968 } 969 stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]]; 970 stash->recvframe_count = stash->recvframe_active->count; /* From header; maximum count */ 971 if (stash->use_status) { /* Count what was actually sent */ 972 ierr = MPI_Get_count(&stash->some_statuses[stash->some_i],stash->blocktype,&stash->recvframe_count);CHKERRMPI(ierr); 973 } 974 if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */ 975 block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[0]; 976 if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES; 977 if (PetscUnlikely(*stash->insertmode == INSERT_VALUES && block->row >= 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling INSERT_VALUES, but rank %d requested ADD_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]); 978 if (PetscUnlikely(*stash->insertmode == ADD_VALUES && block->row < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling ADD_VALUES, but rank %d requested INSERT_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]); 979 } 980 stash->some_i++; 981 stash->recvcount++; 982 stash->recvframe_i = 0; 983 } 984 *n = 1; 985 block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[stash->recvframe_i*stash->blocktype_size]; 986 if (block->row < 0) block->row = -(block->row + 1); 987 *row = &block->row; 988 *col = &block->col; 989 *val = block->vals; 990 stash->recvframe_i++; 991 *flg = 1; 992 PetscFunctionReturn(0); 993 } 994 995 static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash) 996 { 997 PetscErrorCode ierr; 998 999 PetscFunctionBegin; 1000 ierr = MPI_Waitall(stash->nsendranks,stash->sendreqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 1001 if (stash->first_assembly_done) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks */ 1002 void *dummy; 1003 ierr = PetscSegBufferExtractInPlace(stash->segrecvblocks,&dummy);CHKERRQ(ierr); 1004 } else { /* No reuse, so collect everything. */ 1005 ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr); 1006 } 1007 1008 /* Now update nmaxold to be app 10% more than max n used, this way the 1009 wastage of space is reduced the next time this stash is used. 1010 Also update the oldmax, only if it increases */ 1011 if (stash->n) { 1012 PetscInt bs2 = stash->bs*stash->bs; 1013 PetscInt oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 1014 if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 1015 } 1016 1017 stash->nmax = 0; 1018 stash->n = 0; 1019 stash->reallocs = -1; 1020 stash->nprocessed = 0; 1021 1022 ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr); 1023 1024 stash->space = NULL; 1025 1026 PetscFunctionReturn(0); 1027 } 1028 1029 PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash) 1030 { 1031 PetscErrorCode ierr; 1032 1033 PetscFunctionBegin; 1034 ierr = PetscSegBufferDestroy(&stash->segsendblocks);CHKERRQ(ierr); 1035 ierr = PetscSegBufferDestroy(&stash->segrecvframe);CHKERRQ(ierr); 1036 stash->recvframes = NULL; 1037 ierr = PetscSegBufferDestroy(&stash->segrecvblocks);CHKERRQ(ierr); 1038 if (stash->blocktype != MPI_DATATYPE_NULL) { 1039 ierr = MPI_Type_free(&stash->blocktype);CHKERRMPI(ierr); 1040 } 1041 stash->nsendranks = 0; 1042 stash->nrecvranks = 0; 1043 ierr = PetscFree3(stash->sendranks,stash->sendhdr,stash->sendframes);CHKERRQ(ierr); 1044 ierr = PetscFree(stash->sendreqs);CHKERRQ(ierr); 1045 ierr = PetscFree(stash->recvreqs);CHKERRQ(ierr); 1046 ierr = PetscFree(stash->recvranks);CHKERRQ(ierr); 1047 ierr = PetscFree(stash->recvhdr);CHKERRQ(ierr); 1048 ierr = PetscFree2(stash->some_indices,stash->some_statuses);CHKERRQ(ierr); 1049 PetscFunctionReturn(0); 1050 } 1051 #endif 1052