#include <petsc/private/matimpl.h>

#define DEFAULT_STASH_SIZE 10000

static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*);
PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
#if !defined(PETSC_HAVE_MPIUNI)
static PetscErrorCode MatStashScatterBegin_BTS(Mat,MatStash*,PetscInt*);
static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
static PetscErrorCode MatStashScatterEnd_BTS(MatStash*);
#endif

/*
  MatStashCreate_Private - Creates a stash, currently used by all the parallel
  matrix implementations. The stash is where elements of a matrix destined
  to be stored on other processors are kept until matrix assembly is done.

  This is a simple-minded stash: entries are simply appended to the end.

  Input Parameters:
  comm - communicator, required for scatters.
  bs   - stash block size, used when stashing blocks of values

  Output Parameter:
  stash - the newly created stash
*/
PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
{
  PetscErrorCode ierr;
  PetscInt       max,*opt,nopt,i;
  PetscBool      flg;

  PetscFunctionBegin;
  /* Require 2 tags; get the second using PetscCommGetNewTag() */
  stash->comm = comm;

  ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
  ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
  ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
  ierr = PetscMalloc1(2*stash->size,&stash->flg_v);CHKERRQ(ierr);
  for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;

  nopt = stash->size;
  ierr = PetscMalloc1(nopt,&opt);CHKERRQ(ierr);
  ierr = PetscOptionsGetIntArray(NULL,NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
  if (flg) {
    if (nopt == 1)                max = opt[0];
    else if (nopt == stash->size) max = opt[stash->rank];
    else if (stash->rank < nopt)  max = opt[stash->rank];
    else                          max = 0; /* use default */
    stash->umax = max;
  } else {
    stash->umax = 0;
  }
  ierr = PetscFree(opt);CHKERRQ(ierr);
  if (bs <= 0) bs = 1;

  stash->bs         = bs;
  stash->nmax       = 0;
  stash->oldnmax    = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->space_head = NULL;
  stash->space      = NULL;

  stash->send_waits  = NULL;
  stash->recv_waits  = NULL;
  stash->send_status = NULL;
  stash->nsends      = 0;
  stash->nrecvs      = 0;
  stash->svalues     = NULL;
  stash->rvalues     = NULL;
  stash->rindices    = NULL;
  stash->nprocessed  = 0;
  stash->reproduce   = PETSC_FALSE;
  stash->blocktype   = MPI_DATATYPE_NULL;

  ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_reproduce",&stash->reproduce,NULL);CHKERRQ(ierr);
#if !defined(PETSC_HAVE_MPIUNI)
  flg  = PETSC_FALSE;
  ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_legacy",&flg,NULL);CHKERRQ(ierr);
  if (!flg) {
    stash->ScatterBegin   = MatStashScatterBegin_BTS;
    stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
    stash->ScatterEnd     = MatStashScatterEnd_BTS;
    stash->ScatterDestroy = MatStashScatterDestroy_BTS;
  } else {
#endif
    stash->ScatterBegin   = MatStashScatterBegin_Ref;
    stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
    stash->ScatterEnd     = MatStashScatterEnd_Ref;
    stash->ScatterDestroy = NULL;
#if !defined(PETSC_HAVE_MPIUNI)
  }
#endif
  PetscFunctionReturn(0);
}
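/*
  Usage sketch (hypothetical command lines; only the option names above come from
  the code, the program name and values are illustrative):

    mpiexec -n 4 ./app -matstash_initial_size 50000         # same size hint on every rank
    mpiexec -n 4 ./app -matstash_initial_size 900,0,900,0   # per-rank sizes; 0 selects the default
    mpiexec -n 4 ./app -matstash_legacy                     # use the reference (Ref) scatter path
*/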
/*
   MatStashDestroy_Private - Destroy the stash
*/
PetscErrorCode MatStashDestroy_Private(MatStash *stash)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
  if (stash->ScatterDestroy) {ierr = (*stash->ScatterDestroy)(stash);CHKERRQ(ierr);}

  stash->space = NULL;

  ierr = PetscFree(stash->flg_v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatStashScatterEnd_Private - This is called as the final stage of the
   scatter. The final stages of message passing are done here, and
   all the memory used for message passing is cleaned up. This
   routine also resets the stash, and deallocates the memory used
   for the stash. It also keeps track of the current memory usage
   so that the same value can be used the next time through.
*/
PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = (*stash->ScatterEnd)(stash);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
{
  PetscErrorCode ierr;
  PetscInt       nsends=stash->nsends,bs2,oldnmax,i;
  MPI_Status     *send_status;

  PetscFunctionBegin;
  for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(2*nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }

  /* Now update oldnmax to be approximately 10% more than the maximum n used;
     this way the wasted space is reduced the next time this stash is used.
     Update oldnmax only if it increases. */
  if (stash->n) {
    bs2     = stash->bs*stash->bs;
    oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
    if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
  }

  stash->nmax       = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->nprocessed = 0;

  ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);

  stash->space = NULL;

  ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
  ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
  ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr);
  ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr);
  ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
  ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr);
  ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatStashGetInfo_Private - Gets the relevant statistics of the stash

   Input Parameter:
   stash    - the stash

   Output Parameters:
   nstash   - the size of the stash, i.e. the number of values stored
   reallocs - the number of additional mallocs incurred
*/
PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
{
  PetscInt bs2 = stash->bs*stash->bs;

  PetscFunctionBegin;
  if (nstash) *nstash = stash->n*bs2;
  if (reallocs) {
    if (stash->reallocs < 0) *reallocs = 0;
    else                     *reallocs = stash->reallocs;
  }
  PetscFunctionReturn(0);
}
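/*
  Caller sketch (hypothetical; parallel assembly routines typically log stash
  statistics this way before starting communication):

    PetscInt nstash,reallocs;
    ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
    ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
*/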
/*
   MatStashSetInitialSize_Private - Sets the initial size of the stash

   Input Parameters:
   stash - the stash
   max   - the value that is used as the max size of the stash.
           This value is used while allocating memory.
*/
PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
{
  PetscFunctionBegin;
  stash->umax = max;
  PetscFunctionReturn(0);
}

/* MatStashExpand_Private - Expand the stash. This function is called
   when the space in the stash is not sufficient to add the new values
   being inserted into the stash.

   Input Parameters:
   stash - the stash
   incr  - the minimum increase requested

   Notes:
   This routine doubles the currently used memory.
*/
static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
{
  PetscErrorCode ierr;
  PetscInt       newnmax,bs2=stash->bs*stash->bs;

  PetscFunctionBegin;
  /* allocate a larger stash */
  if (!stash->oldnmax && !stash->nmax) { /* new stash */
    if (stash->umax) newnmax = stash->umax/bs2;
    else             newnmax = DEFAULT_STASH_SIZE/bs2;
  } else if (!stash->nmax) { /* reusing stash */
    if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
    else                              newnmax = stash->oldnmax/bs2;
  } else newnmax = stash->nmax*2;
  if (newnmax < (stash->nmax + incr)) newnmax += 2*incr;

  /* Get a MatStashSpace and attach it to stash */
  ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
  if (!stash->space_head) { /* new stash or reusing stash->oldnmax */
    stash->space_head = stash->space;
  }

  stash->reallocs++;
  stash->nmax = newnmax;
  PetscFunctionReturn(0);
}
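/*
  Sizing arithmetic for the above, worked through on illustrative numbers: with
  bs = 2 (so bs2 = 4) and no user hint (umax = 0), a brand-new stash gets
  newnmax = DEFAULT_STASH_SIZE/bs2 = 10000/4 = 2500 blocks. On a later expansion
  with nmax = 2500 and incr = 10, newnmax = 2*2500 = 5000, which already exceeds
  nmax + incr, so no further adjustment is needed.
*/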
/*
   MatStashValuesRow_Private - inserts values into the stash. This function
   expects the values to be row-oriented. Multiple columns belonging to the same
   row can be inserted with a single call to this function.

   Input Parameters:
   stash  - the stash
   row    - the global row corresponding to the values
   n      - the number of elements inserted. All elements belong to the above row.
   idxn   - the global column indices corresponding to each of the values.
   values - the values inserted
*/
PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries)
{
  PetscErrorCode     ierr;
  PetscInt           i,k,cnt = 0;
  PetscMatStashSpace space=stash->space;

  PetscFunctionBegin;
  /* Check and see if we have sufficient memory */
  if (!space || space->local_remaining < n) {
    ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
  }
  space = stash->space;
  k     = space->local_used;
  for (i=0; i<n; i++) {
    if (ignorezeroentries && (values[i] == 0.0)) continue;
    space->idx[k] = row;
    space->idy[k] = idxn[i];
    space->val[k] = values[i];
    k++;
    cnt++;
  }
  stash->n               += cnt;
  space->local_used      += cnt;
  space->local_remaining -= cnt;
  PetscFunctionReturn(0);
}

/*
   MatStashValuesCol_Private - inserts values into the stash. This function
   expects the values to be column-oriented. Multiple columns belonging to the same
   row can be inserted with a single call to this function.

   Input Parameters:
   stash   - the stash
   row     - the global row corresponding to the values
   n       - the number of elements inserted. All elements belong to the above row.
   idxn    - the global column indices corresponding to each of the values.
   values  - the values inserted
   stepval - consecutive values are separated by a distance of stepval.
             This happens because the input is column-oriented.
*/
PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries)
{
  PetscErrorCode     ierr;
  PetscInt           i,k,cnt = 0;
  PetscMatStashSpace space=stash->space;

  PetscFunctionBegin;
  /* Check and see if we have sufficient memory */
  if (!space || space->local_remaining < n) {
    ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
  }
  space = stash->space;
  k     = space->local_used;
  for (i=0; i<n; i++) {
    if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
    space->idx[k] = row;
    space->idy[k] = idxn[i];
    space->val[k] = values[i*stepval];
    k++;
    cnt++;
  }
  stash->n               += cnt;
  space->local_used      += cnt;
  space->local_remaining -= cnt;
  PetscFunctionReturn(0);
}
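/*
  Illustration of stepval (hypothetical numbers): for an m-by-n dense block stored
  column-major, the n entries of one row are not contiguous; entry (row,j) sits at
  values[j*m + row]. Passing values+row together with stepval = m makes the
  values[i*stepval] access above pick out exactly the i-th column entry of that row.
*/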
/*
   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
   This function expects the values to be row-oriented. Multiple columns belonging
   to the same block-row can be inserted with a single call to this function.
   This function extracts the sub-block of values based on the dimensions of
   the original input block, and the row,col values corresponding to the blocks.

   Input Parameters:
   stash  - the stash
   row    - the global block-row corresponding to the values
   n      - the number of elements inserted. All elements belong to the above row.
   idxn   - the global block-column indices corresponding to each of the blocks of
            values. Each block is of size bs*bs.
   values - the values inserted
   rmax   - the number of block-rows in the original block.
   cmax   - the number of block-columns in the original block.
   idx    - the index of the current block-row in the original block.
*/
PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
{
  PetscErrorCode     ierr;
  PetscInt           i,j,k,bs2,bs=stash->bs,l;
  const PetscScalar  *vals;
  PetscScalar        *array;
  PetscMatStashSpace space=stash->space;

  PetscFunctionBegin;
  if (!space || space->local_remaining < n) {
    ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
  }
  space = stash->space;
  l     = space->local_used;
  bs2   = bs*bs;
  for (i=0; i<n; i++) {
    space->idx[l] = row;
    space->idy[l] = idxn[i];
    /* Now copy over the block of values. Store the values column-oriented.
       This enables inserting multiple blocks belonging to a row with a single
       function call */
    array = space->val + bs2*l;
    vals  = values + idx*bs2*n + bs*i;
    for (j=0; j<bs; j++) {
      for (k=0; k<bs; k++) array[k*bs] = vals[k];
      array++;
      vals += cmax*bs;
    }
    l++;
  }
  stash->n               += n;
  space->local_used      += n;
  space->local_remaining -= n;
  PetscFunctionReturn(0);
}

/*
   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
   This function expects the values to be column-oriented. Multiple columns belonging
   to the same block-row can be inserted with a single call to this function.
   This function extracts the sub-block of values based on the dimensions of
   the original input block, and the row,col values corresponding to the blocks.

   Input Parameters:
   stash  - the stash
   row    - the global block-row corresponding to the values
   n      - the number of elements inserted. All elements belong to the above row.
   idxn   - the global block-column indices corresponding to each of the blocks of
            values. Each block is of size bs*bs.
   values - the values inserted
   rmax   - the number of block-rows in the original block.
   cmax   - the number of block-columns in the original block.
   idx    - the index of the current block-row in the original block.
*/
PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
{
  PetscErrorCode     ierr;
  PetscInt           i,j,k,bs2,bs=stash->bs,l;
  const PetscScalar  *vals;
  PetscScalar        *array;
  PetscMatStashSpace space=stash->space;

  PetscFunctionBegin;
  if (!space || space->local_remaining < n) {
    ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
  }
  space = stash->space;
  l     = space->local_used;
  bs2   = bs*bs;
  for (i=0; i<n; i++) {
    space->idx[l] = row;
    space->idy[l] = idxn[i];
    /* Now copy over the block of values. Store the values column-oriented.
       This enables inserting multiple blocks belonging to a row with a single
       function call */
    array = space->val + bs2*l;
    vals  = values + idx*bs2*n + bs*i;
    for (j=0; j<bs; j++) {
      for (k=0; k<bs; k++) array[k] = vals[k];
      array += bs;
      vals  += rmax*bs;
    }
    l++;
  }
  stash->n               += n;
  space->local_used      += n;
  space->local_remaining -= n;
  PetscFunctionReturn(0);
}
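/*
  Copy-order illustration (hypothetical bs = 2): a row-major input block
    [a b]
    [c d]
  is stored into space->val in column-oriented order a,c,b,d. The RowBlocked
  variant above achieves this by striding through the input with cmax*bs between
  input rows and writing array[k*bs]; the ColBlocked variant reads an already
  column-oriented input with stride rmax*bs and copies each column contiguously.
*/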
/*
   MatStashScatterBegin_Private - Initiates the transfer of values to the
   correct owners. This function goes through the stash, checks the
   owner of each stashed value, and sends the values off to the owner
   processors.

   Input Parameters:
   stash  - the stash
   owners - an array of size 'no-of-procs' which gives the ownership range
            for each node.

   Notes:
   In the case of the blocked stash, the 'owners' array contains ranges
   specified in blocked global indices; for the regular stash, in regular
   global indices.
*/
PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = (*stash->ScatterBegin)(mat,stash,owners);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners)
{
  PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
  PetscInt           size=stash->size,nsends;
  PetscErrorCode     ierr;
  PetscInt           count,*sindices,**rindices,i,j,idx,lastidx,l;
  PetscScalar        **rvalues,*svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
  PetscMPIInt        *sizes,*nlengths,nreceives;
  PetscInt           *sp_idx,*sp_idy;
  PetscScalar        *sp_val;
  PetscMatStashSpace space,space_next;

  PetscFunctionBegin;
  { /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
    mat->insertmode = addv; /* in case this processor had no cache */
  }

  bs2 = stash->bs*stash->bs;

  /* first count number of contributors to each processor */
  ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
  ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);

  i       = j = 0;
  lastidx = -1;
  space   = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    for (l=0; l<space->local_used; l++) {
      /* if indices are NOT locally sorted, need to start the search at the beginning */
      if (lastidx > (idx = sp_idx[l])) j = 0;
      lastidx = idx;
      for (; j<size; j++) {
        if (idx >= owners[j] && idx < owners[j+1]) {
          nlengths[j]++; owner[i] = j; break;
        }
      }
      i++;
    }
    space = space_next;
  }

  /* Now check which procs get messages - and compute nsends. */
  ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
  for (i=0, nsends=0; i<size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1; nsends++;
    }
  }

  {PetscMPIInt *onodes,*olengths;
   /* Determine the number of messages to expect, their lengths, and the from-ids */
   ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
   /* since row,col indices are clubbed together, lengths are multiplied by 2 */
   for (i=0; i<nreceives; i++) olengths[i] *= 2;
   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
   /* values are of length 'bs2' each (remove the earlier factor of 2) */
   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
   ierr = PetscFree(onodes);CHKERRQ(ierr);
   ierr = PetscFree(olengths);CHKERRQ(ierr);}

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
  ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
  /* use 2 sends: the first with all_a, the next with all_i and all_j */
  startv[0] = 0; starti[0] = 0;
  for (i=1; i<size; i++) {
    startv[i] = startv[i-1] + nlengths[i-1];
    starti[i] = starti[i-1] + 2*nlengths[i-1];
  }

  i     = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    sp_idy     = space->idy;
    sp_val     = space->val;
    for (l=0; l<space->local_used; l++) {
      j = owner[i];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt    k;
        PetscScalar *buf1,*buf2;
        buf1 = svalues+bs2*startv[j];
        buf2 = space->val + bs2*l;
        for (k=0; k<bs2; k++) buf1[k] = buf2[k];
      }
      sindices[starti[j]]             = sp_idx[l];
      sindices[starti[j]+nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
      i++;
    }
    space = space_next;
  }
  startv[0] = 0;
  for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];

  for (i=0,count=0; i<size; i++) {
    if (sizes[i]) {
      ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
      ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
#if defined(PETSC_USE_INFO)
  ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
  for (i=0; i<size; i++) {
    if (sizes[i]) {
      ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
    }
  }
#endif
  ierr = PetscFree(nlengths);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);

  for (i=0; i<nreceives; i++) {
    recv_waits[2*i]   = recv_waits1[i];
    recv_waits[2*i+1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
  ierr = PetscFree(recv_waits2);CHKERRQ(ierr);

  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  PetscFunctionReturn(0);
}
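/*
  Message layout used by the Ref path above, sketched for one destination rank j
  (derived from the packing loop; the numbers are illustrative): with nlengths[j] = 3
  stashed entries, the tag1 message carries 2*3 = 6 integers, the three row indices
  followed by the three column indices, and the tag2 message carries bs2*3 scalars,
  one bs*bs block per entry in the same order.
*/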
/*
   MatStashScatterGetMesg_Private - This function waits on the receives posted
   in the function MatStashScatterBegin_Private() and returns one message at
   a time to the calling function. If no messages are left, it indicates this
   by setting flg = 0, else it sets flg = 1.

   Input Parameter:
   stash - the stash

   Output Parameters:
   nvals - the number of entries in the current message.
   rows  - an array of row indices (or blocked indices) corresponding to the values
   cols  - an array of column indices (or blocked indices) corresponding to the values
   vals  - the values
   flg   - 0 indicates no more messages left, and the current call has no values associated.
           1 indicates that the current call successfully received a message, and the
           other output parameters nvals,rows,cols,vals are set appropriately.
*/
PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = (*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
{
  PetscErrorCode ierr;
  PetscMPIInt    i,*flg_v = stash->flg_v,i1,i2;
  PetscInt       bs2;
  MPI_Status     recv_status;
  PetscBool      match_found = PETSC_FALSE;

  PetscFunctionBegin;
  *flg = 0; /* When a message is discovered this is reset to 1 */
  /* Return if no more messages to process */
  if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0);

  bs2 = stash->bs*stash->bs;
  /* If a matching pair of receives is found, process them, and return the data to
     the calling function. Until then keep receiving messages. */
  while (!match_found) {
    if (stash->reproduce) {
      i    = stash->reproduce_count++;
      ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr);
    } else {
      ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
    }
    if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!");

    /* Now pack the received message into a structure which is usable by others */
    if (i % 2) {
      ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);

      flg_v[2*recv_status.MPI_SOURCE] = i/2;

      *nvals = *nvals/bs2;
    } else {
      ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);

      flg_v[2*recv_status.MPI_SOURCE+1] = i/2;

      *nvals = *nvals/2; /* This message has both row indices and col indices */
    }

    /* Check if we have both messages from this proc */
    i1 = flg_v[2*recv_status.MPI_SOURCE];
    i2 = flg_v[2*recv_status.MPI_SOURCE+1];
    if (i1 != -1 && i2 != -1) {
      *rows = stash->rindices[i2];
      *cols = *rows + *nvals;
      *vals = stash->rvalues[i1];
      *flg  = 1;
      stash->nprocessed++;
      match_found = PETSC_TRUE;
    }
  }
  PetscFunctionReturn(0);
}
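/*
  Consumer-loop sketch (hypothetical caller; this is the pattern parallel
  MatAssemblyEnd implementations use to drain the stash):

    PetscMPIInt n;
    PetscInt    *row,*col,flg;
    PetscScalar *val;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;
      /* insert the n received (row,col,val) entries into the local matrix here */
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
*/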
#if !defined(PETSC_HAVE_MPIUNI)
typedef struct {
  PetscInt    row;
  PetscInt    col;
  PetscScalar vals[1]; /* Actually an array of length bs2 */
} MatStashBlock;

static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode)
{
  PetscErrorCode     ierr;
  PetscMatStashSpace space;
  PetscInt           n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i;
  PetscScalar        **valptr;

  PetscFunctionBegin;
  ierr = PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);CHKERRQ(ierr);
  for (space=stash->space_head,cnt=0; space; space=space->next) {
    for (i=0; i<space->local_used; i++) {
      row[cnt]    = space->idx[i];
      col[cnt]    = space->idy[i];
      valptr[cnt] = &space->val[i*bs2];
      perm[cnt]   = cnt; /* Will tell us where to find valptr after sorting row[] and col[] */
      cnt++;
    }
  }
  if (cnt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %D, but counted %D entries",n,cnt);
  ierr = PetscSortIntWithArrayPair(n,row,col,perm);CHKERRQ(ierr);
  /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
  for (rowstart=0,cnt=0,i=1; i<=n; i++) {
    if (i == n || row[i] != row[rowstart]) { /* Sort the last row. */
      PetscInt colstart;
      ierr = PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);CHKERRQ(ierr);
      for (colstart=rowstart; colstart<i; ) { /* Compress multiple insertions to the same location */
        PetscInt      j,l;
        MatStashBlock *block;
        ierr = PetscSegBufferGet(stash->segsendblocks,1,&block);CHKERRQ(ierr);
        block->row = row[rowstart];
        block->col = col[colstart];
        ierr = PetscArraycpy(block->vals,valptr[perm[colstart]],bs2);CHKERRQ(ierr);
        for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
          if (insertmode == ADD_VALUES) {
            for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l];
          } else {
            ierr = PetscArraycpy(block->vals,valptr[perm[j]],bs2);CHKERRQ(ierr);
          }
        }
        colstart = j;
      }
      rowstart = i;
    }
  }
  ierr = PetscFree4(row,col,valptr,perm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
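/*
  Worked example of the sort/compress step (illustrative data, bs = 1): stashed
  entries (5,3)=1.0, (2,7)=4.0, (5,3)=2.0 are sorted by (row,col) to
  (2,7),(5,3),(5,3); with ADD_VALUES the duplicates combine, so the packed send
  buffer holds (2,7)=4.0 and (5,3)=3.0, while with INSERT_VALUES the later
  insertion wins and (5,3)=2.0 is kept.
*/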
static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (stash->blocktype == MPI_DATATYPE_NULL) {
    PetscInt     bs2 = PetscSqr(stash->bs);
    PetscMPIInt  blocklens[2];
    MPI_Aint     displs[2];
    MPI_Datatype types[2],stype;
    /* Note that DummyBlock is a type having standard layout, even when PetscScalar is C++ std::complex.
       std::complex itself has standard layout, so DummyBlock does too, recursively.
       To be compatible with C++ std::complex, complex implementations on GPUs must also have standard layout,
       though they can have different alignment, e.g., 16 bytes for double complex, instead of 8 bytes as in GCC stdlibc++.
       offsetof(type, member) only requires type to have standard layout. Ref. https://en.cppreference.com/w/cpp/types/offsetof.

       We can test whether std::complex has standard layout with the following code:
         #include <iostream>
         #include <type_traits>
         #include <complex>
         int main() {
           std::cout << std::boolalpha;
           std::cout << std::is_standard_layout<std::complex<double> >::value << '\n';
         }
       Output: true
    */
    struct DummyBlock {PetscInt row,col; PetscScalar vals;};

    stash->blocktype_size = offsetof(struct DummyBlock,vals) + bs2*sizeof(PetscScalar);
    if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
      stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
    }
    ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segsendblocks);CHKERRQ(ierr);
    ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segrecvblocks);CHKERRQ(ierr);
    ierr = PetscSegBufferCreate(sizeof(MatStashFrame),1,&stash->segrecvframe);CHKERRQ(ierr);
    blocklens[0] = 2;
    blocklens[1] = bs2;
    displs[0]    = offsetof(struct DummyBlock,row);
    displs[1]    = offsetof(struct DummyBlock,vals);
    types[0]     = MPIU_INT;
    types[1]     = MPIU_SCALAR;
    ierr = MPI_Type_create_struct(2,blocklens,displs,types,&stype);CHKERRQ(ierr);
    ierr = MPI_Type_commit(&stype);CHKERRQ(ierr);
    ierr = MPI_Type_create_resized(stype,0,stash->blocktype_size,&stash->blocktype);CHKERRQ(ierr); /* MPI-2 */
    ierr = MPI_Type_commit(&stash->blocktype);CHKERRQ(ierr);
    ierr = MPI_Type_free(&stype);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
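/*
  Size arithmetic for the datatype above, on illustrative builds: with 4-byte
  PetscInt, 8-byte PetscScalar, and bs = 2 (bs2 = 4), offsetof(DummyBlock,vals) = 8,
  so blocktype_size = 8 + 4*8 = 40 bytes and no padding is needed (40 % 4 == 0).
  With 8-byte PetscInt, 4-byte (single precision) PetscScalar, and bs = 1,
  blocktype_size = 16 + 4 = 20 bytes, which is padded to 24 so that consecutive
  blocks preserve PetscInt alignment.
*/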
/* Callback invoked after the target rank has initiated receive of the rendezvous message.
 * Here we post the main sends.
 */
static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void *sdata,MPI_Request req[],void *ctx)
{
  MatStash       *stash = (MatStash*)ctx;
  MatStashHeader *hdr   = (MatStashHeader*)sdata;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (rank != stash->sendranks[rankid]) SETERRQ3(comm,PETSC_ERR_PLIB,"BTS Send rank %d does not match sendranks[%d] %d",rank,rankid,stash->sendranks[rankid]);
  ierr = MPI_Isend(stash->sendframes[rankid].buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
  stash->sendframes[rankid].count   = hdr->count;
  stash->sendframes[rankid].pending = 1;
  PetscFunctionReturn(0);
}

/* Callback invoked by the target after receiving the rendezvous message.
 * Here we post the main recvs.
 */
static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *rdata,MPI_Request req[],void *ctx)
{
  MatStash       *stash = (MatStash*)ctx;
  MatStashHeader *hdr   = (MatStashHeader*)rdata;
  MatStashFrame  *frame;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscSegBufferGet(stash->segrecvframe,1,&frame);CHKERRQ(ierr);
  ierr = PetscSegBufferGet(stash->segrecvblocks,hdr->count,&frame->buffer);CHKERRQ(ierr);
  ierr = MPI_Irecv(frame->buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
  frame->count   = hdr->count;
  frame->pending = 1;
  PetscFunctionReturn(0);
}
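/*
  Protocol sketch for the two callbacks above (a summary of the code below, not
  additional API): on first assembly, PetscCommBuildTwoSidedFReq discovers which
  ranks communicate, delivers each MatStashHeader (the block count) as the
  rendezvous payload, and invokes MatStashBTSSend_Private/MatStashBTSRecv_Private
  to post the matching MPI_Isend/MPI_Irecv of 'count' blocktype entries. On later
  assemblies that reuse the pattern (first_assembly_done set), the discovered rank
  lists are kept and the callbacks are called directly with a fresh tag; see
  MatStashScatterBegin_BTS below.
*/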
/*
 * owners[] contains the ownership ranges; it may be indexed by either blocks or scalars
 */
static PetscErrorCode MatStashScatterBegin_BTS(Mat mat,MatStash *stash,PetscInt owners[])
{
  PetscErrorCode ierr;
  size_t         nblocks;
  char           *sendblocks;

  PetscFunctionBegin;
  if (PetscDefined(USE_DEBUG)) { /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }

  ierr = MatStashBlockTypeSetUp(stash);CHKERRQ(ierr);
  ierr = MatStashSortCompress_Private(stash,mat->insertmode);CHKERRQ(ierr);
  ierr = PetscSegBufferGetSize(stash->segsendblocks,&nblocks);CHKERRQ(ierr);
  ierr = PetscSegBufferExtractInPlace(stash->segsendblocks,&sendblocks);CHKERRQ(ierr);
  if (stash->first_assembly_done) { /* Set up sendhdrs and sendframes for each rank that we sent to before */
    PetscInt i;
    size_t   b;
    for (i=0,b=0; i<stash->nsendranks; i++) {
      stash->sendframes[i].buffer = &sendblocks[b*stash->blocktype_size];
      /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */
      stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */
      for ( ; b<nblocks; b++) {
        MatStashBlock *sendblock_b = (MatStashBlock*)&sendblocks[b*stash->blocktype_size];
        if (PetscUnlikely(sendblock_b->row < owners[stash->sendranks[i]])) SETERRQ2(stash->comm,PETSC_ERR_ARG_WRONG,"MAT_SUBSET_OFF_PROC_ENTRIES set, but row %D owned by %d not communicated in initial assembly",sendblock_b->row,stash->sendranks[i]);
        if (sendblock_b->row >= owners[stash->sendranks[i]+1]) break;
        stash->sendhdr[i].count++;
      }
    }
  } else { /* Dynamically count and pack (first time) */
    PetscInt sendno;
    size_t   i,rowstart;

    /* Count number of send ranks and allocate for sends */
    stash->nsendranks = 0;
    for (rowstart=0; rowstart<nblocks; ) {
      PetscInt      owner;
      MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
      ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
      if (owner < 0) owner = -(owner+2);
      for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
        MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
        if (sendblock_i->row >= owners[owner+1]) break;
      }
      stash->nsendranks++;
      rowstart = i;
    }
    ierr = PetscMalloc3(stash->nsendranks,&stash->sendranks,stash->nsendranks,&stash->sendhdr,stash->nsendranks,&stash->sendframes);CHKERRQ(ierr);

    /* Set up sendhdrs and sendframes */
    sendno = 0;
    for (rowstart=0; rowstart<nblocks; ) {
      PetscInt      owner;
      MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
      ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
      if (owner < 0) owner = -(owner+2);
      stash->sendranks[sendno] = owner;
      for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
        MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
        if (sendblock_i->row >= owners[owner+1]) break;
      }
      stash->sendframes[sendno].buffer  = sendblock_rowstart;
      stash->sendframes[sendno].pending = 0;
      stash->sendhdr[sendno].count      = i - rowstart;
      sendno++;
      rowstart = i;
    }
    if (sendno != stash->nsendranks) SETERRQ2(stash->comm,PETSC_ERR_PLIB,"BTS counted %D sendranks, but %D sends",stash->nsendranks,sendno);
  }

  /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new
   * message or a dummy entry of some sort. */
  if (mat->insertmode == INSERT_VALUES) {
    size_t i;
    for (i=0; i<nblocks; i++) {
      MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
      sendblock_i->row = -(sendblock_i->row+1);
    }
  }

  if (stash->first_assembly_done) {
    PetscMPIInt i,tag;
    ierr = PetscCommGetNewTag(stash->comm,&tag);CHKERRQ(ierr);
    for (i=0; i<stash->nrecvranks; i++) {
      ierr = MatStashBTSRecv_Private(stash->comm,&tag,stash->recvranks[i],&stash->recvhdr[i],&stash->recvreqs[i],stash);CHKERRQ(ierr);
    }
    for (i=0; i<stash->nsendranks; i++) {
      ierr = MatStashBTSSend_Private(stash->comm,&tag,i,stash->sendranks[i],&stash->sendhdr[i],&stash->sendreqs[i],stash);CHKERRQ(ierr);
    }
    stash->use_status = PETSC_TRUE; /* Use count from message status. */
  } else {
    ierr = PetscCommBuildTwoSidedFReq(stash->comm,1,MPIU_INT,stash->nsendranks,stash->sendranks,(PetscInt*)stash->sendhdr,
                                      &stash->nrecvranks,&stash->recvranks,(PetscInt*)&stash->recvhdr,1,&stash->sendreqs,&stash->recvreqs,
                                      MatStashBTSSend_Private,MatStashBTSRecv_Private,stash);CHKERRQ(ierr);
    ierr = PetscMalloc2(stash->nrecvranks,&stash->some_indices,stash->nrecvranks,&stash->some_statuses);CHKERRQ(ierr);
    stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */
  }

  ierr = PetscSegBufferExtractInPlace(stash->segrecvframe,&stash->recvframes);CHKERRQ(ierr);
  stash->recvframe_active    = NULL;
  stash->recvframe_i         = 0;
  stash->some_i              = 0;
  stash->some_count          = 0;
  stash->recvcount           = 0;
  stash->first_assembly_done = mat->assembly_subset; /* See the same logic in VecAssemblyBegin_MPI_BTS */
  stash->insertmode          = &mat->insertmode;
  PetscFunctionReturn(0);
}
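/*
  Insert-mode encoding used above, on an illustrative entry: with INSERT_VALUES,
  row 5 is sent as -(5+1) = -6; the receiver in MatStashScatterGetMesg_BTS reads
  the sign to learn the sender's mode and decodes the row with -(row+1) = 5.
  ADD_VALUES leaves rows non-negative, so the two modes are distinguishable from
  the first block of any non-empty message.
*/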
static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash,PetscMPIInt *n,PetscInt **row,PetscInt **col,PetscScalar **val,PetscInt *flg)
{
  PetscErrorCode ierr;
  MatStashBlock  *block;

  PetscFunctionBegin;
  *flg = 0;
  while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) {
    if (stash->some_i == stash->some_count) {
      if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(0); /* Done */
      ierr = MPI_Waitsome(stash->nrecvranks,stash->recvreqs,&stash->some_count,stash->some_indices,stash->use_status?stash->some_statuses:MPI_STATUSES_IGNORE);CHKERRQ(ierr);
      stash->some_i = 0;
    }
    stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
    stash->recvframe_count  = stash->recvframe_active->count; /* From header; maximum count */
    if (stash->use_status) { /* Count what was actually sent */
      ierr = MPI_Get_count(&stash->some_statuses[stash->some_i],stash->blocktype,&stash->recvframe_count);CHKERRQ(ierr);
    }
    if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */
      block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[0];
      if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES;
      if (PetscUnlikely(*stash->insertmode == INSERT_VALUES && block->row >= 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling INSERT_VALUES, but rank %d requested ADD_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
      if (PetscUnlikely(*stash->insertmode == ADD_VALUES && block->row < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling ADD_VALUES, but rank %d requested INSERT_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
    }
    stash->some_i++;
    stash->recvcount++;
    stash->recvframe_i = 0;
  }
  *n    = 1;
  block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[stash->recvframe_i*stash->blocktype_size];
  if (block->row < 0) block->row = -(block->row + 1);
  *row  = &block->row;
  *col  = &block->col;
  *val  = block->vals;
  stash->recvframe_i++;
  *flg  = 1;
  PetscFunctionReturn(0);
}
static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Waitall(stash->nsendranks,stash->sendreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
  if (stash->first_assembly_done) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks */
    void *dummy;
    ierr = PetscSegBufferExtractInPlace(stash->segrecvblocks,&dummy);CHKERRQ(ierr);
  } else { /* No reuse, so collect everything. */
    ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr);
  }

  /* Now update oldnmax to be approximately 10% more than the maximum n used;
     this way the wasted space is reduced the next time this stash is used.
     Update oldnmax only if it increases. */
  if (stash->n) {
    PetscInt bs2     = stash->bs*stash->bs;
    PetscInt oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
    if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
  }

  stash->nmax       = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->nprocessed = 0;

  ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);

  stash->space = NULL;

  PetscFunctionReturn(0);
}

PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscSegBufferDestroy(&stash->segsendblocks);CHKERRQ(ierr);
  ierr = PetscSegBufferDestroy(&stash->segrecvframe);CHKERRQ(ierr);
  stash->recvframes = NULL;
  ierr = PetscSegBufferDestroy(&stash->segrecvblocks);CHKERRQ(ierr);
  if (stash->blocktype != MPI_DATATYPE_NULL) {
    ierr = MPI_Type_free(&stash->blocktype);CHKERRQ(ierr);
  }
  stash->nsendranks = 0;
  stash->nrecvranks = 0;
  ierr = PetscFree3(stash->sendranks,stash->sendhdr,stash->sendframes);CHKERRQ(ierr);
  ierr = PetscFree(stash->sendreqs);CHKERRQ(ierr);
  ierr = PetscFree(stash->recvreqs);CHKERRQ(ierr);
  ierr = PetscFree(stash->recvranks);CHKERRQ(ierr);
  ierr = PetscFree(stash->recvhdr);CHKERRQ(ierr);
  ierr = PetscFree2(stash->some_indices,stash->some_statuses);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif