#include <petsc/private/matimpl.h>

#define DEFAULT_STASH_SIZE 10000

/* Two interchangeable scatter implementations, selected at MatStashCreate_Private()
   time: the reference (Ref) two-message scheme and the BTS (build-two-sided) scheme. */
static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*);
static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
static PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
static PetscErrorCode MatStashScatterBegin_BTS(Mat,MatStash*,PetscInt*);
static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
static PetscErrorCode MatStashScatterEnd_BTS(MatStash*);
static PetscErrorCode MatStashScatterDestroy_BTS(MatStash*);

/*
  MatStashCreate_Private - Creates a stash, currently used for all the parallel
  matrix implementations. The stash is where elements of a matrix destined
  to be stored on other processors are kept until matrix assembly is done.

  This is a simple minded stash. Simply adds entries to end of stash.

  Input Parameters:
  comm - communicator, required for scatters.
  bs   - stash block size. Used when stashing blocks of values.

  Output Parameters:
  stash - the newly created stash
*/
PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
{
  PetscErrorCode ierr;
  PetscInt       max,*opt,nopt,i;
  PetscBool      flg;

  PetscFunctionBegin;
  /* Require 2 tags, get the second using PetscCommGetNewTag() */
  stash->comm = comm;

  ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
  ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
  ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
  /* flg_v holds, per rank, the indices of the (index,value) message pair received;
     -1 means "not yet received" (see MatStashScatterGetMesg_Ref()) */
  ierr = PetscMalloc1(2*stash->size,&stash->flg_v);CHKERRQ(ierr);
  for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;

  /* -matstash_initial_size may give one value for all ranks or one value per rank */
  nopt = stash->size;
  ierr = PetscMalloc1(nopt,&opt);CHKERRQ(ierr);
  ierr = PetscOptionsGetIntArray(NULL,NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
  if (flg) {
    if (nopt == 1)                max = opt[0];
    else if (nopt == stash->size) max = opt[stash->rank];
    else if (stash->rank < nopt)  max = opt[stash->rank];
    else                          max = 0; /* Use default */
    stash->umax = max;
  } else {
    stash->umax = 0;
  }
  ierr = PetscFree(opt);CHKERRQ(ierr);
  if (bs <= 0) bs = 1;

  stash->bs         = bs;
  stash->nmax       = 0;
  stash->oldnmax    = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->space_head = 0;
  stash->space      = 0;

  stash->send_waits  = 0;
  stash->recv_waits  = 0;
  stash->send_status = 0;
  stash->nsends      = 0;
  stash->nrecvs      = 0;
  stash->svalues     = 0;
  stash->rvalues     = 0;
  stash->rindices    = 0;
  stash->nprocessed  = 0;
  stash->reproduce   = PETSC_FALSE;
  stash->blocktype   = MPI_DATATYPE_NULL;

  ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_reproduce",&stash->reproduce,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_bts",&flg,NULL);CHKERRQ(ierr);
  if (flg) {
    stash->ScatterBegin   = MatStashScatterBegin_BTS;
    stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
    stash->ScatterEnd     = MatStashScatterEnd_BTS;
    stash->ScatterDestroy = MatStashScatterDestroy_BTS;
  } else {
    stash->ScatterBegin   = MatStashScatterBegin_Ref;
    stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
    stash->ScatterEnd     = MatStashScatterEnd_Ref;
    stash->ScatterDestroy = NULL; /* Ref variant frees its buffers in ScatterEnd */
  }
  PetscFunctionReturn(0);
}

/*
  MatStashDestroy_Private - Destroy the stash
*/
PetscErrorCode MatStashDestroy_Private(MatStash *stash)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
  if (stash->ScatterDestroy) {ierr = (*stash->ScatterDestroy)(stash);CHKERRQ(ierr);}

  stash->space = 0;

  ierr = PetscFree(stash->flg_v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  MatStashScatterEnd_Private - This is called as the final stage of
  scatter. The final stages of message passing is done here, and
  all the memory used for message passing is cleaned up. This
  routine also resets the stash, and deallocates the memory used
  for the stash. It also keeps track of the current memory usage
  so that the same value can be used the next time through.
121 */ 122 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash) 123 { 124 PetscErrorCode ierr; 125 126 PetscFunctionBegin; 127 ierr = (*stash->ScatterEnd)(stash);CHKERRQ(ierr); 128 PetscFunctionReturn(0); 129 } 130 131 static PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash) 132 { 133 PetscErrorCode ierr; 134 PetscInt nsends=stash->nsends,bs2,oldnmax,i; 135 MPI_Status *send_status; 136 137 PetscFunctionBegin; 138 for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 139 /* wait on sends */ 140 if (nsends) { 141 ierr = PetscMalloc1(2*nsends,&send_status);CHKERRQ(ierr); 142 ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 143 ierr = PetscFree(send_status);CHKERRQ(ierr); 144 } 145 146 /* Now update nmaxold to be app 10% more than max n used, this way the 147 wastage of space is reduced the next time this stash is used. 148 Also update the oldmax, only if it increases */ 149 if (stash->n) { 150 bs2 = stash->bs*stash->bs; 151 oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 152 if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 153 } 154 155 stash->nmax = 0; 156 stash->n = 0; 157 stash->reallocs = -1; 158 stash->nprocessed = 0; 159 160 ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr); 161 162 stash->space = 0; 163 164 ierr = PetscFree(stash->send_waits);CHKERRQ(ierr); 165 ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr); 166 ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr); 167 ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr); 168 ierr = PetscFree(stash->rvalues);CHKERRQ(ierr); 169 ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr); 170 ierr = PetscFree(stash->rindices);CHKERRQ(ierr); 171 PetscFunctionReturn(0); 172 } 173 174 /* 175 MatStashGetInfo_Private - Gets the relavant statistics of the stash 176 177 Input Parameters: 178 stash - the stash 179 nstash - the size of the stash. Indicates the number of values stored. 180 reallocs - the number of additional mallocs incurred. 
181 182 */ 183 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs) 184 { 185 PetscInt bs2 = stash->bs*stash->bs; 186 187 PetscFunctionBegin; 188 if (nstash) *nstash = stash->n*bs2; 189 if (reallocs) { 190 if (stash->reallocs < 0) *reallocs = 0; 191 else *reallocs = stash->reallocs; 192 } 193 PetscFunctionReturn(0); 194 } 195 196 /* 197 MatStashSetInitialSize_Private - Sets the initial size of the stash 198 199 Input Parameters: 200 stash - the stash 201 max - the value that is used as the max size of the stash. 202 this value is used while allocating memory. 203 */ 204 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max) 205 { 206 PetscFunctionBegin; 207 stash->umax = max; 208 PetscFunctionReturn(0); 209 } 210 211 /* MatStashExpand_Private - Expand the stash. This function is called 212 when the space in the stash is not sufficient to add the new values 213 being inserted into the stash. 214 215 Input Parameters: 216 stash - the stash 217 incr - the minimum increase requested 218 219 Notes: 220 This routine doubles the currently used memory. 
 */
static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
{
  PetscErrorCode ierr;
  PetscInt       newnmax,bs2= stash->bs*stash->bs;

  PetscFunctionBegin;
  /* allocate a larger stash */
  if (!stash->oldnmax && !stash->nmax) { /* new stash */
    if (stash->umax) newnmax = stash->umax/bs2;
    else             newnmax = DEFAULT_STASH_SIZE/bs2;
  } else if (!stash->nmax) { /* reusing stash: start from last assembly's high-water mark */
    if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
    else                              newnmax = stash->oldnmax/bs2;
  } else newnmax = stash->nmax*2; /* grow geometrically */
  if (newnmax < (stash->nmax + incr)) newnmax += 2*incr;

  /* Get a MatStashSpace and attach it to stash */
  ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
  if (!stash->space_head) { /* new stash or reusing stash->oldnmax */
    stash->space_head = stash->space;
  }

  stash->reallocs++;
  stash->nmax = newnmax;
  PetscFunctionReturn(0);
}
/*
  MatStashValuesRow_Private - inserts values into the stash. This function
  expects the values to be row-oriented. Multiple columns belonging to the same
  row can be inserted with a single call to this function.

  Input Parameters:
  stash  - the stash
  row    - the global row corresponding to the values
  n      - the number of elements inserted. All elements belong to the above row.
  idxn   - the global column indices corresponding to each of the values.
  values - the values inserted
*/
PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries)
{
  PetscErrorCode     ierr;
  PetscInt           i,k,cnt = 0;
  PetscMatStashSpace space=stash->space;

  PetscFunctionBegin;
  /* Check and see if we have sufficient memory */
  if (!space || space->local_remaining < n) {
    ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
  }
  space = stash->space;
  k     = space->local_used;
  for (i=0; i<n; i++) {
    if (ignorezeroentries && (values[i] == 0.0)) continue;
    space->idx[k] = row;
    space->idy[k] = idxn[i];
    space->val[k] = values[i];
    k++;
    cnt++;
  }
  /* cnt may be < n when zero entries were skipped */
  stash->n               += cnt;
  space->local_used      += cnt;
  space->local_remaining -= cnt;
  PetscFunctionReturn(0);
}

/*
  MatStashValuesCol_Private - inserts values into the stash. This function
  expects the values to be column-oriented. Multiple columns belonging to the same
  row can be inserted with a single call to this function.

  Input Parameters:
  stash   - the stash
  row     - the global row corresponding to the values
  n       - the number of elements inserted. All elements belong to the above row.
  idxn    - the global column indices corresponding to each of the values.
  values  - the values inserted
  stepval - the consecutive values are separated by a distance of stepval.
            this happens because the input is column-oriented.
 */
PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries)
{
  PetscErrorCode     ierr;
  PetscInt           i,k,cnt = 0;
  PetscMatStashSpace space=stash->space;

  PetscFunctionBegin;
  /* Check and see if we have sufficient memory */
  if (!space || space->local_remaining < n) {
    ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
  }
  space = stash->space;
  k     = space->local_used;
  for (i=0; i<n; i++) {
    /* values for one row are strided by stepval in the column-oriented input */
    if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
    space->idx[k] = row;
    space->idy[k] = idxn[i];
    space->val[k] = values[i*stepval];
    k++;
    cnt++;
  }
  stash->n               += cnt;
  space->local_used      += cnt;
  space->local_remaining -= cnt;
  PetscFunctionReturn(0);
}

/*
  MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
  This function expects the values to be row-oriented. Multiple columns belonging
  to the same block-row can be inserted with a single call to this function.
  This function extracts the sub-block of values based on the dimensions of
  the original input block, and the row,col values corresponding to the blocks.

  Input Parameters:
  stash  - the stash
  row    - the global block-row corresponding to the values
  n      - the number of elements inserted. All elements belong to the above row.
  idxn   - the global block-column indices corresponding to each of the blocks of
           values. Each block is of size bs*bs.
  values - the values inserted
  rmax   - the number of block-rows in the original block.
  cmax   - the number of block-columns in the original block.
  idx    - the index of the current block-row in the original block.
 */
PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
{
  PetscErrorCode     ierr;
  PetscInt           i,j,k,bs2,bs=stash->bs,l;
  const PetscScalar  *vals;
  PetscScalar        *array;
  PetscMatStashSpace space=stash->space;

  PetscFunctionBegin;
  if (!space || space->local_remaining < n) {
    ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
  }
  space = stash->space;
  l     = space->local_used;
  bs2   = bs*bs;
  for (i=0; i<n; i++) {
    space->idx[l] = row;
    space->idy[l] = idxn[i];
    /* Now copy over the block of values. Store the values column oriented.
       This enables inserting multiple blocks belonging to a row with a single
       function call */
    array = space->val + bs2*l;
    vals  = values + idx*bs2*n + bs*i;
    for (j=0; j<bs; j++) {
      /* transpose one row of the input block into one column of the stash block */
      for (k=0; k<bs; k++) array[k*bs] = vals[k];
      array++;
      vals += cmax*bs; /* advance one full row of the row-oriented input */
    }
    l++;
  }
  stash->n               += n;
  space->local_used      += n;
  space->local_remaining -= n;
  PetscFunctionReturn(0);
}

/*
  MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
  This function expects the values to be column-oriented. Multiple columns belonging
  to the same block-row can be inserted with a single call to this function.
  This function extracts the sub-block of values based on the dimensions of
  the original input block, and the row,col values corresponding to the blocks.

  Input Parameters:
  stash  - the stash
  row    - the global block-row corresponding to the values
  n      - the number of elements inserted. All elements belong to the above row.
  idxn   - the global block-column indices corresponding to each of the blocks of
           values. Each block is of size bs*bs.
  values - the values inserted
  rmax   - the number of block-rows in the original block.
  cmax   - the number of block-columns in the original block.
  idx    - the index of the current block-row in the original block.
*/
PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
{
  PetscErrorCode     ierr;
  PetscInt           i,j,k,bs2,bs=stash->bs,l;
  const PetscScalar  *vals;
  PetscScalar        *array;
  PetscMatStashSpace space=stash->space;

  PetscFunctionBegin;
  if (!space || space->local_remaining < n) {
    ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
  }
  space = stash->space;
  l     = space->local_used;
  bs2   = bs*bs;
  for (i=0; i<n; i++) {
    space->idx[l] = row;
    space->idy[l] = idxn[i];
    /* Now copy over the block of values. Store the values column oriented.
       This enables inserting multiple blocks belonging to a row with a single
       function call */
    array = space->val + bs2*l;
    vals  = values + idx*bs2*n + bs*i;
    for (j=0; j<bs; j++) {
      /* input is already column-oriented: copy one column at a time */
      for (k=0; k<bs; k++) array[k] = vals[k];
      array += bs;
      vals  += rmax*bs; /* advance one full column of the column-oriented input */
    }
    l++;
  }
  stash->n               += n;
  space->local_used      += n;
  space->local_remaining -= n;
  PetscFunctionReturn(0);
}
/*
  MatStashScatterBegin_Private - Initiates the transfer of values to the
  correct owners. This function goes through the stash, and checks the
  owners of each stashed value, and sends the values off to the owner
  processors.

  Input Parameters:
  stash  - the stash
  owners - an array of size 'no-of-procs' which gives the ownership range
           for each node.

  Notes: The 'owners' array in the case of the blocked-stash has the
  ranges specified in blocked global indices, and for the regular stash in
  the proper global indices.
449 */ 450 PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners) 451 { 452 PetscErrorCode ierr; 453 454 PetscFunctionBegin; 455 ierr = (*stash->ScatterBegin)(mat,stash,owners);CHKERRQ(ierr); 456 PetscFunctionReturn(0); 457 } 458 459 static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners) 460 { 461 PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 462 PetscInt size=stash->size,nsends; 463 PetscErrorCode ierr; 464 PetscInt count,*sindices,**rindices,i,j,idx,lastidx,l; 465 PetscScalar **rvalues,*svalues; 466 MPI_Comm comm = stash->comm; 467 MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 468 PetscMPIInt *sizes,*nlengths,nreceives; 469 PetscInt *sp_idx,*sp_idy; 470 PetscScalar *sp_val; 471 PetscMatStashSpace space,space_next; 472 473 PetscFunctionBegin; 474 { /* make sure all processors are either in INSERTMODE or ADDMODE */ 475 InsertMode addv; 476 ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 477 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 478 mat->insertmode = addv; /* in case this processor had no cache */ 479 } 480 481 bs2 = stash->bs*stash->bs; 482 483 /* first count number of contributors to each processor */ 484 ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr); 485 ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr); 486 ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr); 487 488 i = j = 0; 489 lastidx = -1; 490 space = stash->space_head; 491 while (space) { 492 space_next = space->next; 493 sp_idx = space->idx; 494 for (l=0; l<space->local_used; l++) { 495 /* if indices are NOT locally sorted, need to start search at the beginning */ 496 if (lastidx > (idx = sp_idx[l])) j = 0; 497 lastidx = idx; 498 for (; j<size; j++) { 499 if (idx >= owners[j] && idx < owners[j+1]) { 
500 nlengths[j]++; owner[i] = j; break; 501 } 502 } 503 i++; 504 } 505 space = space_next; 506 } 507 /* Now check what procs get messages - and compute nsends. */ 508 for (i=0, nsends=0; i<size; i++) { 509 if (nlengths[i]) { 510 sizes[i] = 1; nsends++; 511 } 512 } 513 514 {PetscMPIInt *onodes,*olengths; 515 /* Determine the number of messages to expect, their lengths, from from-ids */ 516 ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr); 517 ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr); 518 /* since clubbing row,col - lengths are multiplied by 2 */ 519 for (i=0; i<nreceives; i++) olengths[i] *=2; 520 ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr); 521 /* values are size 'bs2' lengths (and remove earlier factor 2 */ 522 for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 523 ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr); 524 ierr = PetscFree(onodes);CHKERRQ(ierr); 525 ierr = PetscFree(olengths);CHKERRQ(ierr);} 526 527 /* do sends: 528 1) starts[i] gives the starting index in svalues for stuff going to 529 the ith processor 530 */ 531 ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr); 532 ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr); 533 ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr); 534 /* use 2 sends the first with all_a, the next with all_i and all_j */ 535 startv[0] = 0; starti[0] = 0; 536 for (i=1; i<size; i++) { 537 startv[i] = startv[i-1] + nlengths[i-1]; 538 starti[i] = starti[i-1] + 2*nlengths[i-1]; 539 } 540 541 i = 0; 542 space = stash->space_head; 543 while (space) { 544 space_next = space->next; 545 sp_idx = space->idx; 546 sp_idy = space->idy; 547 sp_val = space->val; 548 for (l=0; l<space->local_used; l++) { 549 j = owner[i]; 550 if (bs2 == 1) { 551 svalues[startv[j]] = sp_val[l]; 552 } else { 553 
PetscInt k; 554 PetscScalar *buf1,*buf2; 555 buf1 = svalues+bs2*startv[j]; 556 buf2 = space->val + bs2*l; 557 for (k=0; k<bs2; k++) buf1[k] = buf2[k]; 558 } 559 sindices[starti[j]] = sp_idx[l]; 560 sindices[starti[j]+nlengths[j]] = sp_idy[l]; 561 startv[j]++; 562 starti[j]++; 563 i++; 564 } 565 space = space_next; 566 } 567 startv[0] = 0; 568 for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1]; 569 570 for (i=0,count=0; i<size; i++) { 571 if (sizes[i]) { 572 ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr); 573 ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr); 574 } 575 } 576 #if defined(PETSC_USE_INFO) 577 ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr); 578 for (i=0; i<size; i++) { 579 if (sizes[i]) { 580 ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 581 } 582 } 583 #endif 584 ierr = PetscFree(nlengths);CHKERRQ(ierr); 585 ierr = PetscFree(owner);CHKERRQ(ierr); 586 ierr = PetscFree2(startv,starti);CHKERRQ(ierr); 587 ierr = PetscFree(sizes);CHKERRQ(ierr); 588 589 /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 590 ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr); 591 592 for (i=0; i<nreceives; i++) { 593 recv_waits[2*i] = recv_waits1[i]; 594 recv_waits[2*i+1] = recv_waits2[i]; 595 } 596 stash->recv_waits = recv_waits; 597 598 ierr = PetscFree(recv_waits1);CHKERRQ(ierr); 599 ierr = PetscFree(recv_waits2);CHKERRQ(ierr); 600 601 stash->svalues = svalues; 602 stash->sindices = sindices; 603 stash->rvalues = rvalues; 604 stash->rindices = rindices; 605 stash->send_waits = send_waits; 606 stash->nsends = nsends; 607 stash->nrecvs = nreceives; 608 stash->reproduce_count = 0; 609 PetscFunctionReturn(0); 610 } 611 612 /* 613 MatStashScatterGetMesg_Private - This function waits on the receives posted 614 
   in the function MatStashScatterBegin_Private() and returns one message at
   a time to the calling function. If no messages are left, it indicates this
   by setting flg = 0, else it sets flg = 1.

   Input Parameters:
   stash - the stash

   Output Parameters:
   nvals - the number of entries in the current message.
   rows  - an array of row indices (or blocked indices) corresponding to the values
   cols  - an array of column indices (or blocked indices) corresponding to the values
   vals  - the values
   flg   - 0 indicates no more message left, and the current call has no values associated.
           1 indicates that the current call successfully received a message, and the
           other output parameters nvals,rows,cols,vals are set appropriately.
*/
PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = (*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
{
  PetscErrorCode ierr;
  PetscMPIInt    i,*flg_v = stash->flg_v,i1,i2;
  PetscInt       bs2;
  MPI_Status     recv_status;
  PetscBool      match_found = PETSC_FALSE;

  PetscFunctionBegin;
  *flg = 0; /* When a message is discovered this is reset to 1 */
  /* Return if no more messages to process */
  if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0);

  bs2 = stash->bs*stash->bs;
  /* If a matching pair of receives are found, process them, and return the data to
     the calling function. Until then keep receiving messages */
  while (!match_found) {
    if (stash->reproduce) {
      /* deterministic order: complete receives in posting order */
      i    = stash->reproduce_count++;
      ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr);
    } else {
      ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
    }
    if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!");

    /* Now pack the received message into a structure which is usable by others.
       Odd request indices are value messages, even ones are index messages
       (see the interleaving in MatStashScatterBegin_Ref()) */
    if (i % 2) {
      ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);

      flg_v[2*recv_status.MPI_SOURCE] = i/2;

      *nvals = *nvals/bs2;
    } else {
      ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);

      flg_v[2*recv_status.MPI_SOURCE+1] = i/2;

      *nvals = *nvals/2; /* This message has both row indices and col indices */
    }

    /* Check if we have both messages from this proc */
    i1 = flg_v[2*recv_status.MPI_SOURCE];
    i2 = flg_v[2*recv_status.MPI_SOURCE+1];
    if (i1 != -1 && i2 != -1) {
      *rows = stash->rindices[i2];
      *cols = *rows + *nvals; /* columns are stored after the rows in the same buffer */
      *vals = stash->rvalues[i1];
      *flg  = 1;
      stash->nprocessed++;
      match_found = PETSC_TRUE;
    }
  }
  PetscFunctionReturn(0);
}

/* One stashed (block-)entry as sent over the wire by the BTS scheme */
typedef struct {
  PetscInt    row;
  PetscInt    col;
  PetscScalar vals[1]; /* Actually an array of length bs2 */
} MatStashBlock;

/* Flatten the stash spaces into (row,col,val) triples, sort by (row,col),
   combine duplicate locations per insertmode, and pack the send segbuffer. */
static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode)
{
  PetscErrorCode     ierr;
  PetscMatStashSpace space;
  PetscInt           n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i;
  PetscScalar        **valptr;

  PetscFunctionBegin;
  ierr = PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);CHKERRQ(ierr);
  for (space=stash->space_head,cnt=0; space; space=space->next) {
    for (i=0; i<space->local_used; i++) {
      row[cnt]    = space->idx[i];
      col[cnt]    = space->idy[i];
      valptr[cnt] = &space->val[i*bs2];
      perm[cnt]   = cnt; /* Will tell us where to find valptr after sorting row[] and col[] */
      cnt++;
    }
  }
  if (cnt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %D, but counted %D entries",n,cnt);
  ierr = PetscSortIntWithArrayPair(n,row,col,perm);CHKERRQ(ierr);
  /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
  for (rowstart=0,cnt=0,i=1; i<=n; i++) {
    if (i == n || row[i] != row[rowstart]) { /* Sort the last row. */
      PetscInt colstart;
      ierr = PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);CHKERRQ(ierr);
      for (colstart=rowstart; colstart<i; ) { /* Compress multiple insertions to the same location */
        PetscInt      j,l;
        MatStashBlock *block;
        ierr = PetscSegBufferGet(stash->segsendblocks,1,&block);CHKERRQ(ierr);
        block->row = row[rowstart];
        block->col = col[colstart];
        ierr = PetscMemcpy(block->vals,valptr[perm[colstart]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr);
        for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
          if (insertmode == ADD_VALUES) {
            for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l];
          } else {
            /* INSERT_VALUES: the last insertion wins */
            ierr = PetscMemcpy(block->vals,valptr[perm[j]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr);
          }
        }
        colstart = j;
      }
      rowstart = i;
    }
  }
  ierr = PetscFree4(row,col,valptr,perm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Lazily build the MPI datatype describing one MatStashBlock (two PetscInts
   followed by bs2 PetscScalars, padded to PetscInt alignment) and create the
   send/recv segbuffers sized to match. */
static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (stash->blocktype == MPI_DATATYPE_NULL) {
    PetscInt     bs2 = PetscSqr(stash->bs);
    PetscMPIInt  blocklens[2];
    MPI_Aint     displs[2];
    MPI_Datatype types[2],stype;
    /* C++ std::complex is not my favorite datatype. Since it is not POD, we cannot use offsetof to find the offset of
     * vals. But the layout is actually guaranteed by the standard, so we do a little dance here with struct
     * DummyBlock, substituting PetscReal for PetscComplex so that we can determine the offset.
     */
    struct DummyBlock {PetscInt row,col; PetscReal vals;};

    stash->blocktype_size = offsetof(struct DummyBlock,vals) + bs2*sizeof(PetscScalar);
    if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
      stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
    }
    ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segsendblocks);CHKERRQ(ierr);
    ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segrecvblocks);CHKERRQ(ierr);
    ierr = PetscSegBufferCreate(sizeof(MatStashFrame),1,&stash->segrecvframe);CHKERRQ(ierr);
    blocklens[0] = 2;
    blocklens[1] = bs2;
    displs[0]    = offsetof(struct DummyBlock,row);
    displs[1]    = offsetof(struct DummyBlock,vals);
    types[0]     = MPIU_INT;
    types[1]     = MPIU_SCALAR;
    ierr = MPI_Type_create_struct(2,blocklens,displs,types,&stype);CHKERRQ(ierr);
    ierr = MPI_Type_commit(&stype);CHKERRQ(ierr);
    ierr = MPI_Type_create_resized(stype,0,stash->blocktype_size,&stash->blocktype);CHKERRQ(ierr); /* MPI-2 */
    ierr = MPI_Type_commit(&stash->blocktype);CHKERRQ(ierr);
    ierr = MPI_Type_free(&stype);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Callback invoked after target rank has initiated receive of rendezvous message.
 * Here we post the main sends.
 */
static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void *sdata,MPI_Request req[],void *ctx)
{
  MatStash       *stash = (MatStash*)ctx;
  MatStashHeader *hdr   = (MatStashHeader*)sdata;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (rank != stash->sendranks[rankid]) SETERRQ3(comm,PETSC_ERR_PLIB,"BTS Send rank %d does not match sendranks[%d] %d",rank,rankid,stash->sendranks[rankid]);
  ierr = MPI_Isend(stash->sendframes[rankid].buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
  stash->sendframes[rankid].count   = hdr->count;
  stash->sendframes[rankid].pending = 1;
  PetscFunctionReturn(0);
}

/* Callback invoked by target after receiving rendezvous message.
 * Here we post the main recvs.
 */
static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *rdata,MPI_Request req[],void *ctx)
{
  MatStash       *stash = (MatStash*)ctx;
  MatStashHeader *hdr   = (MatStashHeader*)rdata;
  MatStashFrame  *frame;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* one frame per incoming message; its buffer lives in segrecvblocks */
  ierr = PetscSegBufferGet(stash->segrecvframe,1,&frame);CHKERRQ(ierr);
  ierr = PetscSegBufferGet(stash->segrecvblocks,hdr->count,&frame->buffer);CHKERRQ(ierr);
  ierr = MPI_Irecv(frame->buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
  frame->count   = hdr->count;
  frame->pending = 1;
  PetscFunctionReturn(0);
}

/*
 * owners[] contains the ownership ranges; may be indexed by either blocks or scalars
 */
static PetscErrorCode MatStashScatterBegin_BTS(Mat mat,MatStash *stash,PetscInt owners[])
{
  PetscErrorCode ierr;
  size_t         nblocks;
  char           *sendblocks;

  PetscFunctionBegin;
#if defined(PETSC_USE_DEBUG)
  { /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    ierr =
MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 836 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 837 } 838 #endif 839 840 if (stash->subset_off_proc && !mat->subsetoffprocentries) { /* We won't use the old scatter context. */ 841 ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr); 842 } 843 844 ierr = MatStashBlockTypeSetUp(stash);CHKERRQ(ierr); 845 ierr = MatStashSortCompress_Private(stash,mat->insertmode);CHKERRQ(ierr); 846 ierr = PetscSegBufferGetSize(stash->segsendblocks,&nblocks);CHKERRQ(ierr); 847 ierr = PetscSegBufferExtractInPlace(stash->segsendblocks,&sendblocks);CHKERRQ(ierr); 848 if (stash->subset_off_proc && mat->subsetoffprocentries) { /* Set up sendhdrs and sendframes for each rank that we sent before */ 849 PetscInt i; 850 size_t b; 851 for (i=0,b=0; i<stash->nsendranks; i++) { 852 stash->sendframes[i].buffer = &sendblocks[b*stash->blocktype_size]; 853 /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */ 854 stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */ 855 for ( ; b<nblocks; b++) { 856 MatStashBlock *sendblock_b = (MatStashBlock*)&sendblocks[b*stash->blocktype_size]; 857 if (PetscUnlikely(sendblock_b->row < owners[stash->sendranks[i]])) SETERRQ2(stash->comm,PETSC_ERR_ARG_WRONG,"MAT_SUBSET_OFF_PROC_ENTRIES set, but row %D owned by %d not communicated in initial assembly",sendblock_b->row,stash->sendranks[i]); 858 if (sendblock_b->row >= owners[stash->sendranks[i]+1]) break; 859 stash->sendhdr[i].count++; 860 } 861 } 862 } else { /* Dynamically count and pack (first time) */ 863 PetscInt sendno; 864 size_t i,rowstart; 865 866 /* Count number of send ranks and allocate for sends */ 867 stash->nsendranks = 0; 868 for (rowstart=0; 
rowstart<nblocks; ) { 869 PetscInt owner; 870 MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size]; 871 ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr); 872 if (owner < 0) owner = -(owner+2); 873 for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */ 874 MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size]; 875 if (sendblock_i->row >= owners[owner+1]) break; 876 } 877 stash->nsendranks++; 878 rowstart = i; 879 } 880 ierr = PetscMalloc3(stash->nsendranks,&stash->sendranks,stash->nsendranks,&stash->sendhdr,stash->nsendranks,&stash->sendframes);CHKERRQ(ierr); 881 882 /* Set up sendhdrs and sendframes */ 883 sendno = 0; 884 for (rowstart=0; rowstart<nblocks; ) { 885 PetscInt owner; 886 MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size]; 887 ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr); 888 if (owner < 0) owner = -(owner+2); 889 stash->sendranks[sendno] = owner; 890 for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */ 891 MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size]; 892 if (sendblock_i->row >= owners[owner+1]) break; 893 } 894 stash->sendframes[sendno].buffer = sendblock_rowstart; 895 stash->sendframes[sendno].pending = 0; 896 stash->sendhdr[sendno].count = i - rowstart; 897 sendno++; 898 rowstart = i; 899 } 900 if (sendno != stash->nsendranks) SETERRQ2(stash->comm,PETSC_ERR_PLIB,"BTS counted %D sendranks, but %D sends",stash->nsendranks,sendno); 901 } 902 903 /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new 904 * message or a dummy entry of some sort. 
*/ 905 if (mat->insertmode == INSERT_VALUES) { 906 size_t i; 907 for (i=0; i<nblocks; i++) { 908 MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size]; 909 sendblock_i->row = -(sendblock_i->row+1); 910 } 911 } 912 913 if (stash->subset_off_proc && mat->subsetoffprocentries) { 914 PetscMPIInt i,tag; 915 ierr = PetscCommGetNewTag(stash->comm,&tag);CHKERRQ(ierr); 916 for (i=0; i<stash->nrecvranks; i++) { 917 ierr = MatStashBTSRecv_Private(stash->comm,&tag,stash->recvranks[i],&stash->recvhdr[i],&stash->recvreqs[i],stash);CHKERRQ(ierr); 918 } 919 for (i=0; i<stash->nsendranks; i++) { 920 ierr = MatStashBTSSend_Private(stash->comm,&tag,i,stash->sendranks[i],&stash->sendhdr[i],&stash->sendreqs[i],stash);CHKERRQ(ierr); 921 } 922 stash->use_status = PETSC_TRUE; /* Use count from message status. */ 923 } else { 924 ierr = PetscCommBuildTwoSidedFReq(stash->comm,1,MPIU_INT,stash->nsendranks,stash->sendranks,(PetscInt*)stash->sendhdr, 925 &stash->nrecvranks,&stash->recvranks,(PetscInt*)&stash->recvhdr,1,&stash->sendreqs,&stash->recvreqs, 926 MatStashBTSSend_Private,MatStashBTSRecv_Private,stash);CHKERRQ(ierr); 927 ierr = PetscMalloc2(stash->nrecvranks,&stash->some_indices,stash->nrecvranks,&stash->some_statuses);CHKERRQ(ierr); 928 stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. 
*/ 929 } 930 931 ierr = PetscSegBufferExtractInPlace(stash->segrecvframe,&stash->recvframes);CHKERRQ(ierr); 932 stash->recvframe_active = NULL; 933 stash->recvframe_i = 0; 934 stash->some_i = 0; 935 stash->some_count = 0; 936 stash->recvcount = 0; 937 stash->subset_off_proc = mat->subsetoffprocentries; 938 stash->insertmode = &mat->insertmode; 939 PetscFunctionReturn(0); 940 } 941 942 static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash,PetscMPIInt *n,PetscInt **row,PetscInt **col,PetscScalar **val,PetscInt *flg) 943 { 944 PetscErrorCode ierr; 945 MatStashBlock *block; 946 947 PetscFunctionBegin; 948 *flg = 0; 949 while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) { 950 if (stash->some_i == stash->some_count) { 951 if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(0); /* Done */ 952 ierr = MPI_Waitsome(stash->nrecvranks,stash->recvreqs,&stash->some_count,stash->some_indices,stash->use_status?stash->some_statuses:MPI_STATUSES_IGNORE);CHKERRQ(ierr); 953 stash->some_i = 0; 954 } 955 stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]]; 956 stash->recvframe_count = stash->recvframe_active->count; /* From header; maximum count */ 957 if (stash->use_status) { /* Count what was actually sent */ 958 ierr = MPI_Get_count(&stash->some_statuses[stash->some_i],stash->blocktype,&stash->recvframe_count);CHKERRQ(ierr); 959 } 960 if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */ 961 block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[0]; 962 if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? 
INSERT_VALUES : ADD_VALUES; 963 if (PetscUnlikely(*stash->insertmode == INSERT_VALUES && block->row >= 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling INSERT_VALUES, but rank %d requested ADD_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]); 964 if (PetscUnlikely(*stash->insertmode == ADD_VALUES && block->row < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling ADD_VALUES, but rank %d requested INSERT_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]); 965 } 966 stash->some_i++; 967 stash->recvcount++; 968 stash->recvframe_i = 0; 969 } 970 *n = 1; 971 block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[stash->recvframe_i*stash->blocktype_size]; 972 if (block->row < 0) block->row = -(block->row + 1); 973 *row = &block->row; 974 *col = &block->col; 975 *val = block->vals; 976 stash->recvframe_i++; 977 *flg = 1; 978 PetscFunctionReturn(0); 979 } 980 981 static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash) 982 { 983 PetscErrorCode ierr; 984 985 PetscFunctionBegin; 986 ierr = MPI_Waitall(stash->nsendranks,stash->sendreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 987 if (stash->subset_off_proc) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks */ 988 void *dummy; 989 ierr = PetscSegBufferExtractInPlace(stash->segrecvblocks,&dummy);CHKERRQ(ierr); 990 } else { /* No reuse, so collect everything. */ 991 ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr); 992 } 993 994 /* Now update nmaxold to be app 10% more than max n used, this way the 995 wastage of space is reduced the next time this stash is used. 
996 Also update the oldmax, only if it increases */ 997 if (stash->n) { 998 PetscInt bs2 = stash->bs*stash->bs; 999 PetscInt oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 1000 if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 1001 } 1002 1003 stash->nmax = 0; 1004 stash->n = 0; 1005 stash->reallocs = -1; 1006 stash->nprocessed = 0; 1007 1008 ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr); 1009 1010 stash->space = 0; 1011 1012 PetscFunctionReturn(0); 1013 } 1014 1015 static PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash) 1016 { 1017 PetscErrorCode ierr; 1018 1019 PetscFunctionBegin; 1020 ierr = PetscSegBufferDestroy(&stash->segsendblocks);CHKERRQ(ierr); 1021 ierr = PetscSegBufferDestroy(&stash->segrecvframe);CHKERRQ(ierr); 1022 stash->recvframes = NULL; 1023 ierr = PetscSegBufferDestroy(&stash->segrecvblocks);CHKERRQ(ierr); 1024 if (stash->blocktype != MPI_DATATYPE_NULL) { 1025 ierr = MPI_Type_free(&stash->blocktype);CHKERRQ(ierr); 1026 } 1027 stash->nsendranks = 0; 1028 stash->nrecvranks = 0; 1029 ierr = PetscFree3(stash->sendranks,stash->sendhdr,stash->sendframes);CHKERRQ(ierr); 1030 ierr = PetscFree(stash->sendreqs);CHKERRQ(ierr); 1031 ierr = PetscFree(stash->recvreqs);CHKERRQ(ierr); 1032 ierr = PetscFree(stash->recvranks);CHKERRQ(ierr); 1033 ierr = PetscFree(stash->recvhdr);CHKERRQ(ierr); 1034 ierr = PetscFree2(stash->some_indices,stash->some_statuses);CHKERRQ(ierr); 1035 PetscFunctionReturn(0); 1036 } 1037