1 2 #include <petsc/private/matimpl.h> 3 4 #define DEFAULT_STASH_SIZE 10000 5 6 static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*); 7 static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*); 8 static PetscErrorCode MatStashScatterEnd_Ref(MatStash*); 9 #if !defined(PETSC_HAVE_MPIUNI) 10 static PetscErrorCode MatStashScatterBegin_BTS(Mat,MatStash*,PetscInt*); 11 static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*); 12 static PetscErrorCode MatStashScatterEnd_BTS(MatStash*); 13 static PetscErrorCode MatStashScatterDestroy_BTS(MatStash*); 14 #endif 15 16 /* 17 MatStashCreate_Private - Creates a stash,currently used for all the parallel 18 matrix implementations. The stash is where elements of a matrix destined 19 to be stored on other processors are kept until matrix assembly is done. 20 21 This is a simple minded stash. Simply adds entries to end of stash. 22 23 Input Parameters: 24 comm - communicator, required for scatters. 25 bs - stash block size. used when stashing blocks of values 26 27 Output Parameters: 28 stash - the newly created stash 29 */ 30 PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash) 31 { 32 PetscErrorCode ierr; 33 PetscInt max,*opt,nopt,i; 34 PetscBool flg; 35 36 PetscFunctionBegin; 37 /* Require 2 tags,get the second using PetscCommGetNewTag() */ 38 stash->comm = comm; 39 40 ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr); 41 ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr); 42 ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr); 43 ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr); 44 ierr = PetscMalloc1(2*stash->size,&stash->flg_v);CHKERRQ(ierr); 45 for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 46 47 48 nopt = stash->size; 49 ierr = PetscMalloc1(nopt,&opt);CHKERRQ(ierr); 50 ierr = PetscOptionsGetIntArray(NULL,NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr); 51 if (flg) { 52 if (nopt == 1) max = opt[0]; 53 else if (nopt == stash->size) max = opt[stash->rank]; 54 else if (stash->rank < nopt) max = opt[stash->rank]; 55 else max = 0; /* Use default */ 56 stash->umax = max; 57 } else { 58 stash->umax = 0; 59 } 60 ierr = PetscFree(opt);CHKERRQ(ierr); 61 if (bs <= 0) bs = 1; 62 63 stash->bs = bs; 64 stash->nmax = 0; 65 stash->oldnmax = 0; 66 stash->n = 0; 67 stash->reallocs = -1; 68 stash->space_head = 0; 69 stash->space = 0; 70 71 stash->send_waits = 0; 72 stash->recv_waits = 0; 73 stash->send_status = 0; 74 stash->nsends = 0; 75 stash->nrecvs = 0; 76 stash->svalues = 0; 77 stash->rvalues = 0; 78 stash->rindices = 0; 79 stash->nprocessed = 0; 80 stash->reproduce = PETSC_FALSE; 81 stash->blocktype = MPI_DATATYPE_NULL; 82 83 ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_reproduce",&stash->reproduce,NULL);CHKERRQ(ierr); 84 #if !defined(PETSC_HAVE_MPIUNI) 85 ierr = PetscOptionsGetBool(NULL,NULL,"-matstash_legacy",&flg,NULL);CHKERRQ(ierr); 86 if (!flg) { 87 stash->ScatterBegin = MatStashScatterBegin_BTS; 88 stash->ScatterGetMesg = MatStashScatterGetMesg_BTS; 89 stash->ScatterEnd = MatStashScatterEnd_BTS; 90 stash->ScatterDestroy = MatStashScatterDestroy_BTS; 91 } else { 92 #endif 93 stash->ScatterBegin = MatStashScatterBegin_Ref; 94 stash->ScatterGetMesg = MatStashScatterGetMesg_Ref; 95 stash->ScatterEnd = MatStashScatterEnd_Ref; 96 stash->ScatterDestroy = NULL; 97 #if !defined(PETSC_HAVE_MPIUNI) 98 } 99 #endif 100 PetscFunctionReturn(0); 101 } 102 103 /* 104 MatStashDestroy_Private - Destroy the stash 105 */ 106 PetscErrorCode MatStashDestroy_Private(MatStash *stash) 107 { 108 PetscErrorCode ierr; 109 110 PetscFunctionBegin; 111 ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr); 112 if (stash->ScatterDestroy) {ierr = (*stash->ScatterDestroy)(stash);CHKERRQ(ierr);} 113 114 stash->space = 0; 115 116 ierr = PetscFree(stash->flg_v);CHKERRQ(ierr); 117 PetscFunctionReturn(0); 118 } 119 120 /* 121 MatStashScatterEnd_Private - This is called as the final stage of 122 scatter. The final stages of message passing is done here, and 123 all the memory used for message passing is cleaned up. This 124 routine also resets the stash, and deallocates the memory used 125 for the stash. It also keeps track of the current memory usage 126 so that the same value can be used the next time through. 127 */ 128 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash) 129 { 130 PetscErrorCode ierr; 131 132 PetscFunctionBegin; 133 ierr = (*stash->ScatterEnd)(stash);CHKERRQ(ierr); 134 PetscFunctionReturn(0); 135 } 136 137 static PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash) 138 { 139 PetscErrorCode ierr; 140 PetscInt nsends=stash->nsends,bs2,oldnmax,i; 141 MPI_Status *send_status; 142 143 PetscFunctionBegin; 144 for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 145 /* wait on sends */ 146 if (nsends) { 147 ierr = PetscMalloc1(2*nsends,&send_status);CHKERRQ(ierr); 148 ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 149 ierr = PetscFree(send_status);CHKERRQ(ierr); 150 } 151 152 /* Now update nmaxold to be app 10% more than max n used, this way the 153 wastage of space is reduced the next time this stash is used. 154 Also update the oldmax, only if it increases */ 155 if (stash->n) { 156 bs2 = stash->bs*stash->bs; 157 oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 158 if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 159 } 160 161 stash->nmax = 0; 162 stash->n = 0; 163 stash->reallocs = -1; 164 stash->nprocessed = 0; 165 166 ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr); 167 168 stash->space = 0; 169 170 ierr = PetscFree(stash->send_waits);CHKERRQ(ierr); 171 ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr); 172 ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr); 173 ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr); 174 ierr = PetscFree(stash->rvalues);CHKERRQ(ierr); 175 ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr); 176 ierr = PetscFree(stash->rindices);CHKERRQ(ierr); 177 PetscFunctionReturn(0); 178 } 179 180 /* 181 MatStashGetInfo_Private - Gets the relavant statistics of the stash 182 183 Input Parameters: 184 stash - the stash 185 nstash - the size of the stash. Indicates the number of values stored. 186 reallocs - the number of additional mallocs incurred. 187 188 */ 189 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs) 190 { 191 PetscInt bs2 = stash->bs*stash->bs; 192 193 PetscFunctionBegin; 194 if (nstash) *nstash = stash->n*bs2; 195 if (reallocs) { 196 if (stash->reallocs < 0) *reallocs = 0; 197 else *reallocs = stash->reallocs; 198 } 199 PetscFunctionReturn(0); 200 } 201 202 /* 203 MatStashSetInitialSize_Private - Sets the initial size of the stash 204 205 Input Parameters: 206 stash - the stash 207 max - the value that is used as the max size of the stash. 208 this value is used while allocating memory. 209 */ 210 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max) 211 { 212 PetscFunctionBegin; 213 stash->umax = max; 214 PetscFunctionReturn(0); 215 } 216 217 /* MatStashExpand_Private - Expand the stash. This function is called 218 when the space in the stash is not sufficient to add the new values 219 being inserted into the stash. 220 221 Input Parameters: 222 stash - the stash 223 incr - the minimum increase requested 224 225 Notes: 226 This routine doubles the currently used memory. 227 */ 228 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr) 229 { 230 PetscErrorCode ierr; 231 PetscInt newnmax,bs2= stash->bs*stash->bs; 232 233 PetscFunctionBegin; 234 /* allocate a larger stash */ 235 if (!stash->oldnmax && !stash->nmax) { /* new stash */ 236 if (stash->umax) newnmax = stash->umax/bs2; 237 else newnmax = DEFAULT_STASH_SIZE/bs2; 238 } else if (!stash->nmax) { /* resuing stash */ 239 if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2; 240 else newnmax = stash->oldnmax/bs2; 241 } else newnmax = stash->nmax*2; 242 if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 243 244 /* Get a MatStashSpace and attach it to stash */ 245 ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr); 246 if (!stash->space_head) { /* new stash or resuing stash->oldnmax */ 247 stash->space_head = stash->space; 248 } 249 250 stash->reallocs++; 251 stash->nmax = newnmax; 252 PetscFunctionReturn(0); 253 } 254 /* 255 MatStashValuesRow_Private - inserts values into the stash. This function 256 expects the values to be roworiented. Multiple columns belong to the same row 257 can be inserted with a single call to this function. 258 259 Input Parameters: 260 stash - the stash 261 row - the global row correspoiding to the values 262 n - the number of elements inserted. All elements belong to the above row. 263 idxn - the global column indices corresponding to each of the values. 264 values - the values inserted 265 */ 266 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries) 267 { 268 PetscErrorCode ierr; 269 PetscInt i,k,cnt = 0; 270 PetscMatStashSpace space=stash->space; 271 272 PetscFunctionBegin; 273 /* Check and see if we have sufficient memory */ 274 if (!space || space->local_remaining < n) { 275 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 276 } 277 space = stash->space; 278 k = space->local_used; 279 for (i=0; i<n; i++) { 280 if (ignorezeroentries && (values[i] == 0.0)) continue; 281 space->idx[k] = row; 282 space->idy[k] = idxn[i]; 283 space->val[k] = values[i]; 284 k++; 285 cnt++; 286 } 287 stash->n += cnt; 288 space->local_used += cnt; 289 space->local_remaining -= cnt; 290 PetscFunctionReturn(0); 291 } 292 293 /* 294 MatStashValuesCol_Private - inserts values into the stash. This function 295 expects the values to be columnoriented. Multiple columns belong to the same row 296 can be inserted with a single call to this function. 297 298 Input Parameters: 299 stash - the stash 300 row - the global row correspoiding to the values 301 n - the number of elements inserted. All elements belong to the above row. 302 idxn - the global column indices corresponding to each of the values. 303 values - the values inserted 304 stepval - the consecutive values are sepated by a distance of stepval. 305 this happens because the input is columnoriented. 306 */ 307 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries) 308 { 309 PetscErrorCode ierr; 310 PetscInt i,k,cnt = 0; 311 PetscMatStashSpace space=stash->space; 312 313 PetscFunctionBegin; 314 /* Check and see if we have sufficient memory */ 315 if (!space || space->local_remaining < n) { 316 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 317 } 318 space = stash->space; 319 k = space->local_used; 320 for (i=0; i<n; i++) { 321 if (ignorezeroentries && (values[i*stepval] == 0.0)) continue; 322 space->idx[k] = row; 323 space->idy[k] = idxn[i]; 324 space->val[k] = values[i*stepval]; 325 k++; 326 cnt++; 327 } 328 stash->n += cnt; 329 space->local_used += cnt; 330 space->local_remaining -= cnt; 331 PetscFunctionReturn(0); 332 } 333 334 /* 335 MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 336 This function expects the values to be roworiented. Multiple columns belong 337 to the same block-row can be inserted with a single call to this function. 338 This function extracts the sub-block of values based on the dimensions of 339 the original input block, and the row,col values corresponding to the blocks. 340 341 Input Parameters: 342 stash - the stash 343 row - the global block-row correspoiding to the values 344 n - the number of elements inserted. All elements belong to the above row. 345 idxn - the global block-column indices corresponding to each of the blocks of 346 values. Each block is of size bs*bs. 347 values - the values inserted 348 rmax - the number of block-rows in the original block. 349 cmax - the number of block-columsn on the original block. 350 idx - the index of the current block-row in the original block. 351 */ 352 PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 353 { 354 PetscErrorCode ierr; 355 PetscInt i,j,k,bs2,bs=stash->bs,l; 356 const PetscScalar *vals; 357 PetscScalar *array; 358 PetscMatStashSpace space=stash->space; 359 360 PetscFunctionBegin; 361 if (!space || space->local_remaining < n) { 362 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 363 } 364 space = stash->space; 365 l = space->local_used; 366 bs2 = bs*bs; 367 for (i=0; i<n; i++) { 368 space->idx[l] = row; 369 space->idy[l] = idxn[i]; 370 /* Now copy over the block of values. Store the values column oriented. 371 This enables inserting multiple blocks belonging to a row with a single 372 funtion call */ 373 array = space->val + bs2*l; 374 vals = values + idx*bs2*n + bs*i; 375 for (j=0; j<bs; j++) { 376 for (k=0; k<bs; k++) array[k*bs] = vals[k]; 377 array++; 378 vals += cmax*bs; 379 } 380 l++; 381 } 382 stash->n += n; 383 space->local_used += n; 384 space->local_remaining -= n; 385 PetscFunctionReturn(0); 386 } 387 388 /* 389 MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 390 This function expects the values to be roworiented. Multiple columns belong 391 to the same block-row can be inserted with a single call to this function. 392 This function extracts the sub-block of values based on the dimensions of 393 the original input block, and the row,col values corresponding to the blocks. 394 395 Input Parameters: 396 stash - the stash 397 row - the global block-row correspoiding to the values 398 n - the number of elements inserted. All elements belong to the above row. 399 idxn - the global block-column indices corresponding to each of the blocks of 400 values. Each block is of size bs*bs. 401 values - the values inserted 402 rmax - the number of block-rows in the original block. 403 cmax - the number of block-columsn on the original block. 404 idx - the index of the current block-row in the original block. 405 */ 406 PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 407 { 408 PetscErrorCode ierr; 409 PetscInt i,j,k,bs2,bs=stash->bs,l; 410 const PetscScalar *vals; 411 PetscScalar *array; 412 PetscMatStashSpace space=stash->space; 413 414 PetscFunctionBegin; 415 if (!space || space->local_remaining < n) { 416 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 417 } 418 space = stash->space; 419 l = space->local_used; 420 bs2 = bs*bs; 421 for (i=0; i<n; i++) { 422 space->idx[l] = row; 423 space->idy[l] = idxn[i]; 424 /* Now copy over the block of values. Store the values column oriented. 425 This enables inserting multiple blocks belonging to a row with a single 426 funtion call */ 427 array = space->val + bs2*l; 428 vals = values + idx*bs2*n + bs*i; 429 for (j=0; j<bs; j++) { 430 for (k=0; k<bs; k++) array[k] = vals[k]; 431 array += bs; 432 vals += rmax*bs; 433 } 434 l++; 435 } 436 stash->n += n; 437 space->local_used += n; 438 space->local_remaining -= n; 439 PetscFunctionReturn(0); 440 } 441 /* 442 MatStashScatterBegin_Private - Initiates the transfer of values to the 443 correct owners. This function goes through the stash, and check the 444 owners of each stashed value, and sends the values off to the owner 445 processors. 446 447 Input Parameters: 448 stash - the stash 449 owners - an array of size 'no-of-procs' which gives the ownership range 450 for each node. 451 452 Notes: The 'owners' array in the cased of the blocked-stash has the 453 ranges specified blocked global indices, and for the regular stash in 454 the proper global indices. 455 */ 456 PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners) 457 { 458 PetscErrorCode ierr; 459 460 PetscFunctionBegin; 461 ierr = (*stash->ScatterBegin)(mat,stash,owners);CHKERRQ(ierr); 462 PetscFunctionReturn(0); 463 } 464 465 static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners) 466 { 467 PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 468 PetscInt size=stash->size,nsends; 469 PetscErrorCode ierr; 470 PetscInt count,*sindices,**rindices,i,j,idx,lastidx,l; 471 PetscScalar **rvalues,*svalues; 472 MPI_Comm comm = stash->comm; 473 MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 474 PetscMPIInt *sizes,*nlengths,nreceives; 475 PetscInt *sp_idx,*sp_idy; 476 PetscScalar *sp_val; 477 PetscMatStashSpace space,space_next; 478 479 PetscFunctionBegin; 480 { /* make sure all processors are either in INSERTMODE or ADDMODE */ 481 InsertMode addv; 482 ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 483 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 484 mat->insertmode = addv; /* in case this processor had no cache */ 485 } 486 487 bs2 = stash->bs*stash->bs; 488 489 /* first count number of contributors to each processor */ 490 ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr); 491 ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr); 492 ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr); 493 494 i = j = 0; 495 lastidx = -1; 496 space = stash->space_head; 497 while (space) { 498 space_next = space->next; 499 sp_idx = space->idx; 500 for (l=0; l<space->local_used; l++) { 501 /* if indices are NOT locally sorted, need to start search at the beginning */ 502 if (lastidx > (idx = sp_idx[l])) j = 0; 503 lastidx = idx; 504 for (; j<size; j++) { 505 if (idx >= owners[j] && idx < owners[j+1]) { 506 nlengths[j]++; owner[i] = j; break; 507 } 508 } 509 i++; 510 } 511 space = space_next; 512 } 513 /* Now check what procs get messages - and compute nsends. */ 514 for (i=0, nsends=0; i<size; i++) { 515 if (nlengths[i]) { 516 sizes[i] = 1; nsends++; 517 } 518 } 519 520 {PetscMPIInt *onodes,*olengths; 521 /* Determine the number of messages to expect, their lengths, from from-ids */ 522 ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr); 523 ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr); 524 /* since clubbing row,col - lengths are multiplied by 2 */ 525 for (i=0; i<nreceives; i++) olengths[i] *=2; 526 ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr); 527 /* values are size 'bs2' lengths (and remove earlier factor 2 */ 528 for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 529 ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr); 530 ierr = PetscFree(onodes);CHKERRQ(ierr); 531 ierr = PetscFree(olengths);CHKERRQ(ierr);} 532 533 /* do sends: 534 1) starts[i] gives the starting index in svalues for stuff going to 535 the ith processor 536 */ 537 ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr); 538 ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr); 539 ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr); 540 /* use 2 sends the first with all_a, the next with all_i and all_j */ 541 startv[0] = 0; starti[0] = 0; 542 for (i=1; i<size; i++) { 543 startv[i] = startv[i-1] + nlengths[i-1]; 544 starti[i] = starti[i-1] + 2*nlengths[i-1]; 545 } 546 547 i = 0; 548 space = stash->space_head; 549 while (space) { 550 space_next = space->next; 551 sp_idx = space->idx; 552 sp_idy = space->idy; 553 sp_val = space->val; 554 for (l=0; l<space->local_used; l++) { 555 j = owner[i]; 556 if (bs2 == 1) { 557 svalues[startv[j]] = sp_val[l]; 558 } else { 559 PetscInt k; 560 PetscScalar *buf1,*buf2; 561 buf1 = svalues+bs2*startv[j]; 562 buf2 = space->val + bs2*l; 563 for (k=0; k<bs2; k++) buf1[k] = buf2[k]; 564 } 565 sindices[starti[j]] = sp_idx[l]; 566 sindices[starti[j]+nlengths[j]] = sp_idy[l]; 567 startv[j]++; 568 starti[j]++; 569 i++; 570 } 571 space = space_next; 572 } 573 startv[0] = 0; 574 for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1]; 575 576 for (i=0,count=0; i<size; i++) { 577 if (sizes[i]) { 578 ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr); 579 ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr); 580 } 581 } 582 #if defined(PETSC_USE_INFO) 583 ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr); 584 for (i=0; i<size; i++) { 585 if (sizes[i]) { 586 ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 587 } 588 } 589 #endif 590 ierr = PetscFree(nlengths);CHKERRQ(ierr); 591 ierr = PetscFree(owner);CHKERRQ(ierr); 592 ierr = PetscFree2(startv,starti);CHKERRQ(ierr); 593 ierr = PetscFree(sizes);CHKERRQ(ierr); 594 595 /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 596 ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr); 597 598 for (i=0; i<nreceives; i++) { 599 recv_waits[2*i] = recv_waits1[i]; 600 recv_waits[2*i+1] = recv_waits2[i]; 601 } 602 stash->recv_waits = recv_waits; 603 604 ierr = PetscFree(recv_waits1);CHKERRQ(ierr); 605 ierr = PetscFree(recv_waits2);CHKERRQ(ierr); 606 607 stash->svalues = svalues; 608 stash->sindices = sindices; 609 stash->rvalues = rvalues; 610 stash->rindices = rindices; 611 stash->send_waits = send_waits; 612 stash->nsends = nsends; 613 stash->nrecvs = nreceives; 614 stash->reproduce_count = 0; 615 PetscFunctionReturn(0); 616 } 617 618 /* 619 MatStashScatterGetMesg_Private - This function waits on the receives posted 620 in the function MatStashScatterBegin_Private() and returns one message at 621 a time to the calling function. If no messages are left, it indicates this 622 by setting flg = 0, else it sets flg = 1. 623 624 Input Parameters: 625 stash - the stash 626 627 Output Parameters: 628 nvals - the number of entries in the current message. 629 rows - an array of row indices (or blocked indices) corresponding to the values 630 cols - an array of columnindices (or blocked indices) corresponding to the values 631 vals - the values 632 flg - 0 indicates no more message left, and the current call has no values associated. 633 1 indicates that the current call successfully received a message, and the 634 other output parameters nvals,rows,cols,vals are set appropriately. 635 */ 636 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg) 637 { 638 PetscErrorCode ierr; 639 640 PetscFunctionBegin; 641 ierr = (*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg);CHKERRQ(ierr); 642 PetscFunctionReturn(0); 643 } 644 645 static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg) 646 { 647 PetscErrorCode ierr; 648 PetscMPIInt i,*flg_v = stash->flg_v,i1,i2; 649 PetscInt bs2; 650 MPI_Status recv_status; 651 PetscBool match_found = PETSC_FALSE; 652 653 PetscFunctionBegin; 654 *flg = 0; /* When a message is discovered this is reset to 1 */ 655 /* Return if no more messages to process */ 656 if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0); 657 658 bs2 = stash->bs*stash->bs; 659 /* If a matching pair of receives are found, process them, and return the data to 660 the calling function. Until then keep receiving messages */ 661 while (!match_found) { 662 if (stash->reproduce) { 663 i = stash->reproduce_count++; 664 ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr); 665 } else { 666 ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 667 } 668 if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!"); 669 670 /* Now pack the received message into a structure which is usable by others */ 671 if (i % 2) { 672 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr); 673 674 flg_v[2*recv_status.MPI_SOURCE] = i/2; 675 676 *nvals = *nvals/bs2; 677 } else { 678 ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr); 679 680 flg_v[2*recv_status.MPI_SOURCE+1] = i/2; 681 682 *nvals = *nvals/2; /* This message has both row indices and col indices */ 683 } 684 685 /* Check if we have both messages from this proc */ 686 i1 = flg_v[2*recv_status.MPI_SOURCE]; 687 i2 = flg_v[2*recv_status.MPI_SOURCE+1]; 688 if (i1 != -1 && i2 != -1) { 689 *rows = stash->rindices[i2]; 690 *cols = *rows + *nvals; 691 *vals = stash->rvalues[i1]; 692 *flg = 1; 693 stash->nprocessed++; 694 match_found = PETSC_TRUE; 695 } 696 } 697 PetscFunctionReturn(0); 698 } 699 700 #if !defined(PETSC_HAVE_MPIUNI) 701 typedef struct { 702 PetscInt row; 703 PetscInt col; 704 PetscScalar vals[1]; /* Actually an array of length bs2 */ 705 } MatStashBlock; 706 707 static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode) 708 { 709 PetscErrorCode ierr; 710 PetscMatStashSpace space; 711 PetscInt n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i; 712 PetscScalar **valptr; 713 714 PetscFunctionBegin; 715 ierr = PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);CHKERRQ(ierr); 716 for (space=stash->space_head,cnt=0; space; space=space->next) { 717 for (i=0; i<space->local_used; i++) { 718 row[cnt] = space->idx[i]; 719 col[cnt] = space->idy[i]; 720 valptr[cnt] = &space->val[i*bs2]; 721 perm[cnt] = cnt; /* Will tell us where to find valptr after sorting row[] and col[] */ 722 cnt++; 723 } 724 } 725 if (cnt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %D, but counted %D entries",n,cnt); 726 ierr = PetscSortIntWithArrayPair(n,row,col,perm);CHKERRQ(ierr); 727 /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */ 728 for (rowstart=0,cnt=0,i=1; i<=n; i++) { 729 if (i == n || row[i] != row[rowstart]) { /* Sort the last row. */ 730 PetscInt colstart; 731 ierr = PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);CHKERRQ(ierr); 732 for (colstart=rowstart; colstart<i; ) { /* Compress multiple insertions to the same location */ 733 PetscInt j,l; 734 MatStashBlock *block; 735 ierr = PetscSegBufferGet(stash->segsendblocks,1,&block);CHKERRQ(ierr); 736 block->row = row[rowstart]; 737 block->col = col[colstart]; 738 ierr = PetscMemcpy(block->vals,valptr[perm[colstart]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr); 739 for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */ 740 if (insertmode == ADD_VALUES) { 741 for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l]; 742 } else { 743 ierr = PetscMemcpy(block->vals,valptr[perm[j]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr); 744 } 745 } 746 colstart = j; 747 } 748 rowstart = i; 749 } 750 } 751 ierr = PetscFree4(row,col,valptr,perm);CHKERRQ(ierr); 752 PetscFunctionReturn(0); 753 } 754 755 static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash) 756 { 757 PetscErrorCode ierr; 758 759 PetscFunctionBegin; 760 if (stash->blocktype == MPI_DATATYPE_NULL) { 761 PetscInt bs2 = PetscSqr(stash->bs); 762 PetscMPIInt blocklens[2]; 763 MPI_Aint displs[2]; 764 MPI_Datatype types[2],stype; 765 /* C++ std::complex is not my favorite datatype. Since it is not POD, we cannot use offsetof to find the offset of 766 * vals. But the layout is actually guaranteed by the standard, so we do a little dance here with struct 767 * DummyBlock, substituting PetscReal for PetscComplex so that we can determine the offset. 768 */ 769 struct DummyBlock {PetscInt row,col; PetscReal vals;}; 770 771 stash->blocktype_size = offsetof(struct DummyBlock,vals) + bs2*sizeof(PetscScalar); 772 if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */ 773 stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt); 774 } 775 ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segsendblocks);CHKERRQ(ierr); 776 ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segrecvblocks);CHKERRQ(ierr); 777 ierr = PetscSegBufferCreate(sizeof(MatStashFrame),1,&stash->segrecvframe);CHKERRQ(ierr); 778 blocklens[0] = 2; 779 blocklens[1] = bs2; 780 displs[0] = offsetof(struct DummyBlock,row); 781 displs[1] = offsetof(struct DummyBlock,vals); 782 types[0] = MPIU_INT; 783 types[1] = MPIU_SCALAR; 784 ierr = MPI_Type_create_struct(2,blocklens,displs,types,&stype);CHKERRQ(ierr); 785 ierr = MPI_Type_commit(&stype);CHKERRQ(ierr); 786 ierr = MPI_Type_create_resized(stype,0,stash->blocktype_size,&stash->blocktype);CHKERRQ(ierr); /* MPI-2 */ 787 ierr = MPI_Type_commit(&stash->blocktype);CHKERRQ(ierr); 788 ierr = MPI_Type_free(&stype);CHKERRQ(ierr); 789 } 790 PetscFunctionReturn(0); 791 } 792 793 /* Callback invoked after target rank has initiatied receive of rendezvous message. 794 * Here we post the main sends. 795 */ 796 static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void *sdata,MPI_Request req[],void *ctx) 797 { 798 MatStash *stash = (MatStash*)ctx; 799 MatStashHeader *hdr = (MatStashHeader*)sdata; 800 PetscErrorCode ierr; 801 802 PetscFunctionBegin; 803 if (rank != stash->sendranks[rankid]) SETERRQ3(comm,PETSC_ERR_PLIB,"BTS Send rank %d does not match sendranks[%d] %d",rank,rankid,stash->sendranks[rankid]); 804 ierr = MPI_Isend(stash->sendframes[rankid].buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr); 805 stash->sendframes[rankid].count = hdr->count; 806 stash->sendframes[rankid].pending = 1; 807 PetscFunctionReturn(0); 808 } 809 810 /* Callback invoked by target after receiving rendezvous message. 811 * Here we post the main recvs. 812 */ 813 static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *rdata,MPI_Request req[],void *ctx) 814 { 815 MatStash *stash = (MatStash*)ctx; 816 MatStashHeader *hdr = (MatStashHeader*)rdata; 817 MatStashFrame *frame; 818 PetscErrorCode ierr; 819 820 PetscFunctionBegin; 821 ierr = PetscSegBufferGet(stash->segrecvframe,1,&frame);CHKERRQ(ierr); 822 ierr = PetscSegBufferGet(stash->segrecvblocks,hdr->count,&frame->buffer);CHKERRQ(ierr); 823 ierr = MPI_Irecv(frame->buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr); 824 frame->count = hdr->count; 825 frame->pending = 1; 826 PetscFunctionReturn(0); 827 } 828 829 /* 830 * owners[] contains the ownership ranges; may be indexed by either blocks or scalars 831 */ 832 static PetscErrorCode MatStashScatterBegin_BTS(Mat mat,MatStash *stash,PetscInt owners[]) 833 { 834 PetscErrorCode ierr; 835 size_t nblocks; 836 char *sendblocks; 837 838 PetscFunctionBegin; 839 #if defined(PETSC_USE_DEBUG) 840 { /* make sure all processors are either in INSERTMODE or ADDMODE */ 841 InsertMode addv; 842 ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 843 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 844 } 845 #endif 846 847 if (stash->subset_off_proc && !mat->subsetoffprocentries) { /* We won't use the old scatter context. */ 848 ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr); 849 } 850 851 ierr = MatStashBlockTypeSetUp(stash);CHKERRQ(ierr); 852 ierr = MatStashSortCompress_Private(stash,mat->insertmode);CHKERRQ(ierr); 853 ierr = PetscSegBufferGetSize(stash->segsendblocks,&nblocks);CHKERRQ(ierr); 854 ierr = PetscSegBufferExtractInPlace(stash->segsendblocks,&sendblocks);CHKERRQ(ierr); 855 if (stash->subset_off_proc && mat->subsetoffprocentries) { /* Set up sendhdrs and sendframes for each rank that we sent before */ 856 PetscInt i; 857 size_t b; 858 for (i=0,b=0; i<stash->nsendranks; i++) { 859 stash->sendframes[i].buffer = &sendblocks[b*stash->blocktype_size]; 860 /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */ 861 stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */ 862 for ( ; b<nblocks; b++) { 863 MatStashBlock *sendblock_b = (MatStashBlock*)&sendblocks[b*stash->blocktype_size]; 864 if (PetscUnlikely(sendblock_b->row < owners[stash->sendranks[i]])) SETERRQ2(stash->comm,PETSC_ERR_ARG_WRONG,"MAT_SUBSET_OFF_PROC_ENTRIES set, but row %D owned by %d not communicated in initial assembly",sendblock_b->row,stash->sendranks[i]); 865 if (sendblock_b->row >= owners[stash->sendranks[i]+1]) break; 866 stash->sendhdr[i].count++; 867 } 868 } 869 } else { /* Dynamically count and pack (first time) */ 870 PetscInt sendno; 871 size_t i,rowstart; 872 873 /* Count number of send ranks and allocate for sends */ 874 stash->nsendranks = 0; 875 for (rowstart=0; rowstart<nblocks; ) { 876 PetscInt owner; 877 MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size]; 878 ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr); 879 if (owner < 0) owner = -(owner+2); 880 for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */ 881 MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size]; 882 if (sendblock_i->row >= owners[owner+1]) break; 883 } 884 stash->nsendranks++; 885 rowstart = i; 886 } 887 ierr = PetscMalloc3(stash->nsendranks,&stash->sendranks,stash->nsendranks,&stash->sendhdr,stash->nsendranks,&stash->sendframes);CHKERRQ(ierr); 888 889 /* Set up sendhdrs and sendframes */ 890 sendno = 0; 891 for (rowstart=0; rowstart<nblocks; ) { 892 PetscInt owner; 893 MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size]; 894 ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr); 895 if (owner < 0) owner = -(owner+2); 896 stash->sendranks[sendno] = owner; 897 for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */ 898 MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size]; 899 if (sendblock_i->row >= owners[owner+1]) break; 900 } 901 stash->sendframes[sendno].buffer = sendblock_rowstart; 902 stash->sendframes[sendno].pending = 0; 903 stash->sendhdr[sendno].count = i - rowstart; 904 sendno++; 905 rowstart = i; 906 } 907 if (sendno != stash->nsendranks) SETERRQ2(stash->comm,PETSC_ERR_PLIB,"BTS counted %D sendranks, but %D sends",stash->nsendranks,sendno); 908 } 909 910 /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new 911 * message or a dummy entry of some sort. */ 912 if (mat->insertmode == INSERT_VALUES) { 913 size_t i; 914 for (i=0; i<nblocks; i++) { 915 MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size]; 916 sendblock_i->row = -(sendblock_i->row+1); 917 } 918 } 919 920 if (stash->subset_off_proc && mat->subsetoffprocentries) { 921 PetscMPIInt i,tag; 922 ierr = PetscCommGetNewTag(stash->comm,&tag);CHKERRQ(ierr); 923 for (i=0; i<stash->nrecvranks; i++) { 924 ierr = MatStashBTSRecv_Private(stash->comm,&tag,stash->recvranks[i],&stash->recvhdr[i],&stash->recvreqs[i],stash);CHKERRQ(ierr); 925 } 926 for (i=0; i<stash->nsendranks; i++) { 927 ierr = MatStashBTSSend_Private(stash->comm,&tag,i,stash->sendranks[i],&stash->sendhdr[i],&stash->sendreqs[i],stash);CHKERRQ(ierr); 928 } 929 stash->use_status = PETSC_TRUE; /* Use count from message status. */ 930 } else { 931 ierr = PetscCommBuildTwoSidedFReq(stash->comm,1,MPIU_INT,stash->nsendranks,stash->sendranks,(PetscInt*)stash->sendhdr, 932 &stash->nrecvranks,&stash->recvranks,(PetscInt*)&stash->recvhdr,1,&stash->sendreqs,&stash->recvreqs, 933 MatStashBTSSend_Private,MatStashBTSRecv_Private,stash);CHKERRQ(ierr); 934 ierr = PetscMalloc2(stash->nrecvranks,&stash->some_indices,stash->nrecvranks,&stash->some_statuses);CHKERRQ(ierr); 935 stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */ 936 } 937 938 ierr = PetscSegBufferExtractInPlace(stash->segrecvframe,&stash->recvframes);CHKERRQ(ierr); 939 stash->recvframe_active = NULL; 940 stash->recvframe_i = 0; 941 stash->some_i = 0; 942 stash->some_count = 0; 943 stash->recvcount = 0; 944 stash->subset_off_proc = mat->subsetoffprocentries; 945 stash->insertmode = &mat->insertmode; 946 PetscFunctionReturn(0); 947 } 948 949 static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash,PetscMPIInt *n,PetscInt **row,PetscInt **col,PetscScalar **val,PetscInt *flg) 950 { 951 PetscErrorCode ierr; 952 MatStashBlock *block; 953 954 PetscFunctionBegin; 955 *flg = 0; 956 while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) { 957 if (stash->some_i == stash->some_count) { 958 if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(0); /* Done */ 959 ierr = MPI_Waitsome(stash->nrecvranks,stash->recvreqs,&stash->some_count,stash->some_indices,stash->use_status?stash->some_statuses:MPI_STATUSES_IGNORE);CHKERRQ(ierr); 960 stash->some_i = 0; 961 } 962 stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]]; 963 stash->recvframe_count = stash->recvframe_active->count; /* From header; maximum count */ 964 if (stash->use_status) { /* Count what was actually sent */ 965 ierr = MPI_Get_count(&stash->some_statuses[stash->some_i],stash->blocktype,&stash->recvframe_count);CHKERRQ(ierr); 966 } 967 if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */ 968 block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[0]; 969 if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES; 970 if (PetscUnlikely(*stash->insertmode == INSERT_VALUES && block->row >= 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling INSERT_VALUES, but rank %d requested ADD_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]); 971 if (PetscUnlikely(*stash->insertmode == ADD_VALUES && block->row < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling ADD_VALUES, but rank %d requested INSERT_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]); 972 } 973 stash->some_i++; 974 stash->recvcount++; 975 stash->recvframe_i = 0; 976 } 977 *n = 1; 978 block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[stash->recvframe_i*stash->blocktype_size]; 979 if (block->row < 0) block->row = -(block->row + 1); 980 *row = &block->row; 981 *col = &block->col; 982 *val = block->vals; 983 stash->recvframe_i++; 984 *flg = 1; 985 PetscFunctionReturn(0); 986 } 987 988 static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash) 989 { 990 PetscErrorCode ierr; 991 992 PetscFunctionBegin; 993 ierr = MPI_Waitall(stash->nsendranks,stash->sendreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 994 if (stash->subset_off_proc) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks */ 995 void *dummy; 996 ierr = PetscSegBufferExtractInPlace(stash->segrecvblocks,&dummy);CHKERRQ(ierr); 997 } else { /* No reuse, so collect everything. */ 998 ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr); 999 } 1000 1001 /* Now update nmaxold to be app 10% more than max n used, this way the 1002 wastage of space is reduced the next time this stash is used. 1003 Also update the oldmax, only if it increases */ 1004 if (stash->n) { 1005 PetscInt bs2 = stash->bs*stash->bs; 1006 PetscInt oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 1007 if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 1008 } 1009 1010 stash->nmax = 0; 1011 stash->n = 0; 1012 stash->reallocs = -1; 1013 stash->nprocessed = 0; 1014 1015 ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr); 1016 1017 stash->space = 0; 1018 1019 PetscFunctionReturn(0); 1020 } 1021 1022 static PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash) 1023 { 1024 PetscErrorCode ierr; 1025 1026 PetscFunctionBegin; 1027 ierr = PetscSegBufferDestroy(&stash->segsendblocks);CHKERRQ(ierr); 1028 ierr = PetscSegBufferDestroy(&stash->segrecvframe);CHKERRQ(ierr); 1029 stash->recvframes = NULL; 1030 ierr = PetscSegBufferDestroy(&stash->segrecvblocks);CHKERRQ(ierr); 1031 if (stash->blocktype != MPI_DATATYPE_NULL) { 1032 ierr = MPI_Type_free(&stash->blocktype);CHKERRQ(ierr); 1033 } 1034 stash->nsendranks = 0; 1035 stash->nrecvranks = 0; 1036 ierr = PetscFree3(stash->sendranks,stash->sendhdr,stash->sendframes);CHKERRQ(ierr); 1037 ierr = PetscFree(stash->sendreqs);CHKERRQ(ierr); 1038 ierr = PetscFree(stash->recvreqs);CHKERRQ(ierr); 1039 ierr = PetscFree(stash->recvranks);CHKERRQ(ierr); 1040 ierr = PetscFree(stash->recvhdr);CHKERRQ(ierr); 1041 ierr = PetscFree2(stash->some_indices,stash->some_statuses);CHKERRQ(ierr); 1042 PetscFunctionReturn(0); 1043 } 1044 #endif 1045