#include <petsc/private/matimpl.h>

#define DEFAULT_STASH_SIZE 10000

static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*);
static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
static PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
static PetscErrorCode MatStashScatterBegin_BTS(Mat,MatStash*,PetscInt*);
static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
static PetscErrorCode MatStashScatterEnd_BTS(MatStash*);
static PetscErrorCode MatStashScatterDestroy_BTS(MatStash*);

/*
  MatStashCreate_Private - Creates a stash, currently used by all the parallel
  matrix implementations. The stash is where elements of a matrix destined
  to be stored on other processes are kept until matrix assembly is done.

  This is a simple-minded stash: entries are simply appended at the end.

  Input Parameters:
  comm - communicator, required for scatters.
  bs   - stash block size, used when stashing blocks of values

  Output Parameter:
  stash - the newly created stash
*/
#undef __FUNCT__
#define __FUNCT__ "MatStashCreate_Private"
PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
{
  PetscErrorCode ierr;
  PetscInt       max,*opt,nopt,i;
  PetscBool      flg;

  PetscFunctionBegin;
  /* Require 2 tags; get the second using PetscCommGetNewTag() */
  stash->comm = comm;

  ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
  ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
  ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
  ierr = PetscMalloc1(2*stash->size,&stash->flg_v);CHKERRQ(ierr);
  for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;

  nopt = stash->size;
  ierr = PetscMalloc1(nopt,&opt);CHKERRQ(ierr);
  ierr = PetscOptionsGetIntArray(NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
  if (flg) {
    if (nopt == 1)                max = opt[0];
    else if (nopt == stash->size) max = opt[stash->rank];
    else if (stash->rank < nopt)  max = opt[stash->rank];
    else                          max = 0; /* Use default */
    stash->umax = max;
  } else {
    stash->umax = 0;
  }
  ierr = PetscFree(opt);CHKERRQ(ierr);
  if (bs <= 0) bs = 1;

  stash->bs         = bs;
  stash->nmax       = 0;
  stash->oldnmax    = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->space_head = 0;
  stash->space      = 0;

  stash->send_waits  = 0;
  stash->recv_waits  = 0;
  stash->send_status = 0;
  stash->nsends      = 0;
  stash->nrecvs      = 0;
  stash->svalues     = 0;
  stash->rvalues     = 0;
  stash->rindices    = 0;
  stash->nprocessed  = 0;
  stash->reproduce   = PETSC_FALSE;
  stash->blocktype   = MPI_DATATYPE_NULL;

  ierr = PetscOptionsGetBool(NULL,"-matstash_reproduce",&stash->reproduce,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(NULL,"-matstash_bts",&flg,NULL);CHKERRQ(ierr);
  if (flg) {
    stash->ScatterBegin   = MatStashScatterBegin_BTS;
    stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
    stash->ScatterEnd     = MatStashScatterEnd_BTS;
    stash->ScatterDestroy = MatStashScatterDestroy_BTS;
  } else {
    stash->ScatterBegin   = MatStashScatterBegin_Ref;
    stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
    stash->ScatterEnd     = MatStashScatterEnd_Ref;
    stash->ScatterDestroy = NULL;
  }
  PetscFunctionReturn(0);
}
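
/*
  Usage sketch (illustrative, not part of the library): a parallel matrix
  implementation creates its stash during setup and later drives it through the
  ScatterBegin/ScatterGetMesg/ScatterEnd function pointers installed above.
  The names "comm", "bs", and "stash" below are hypothetical local variables:

      MatStash stash;
      ierr = MatStashCreate_Private(comm,bs,&stash);CHKERRQ(ierr);

  The per-rank initial size can be tuned at run time, e.g.

      -matstash_initial_size 50000          (same value on every rank)
      -matstash_initial_size 10000,20000    (one value per rank)

  and -matstash_bts selects the newer BTS (build-two-sided) communication path
  instead of the reference implementation.
*/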

/*
  MatStashDestroy_Private - Destroy the stash
*/
#undef __FUNCT__
#define __FUNCT__ "MatStashDestroy_Private"
PetscErrorCode MatStashDestroy_Private(MatStash *stash)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);
  if (stash->ScatterDestroy) {ierr = (*stash->ScatterDestroy)(stash);CHKERRQ(ierr);}

  stash->space = 0;

  ierr = PetscFree(stash->flg_v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  MatStashScatterEnd_Private - This is called as the final stage of the
  scatter. The final stages of message passing are done here, and
  all the memory used for message passing is cleaned up. This
  routine also resets the stash, and deallocates the memory used
  for the stash. It also keeps track of the current memory usage
  so that the same value can be used the next time through.
*/
#undef __FUNCT__
#define __FUNCT__ "MatStashScatterEnd_Private"
PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = (*stash->ScatterEnd)(stash);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatStashScatterEnd_Ref"
static PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
{
  PetscErrorCode ierr;
  PetscInt       nsends=stash->nsends,bs2,oldnmax,i;
  MPI_Status     *send_status;

  PetscFunctionBegin;
  for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(2*nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }

  /* Now update oldnmax to be approximately 10% more than the maximum n used;
     this way the wastage of space is reduced the next time this stash is used.
     Update oldnmax only if it increases. */
  if (stash->n) {
    bs2     = stash->bs*stash->bs;
    oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
    if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
  }

  stash->nmax       = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->nprocessed = 0;

  ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);

  stash->space = 0;

  ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
  ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
  ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr);
  ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr);
  ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
  ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr);
  ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
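
/*
  Worked example of the resizing heuristic above (illustrative numbers only):
  with bs = 2 (so bs2 = 4) and stash->n = 1000 entries stashed in this round,

      oldnmax = ((int)(1000 * 1.1) + 5) * 4 = 1105 * 4 = 4420

  so the next assembly starts with room for roughly 10% more blocks than were
  actually needed this time, which avoids most incremental reallocations.
*/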

/*
  MatStashGetInfo_Private - Gets the relevant statistics of the stash

  Input Parameter:
  stash    - the stash

  Output Parameters:
  nstash   - the size of the stash, i.e. the number of values stored.
  reallocs - the number of additional mallocs incurred.
*/
#undef __FUNCT__
#define __FUNCT__ "MatStashGetInfo_Private"
PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
{
  PetscInt bs2 = stash->bs*stash->bs;

  PetscFunctionBegin;
  if (nstash) *nstash = stash->n*bs2;
  if (reallocs) {
    if (stash->reallocs < 0) *reallocs = 0;
    else                     *reallocs = stash->reallocs;
  }
  PetscFunctionReturn(0);
}

/*
  MatStashSetInitialSize_Private - Sets the initial size of the stash

  Input Parameters:
  stash - the stash
  max   - the value that is used as the maximum size of the stash.
          This value is used while allocating memory.
*/
#undef __FUNCT__
#define __FUNCT__ "MatStashSetInitialSize_Private"
PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
{
  PetscFunctionBegin;
  stash->umax = max;
  PetscFunctionReturn(0);
}

/* MatStashExpand_Private - Expand the stash. This function is called
   when the space in the stash is not sufficient to add the new values
   being inserted into the stash.

   Input Parameters:
   stash - the stash
   incr  - the minimum increase requested

   Notes:
   This routine doubles the currently used memory.
*/
#undef __FUNCT__
#define __FUNCT__ "MatStashExpand_Private"
static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
{
  PetscErrorCode ierr;
  PetscInt       newnmax,bs2 = stash->bs*stash->bs;

  PetscFunctionBegin;
  /* allocate a larger stash */
  if (!stash->oldnmax && !stash->nmax) { /* new stash */
    if (stash->umax) newnmax = stash->umax/bs2;
    else             newnmax = DEFAULT_STASH_SIZE/bs2;
  } else if (!stash->nmax) { /* reusing stash */
    if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
    else                              newnmax = stash->oldnmax/bs2;
  } else newnmax = stash->nmax*2;
  if (newnmax < (stash->nmax + incr)) newnmax += 2*incr;

  /* Get a MatStashSpace and attach it to stash */
  ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
  if (!stash->space_head) { /* new stash or reusing stash->oldnmax */
    stash->space_head = stash->space;
  }

  stash->reallocs++;
  stash->nmax = newnmax;
  PetscFunctionReturn(0);
}
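
/*
  Worked example of the sizing logic above (illustrative numbers only): for a
  brand-new stash with bs = 1 and no -matstash_initial_size given, the first
  expansion allocates newnmax = DEFAULT_STASH_SIZE/1 = 10000 entries.  If a
  later call still needs more room the stash doubles (newnmax = nmax*2), and
  if even that is smaller than nmax + incr it grows by an additional 2*incr.
*/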

/*
  MatStashValuesRow_Private - inserts values into the stash. This function
  expects the values to be row-oriented. Multiple columns belonging to the
  same row can be inserted with a single call to this function.

  Input Parameters:
  stash  - the stash
  row    - the global row corresponding to the values
  n      - the number of elements inserted. All elements belong to the above row.
  idxn   - the global column indices corresponding to each of the values.
  values - the values inserted
*/
#undef __FUNCT__
#define __FUNCT__ "MatStashValuesRow_Private"
PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries)
{
  PetscErrorCode     ierr;
  PetscInt           i,k,cnt = 0;
  PetscMatStashSpace space=stash->space;

  PetscFunctionBegin;
  /* Check and see if we have sufficient memory */
  if (!space || space->local_remaining < n) {
    ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
  }
  space = stash->space;
  k     = space->local_used;
  for (i=0; i<n; i++) {
    if (ignorezeroentries && (values[i] == 0.0)) continue;
    space->idx[k] = row;
    space->idy[k] = idxn[i];
    space->val[k] = values[i];
    k++;
    cnt++;
  }
  stash->n               += cnt;
  space->local_used      += cnt;
  space->local_remaining -= cnt;
  PetscFunctionReturn(0);
}

/*
  MatStashValuesCol_Private - inserts values into the stash. This function
  expects the values to be column-oriented. Multiple columns belonging to the
  same row can be inserted with a single call to this function.

  Input Parameters:
  stash   - the stash
  row     - the global row corresponding to the values
  n       - the number of elements inserted. All elements belong to the above row.
  idxn    - the global column indices corresponding to each of the values.
  values  - the values inserted
  stepval - consecutive values are separated by a distance of stepval,
            because the input is column-oriented.
*/
#undef __FUNCT__
#define __FUNCT__ "MatStashValuesCol_Private"
PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries)
{
  PetscErrorCode     ierr;
  PetscInt           i,k,cnt = 0;
  PetscMatStashSpace space=stash->space;

  PetscFunctionBegin;
  /* Check and see if we have sufficient memory */
  if (!space || space->local_remaining < n) {
    ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
  }
  space = stash->space;
  k     = space->local_used;
  for (i=0; i<n; i++) {
    if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
    space->idx[k] = row;
    space->idy[k] = idxn[i];
    space->val[k] = values[i*stepval];
    k++;
    cnt++;
  }
  stash->n               += cnt;
  space->local_used      += cnt;
  space->local_remaining -= cnt;
  PetscFunctionReturn(0);
}
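
/*
  Illustration of the stepval addressing above (hypothetical sizes): suppose a
  caller holds a 3x4 dense block in column-major order and wants to stash one
  of its rows.  Entries of that row are strided by the number of rows, so the
  call passes stepval = 3 and the loop reads values[0], values[3], values[6],
  and values[9] for the four columns of that row.
*/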

/*
  MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
  This function expects the values to be row-oriented. Multiple columns belonging
  to the same block-row can be inserted with a single call to this function.
  This function extracts the sub-block of values based on the dimensions of
  the original input block, and the row,col values corresponding to the blocks.

  Input Parameters:
  stash  - the stash
  row    - the global block-row corresponding to the values
  n      - the number of elements inserted. All elements belong to the above row.
  idxn   - the global block-column indices corresponding to each of the blocks of
           values. Each block is of size bs*bs.
  values - the values inserted
  rmax   - the number of block-rows in the original block.
  cmax   - the number of block-columns in the original block.
  idx    - the index of the current block-row in the original block.
*/
#undef __FUNCT__
#define __FUNCT__ "MatStashValuesRowBlocked_Private"
PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
{
  PetscErrorCode     ierr;
  PetscInt           i,j,k,bs2,bs=stash->bs,l;
  const PetscScalar  *vals;
  PetscScalar        *array;
  PetscMatStashSpace space=stash->space;

  PetscFunctionBegin;
  if (!space || space->local_remaining < n) {
    ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
  }
  space = stash->space;
  l     = space->local_used;
  bs2   = bs*bs;
  for (i=0; i<n; i++) {
    space->idx[l] = row;
    space->idy[l] = idxn[i];
    /* Now copy over the block of values. Store the values column-oriented.
       This enables inserting multiple blocks belonging to a row with a single
       function call. */
    array = space->val + bs2*l;
    vals  = values + idx*bs2*n + bs*i;
    for (j=0; j<bs; j++) {
      for (k=0; k<bs; k++) array[k*bs] = vals[k];
      array++;
      vals += cmax*bs;
    }
    l++;
  }
  stash->n               += n;
  space->local_used      += n;
  space->local_remaining -= n;
  PetscFunctionReturn(0);
}

/*
  MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
  This function expects the values to be column-oriented. Multiple columns belonging
  to the same block-row can be inserted with a single call to this function.
  This function extracts the sub-block of values based on the dimensions of
  the original input block, and the row,col values corresponding to the blocks.

  Input Parameters:
  stash  - the stash
  row    - the global block-row corresponding to the values
  n      - the number of elements inserted. All elements belong to the above row.
  idxn   - the global block-column indices corresponding to each of the blocks of
           values. Each block is of size bs*bs.
  values - the values inserted
  rmax   - the number of block-rows in the original block.
  cmax   - the number of block-columns in the original block.
  idx    - the index of the current block-row in the original block.
*/
#undef __FUNCT__
#define __FUNCT__ "MatStashValuesColBlocked_Private"
PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
{
  PetscErrorCode     ierr;
  PetscInt           i,j,k,bs2,bs=stash->bs,l;
  const PetscScalar  *vals;
  PetscScalar        *array;
  PetscMatStashSpace space=stash->space;

  PetscFunctionBegin;
  if (!space || space->local_remaining < n) {
    ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
  }
  space = stash->space;
  l     = space->local_used;
  bs2   = bs*bs;
  for (i=0; i<n; i++) {
    space->idx[l] = row;
    space->idy[l] = idxn[i];
    /* Now copy over the block of values. Store the values column-oriented.
       This enables inserting multiple blocks belonging to a row with a single
       function call. */
    array = space->val + bs2*l;
    vals  = values + idx*bs2*n + bs*i;
    for (j=0; j<bs; j++) {
      for (k=0; k<bs; k++) array[k] = vals[k];
      array += bs;
      vals  += rmax*bs;
    }
    l++;
  }
  stash->n               += n;
  space->local_used      += n;
  space->local_remaining -= n;
  PetscFunctionReturn(0);
}
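
/*
  Worked example for the row-oriented variant above (illustrative; assumes the
  full block-row is being inserted, i.e. n == cmax): with bs = 2 the pointer

      vals = values + idx*bs2*n + bs*i

  skips idx*bs scalar rows of the row-major input and lands on the top-left
  scalar of the i-th 2x2 block; the double loop then copies its 4 scalars into
  space->val in column-major order, so every stashed block ends up with the
  same layout regardless of the orientation of the caller's input.
*/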
/*
  MatStashScatterBegin_Private - Initiates the transfer of values to the
  correct owners. This function goes through the stash, checks the owner
  of each stashed value, and sends the values off to the owning processes.

  Input Parameters:
  stash  - the stash
  owners - an array of size 'no-of-procs' which gives the ownership range
           for each node.

  Notes: In the case of the blocked stash the 'owners' array contains the
  ranges in blocked global indices, while for the regular stash it contains
  the ranges in regular global indices.
*/
#undef __FUNCT__
#define __FUNCT__ "MatStashScatterBegin_Private"
PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = (*stash->ScatterBegin)(mat,stash,owners);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatStashScatterBegin_Ref"
static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners)
{
  PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
  PetscInt           size=stash->size,nsends;
  PetscErrorCode     ierr;
  PetscInt           count,*sindices,**rindices,i,j,idx,lastidx,l;
  PetscScalar        **rvalues,*svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
  PetscMPIInt        *sizes,*nlengths,nreceives;
  PetscInt           *sp_idx,*sp_idy;
  PetscScalar        *sp_val;
  PetscMatStashSpace space,space_next;

  PetscFunctionBegin;
  { /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
    mat->insertmode = addv; /* in case this processor had no cache */
  }

  bs2 = stash->bs*stash->bs;

  /* first count number of contributors to each processor */
  ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
  ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
  ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);

  i       = j = 0;
  lastidx = -1;
  space   = stash->space_head;
  while (space != NULL) {
    space_next = space->next;
    sp_idx     = space->idx;
    for (l=0; l<space->local_used; l++) {
      /* if indices are NOT locally sorted, need to start search at the beginning */
      if (lastidx > (idx = sp_idx[l])) j = 0;
      lastidx = idx;
      for (; j<size; j++) {
        if (idx >= owners[j] && idx < owners[j+1]) {
          nlengths[j]++; owner[i] = j; break;
        }
      }
      i++;
    }
    space = space_next;
  }
  /* Now check which procs get messages - and compute nsends. */
  for (i=0, nsends=0; i<size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1; nsends++;
    }
  }

  {PetscMPIInt *onodes,*olengths;
   /* Determine the number of messages to expect, their lengths, and their sources */
   ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
   /* since row and column indices are sent together, the lengths are multiplied by 2 */
   for (i=0; i<nreceives; i++) olengths[i] *= 2;
   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
   /* the values messages have bs2 scalars per entry (and remove the earlier factor of 2) */
   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
   ierr = PetscFree(onodes);CHKERRQ(ierr);
   ierr = PetscFree(olengths);CHKERRQ(ierr);}

  /* do sends:
     1) startv[i] gives the starting index in svalues for entries going to
        the ith processor
  */
  ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
  ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
  /* use 2 sends per processor: one with the values, the other with the row and column indices */
  startv[0] = 0; starti[0] = 0;
  for (i=1; i<size; i++) {
    startv[i] = startv[i-1] + nlengths[i-1];
    starti[i] = starti[i-1] + 2*nlengths[i-1];
  }

  i     = 0;
  space = stash->space_head;
  while (space != NULL) {
    space_next = space->next;
    sp_idx     = space->idx;
    sp_idy     = space->idy;
    sp_val     = space->val;
    for (l=0; l<space->local_used; l++) {
      j = owner[i];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt    k;
        PetscScalar *buf1,*buf2;
        buf1 = svalues+bs2*startv[j];
        buf2 = space->val + bs2*l;
        for (k=0; k<bs2; k++) buf1[k] = buf2[k];
      }
      sindices[starti[j]]             = sp_idx[l];
      sindices[starti[j]+nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
      i++;
    }
    space = space_next;
  }
  startv[0] = 0;
  for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];

  for (i=0,count=0; i<size; i++) {
    if (sizes[i]) {
      ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
      ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
#if defined(PETSC_USE_INFO)
  ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
  for (i=0; i<size; i++) {
    if (sizes[i]) {
      ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
    }
  }
#endif
  ierr = PetscFree(nlengths);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);

  for (i=0; i<nreceives; i++) {
    recv_waits[2*i]   = recv_waits1[i];
    recv_waits[2*i+1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
  ierr = PetscFree(recv_waits2);CHKERRQ(ierr);

  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  PetscFunctionReturn(0);
}
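
/*
  Wire-format sketch for the reference scatter above (illustrative): for each
  destination rank j the sender posts two messages, one on tag1 carrying
  2*nlengths[j] integers laid out as [all row indices | all column indices],
  and one on tag2 carrying bs2*nlengths[j] scalars with the corresponding
  values (blocks stored contiguously).  The receiver matches the pair by source
  rank in MatStashScatterGetMesg_Ref() before handing the data back to the
  assembly code.
*/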

/*
  MatStashScatterGetMesg_Private - This function waits on the receives posted
  in the function MatStashScatterBegin_Private() and returns one message at
  a time to the calling function. If no messages are left, it indicates this
  by setting flg = 0, else it sets flg = 1.

  Input Parameters:
  stash - the stash

  Output Parameters:
  nvals - the number of entries in the current message.
  rows  - an array of row indices (or blocked indices) corresponding to the values
  cols  - an array of column indices (or blocked indices) corresponding to the values
  vals  - the values
  flg   - 0 indicates no more messages are left, and the current call has no values associated.
          1 indicates that the current call successfully received a message, and the
          other output parameters nvals,rows,cols,vals are set appropriately.
*/
#undef __FUNCT__
#define __FUNCT__ "MatStashScatterGetMesg_Private"
PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = (*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
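
/*
  Consumption sketch (illustrative, not part of this file): a matrix assembly
  routine typically drains the stash with a loop of the following shape, where
  "n", "row", "col", "val", and "flg" are local variables of the caller:

      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      while (flg) {
        ... insert the n received (row[i],col[i],val[i]) entries locally ...
        ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      }
      ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
*/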

#undef __FUNCT__
#define __FUNCT__ "MatStashScatterGetMesg_Ref"
static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
{
  PetscErrorCode ierr;
  PetscMPIInt    i,*flg_v = stash->flg_v,i1,i2;
  PetscInt       bs2;
  MPI_Status     recv_status;
  PetscBool      match_found = PETSC_FALSE;

  PetscFunctionBegin;
  *flg = 0; /* When a message is discovered this is reset to 1 */
  /* Return if no more messages to process */
  if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0);

  bs2 = stash->bs*stash->bs;
  /* If a matching pair of receives is found, process them, and return the data to
     the calling function. Until then keep receiving messages */
  while (!match_found) {
    if (stash->reproduce) {
      i    = stash->reproduce_count++;
      ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr);
    } else {
      ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
    }
    if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!");

    /* Now pack the received message into a structure which is usable by others */
    if (i % 2) {
      ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);

      flg_v[2*recv_status.MPI_SOURCE] = i/2;

      *nvals = *nvals/bs2;
    } else {
      ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);

      flg_v[2*recv_status.MPI_SOURCE+1] = i/2;

      *nvals = *nvals/2; /* This message has both row indices and col indices */
    }

    /* Check if we have both messages from this proc */
    i1 = flg_v[2*recv_status.MPI_SOURCE];
    i2 = flg_v[2*recv_status.MPI_SOURCE+1];
    if (i1 != -1 && i2 != -1) {
      *rows = stash->rindices[i2];
      *cols = *rows + *nvals;
      *vals = stash->rvalues[i1];
      *flg  = 1;
      stash->nprocessed++;
      match_found = PETSC_TRUE;
    }
  }
  PetscFunctionReturn(0);
}

typedef struct {
  PetscInt    row;
  PetscInt    col;
  PetscScalar vals[1]; /* Actually an array of length bs2 */
} MatStashBlock;

#undef __FUNCT__
#define __FUNCT__ "MatStashSortCompress_Private"
static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode)
{
  PetscErrorCode     ierr;
  PetscMatStashSpace space;
  PetscInt           n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i;
  PetscScalar        **valptr;

  PetscFunctionBegin;
  ierr = PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);CHKERRQ(ierr);
  for (space=stash->space_head,cnt=0; space; space=space->next) {
    for (i=0; i<space->local_used; i++) {
      row[cnt]    = space->idx[i];
      col[cnt]    = space->idy[i];
      valptr[cnt] = &space->val[i*bs2];
      perm[cnt]   = cnt; /* Will tell us where to find valptr after sorting row[] and col[] */
      cnt++;
    }
  }
  if (cnt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %D, but counted %D entries",n,cnt);
  ierr = PetscSortIntWithArrayPair(n,row,col,perm);CHKERRQ(ierr);
  /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
  for (rowstart=0,cnt=0,i=1; i<=n; i++) {
    if (i == n || row[i] != row[rowstart]) { /* Sort the last row. */
      PetscInt colstart;
      ierr = PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);CHKERRQ(ierr);
      for (colstart=rowstart; colstart<i; ) { /* Compress multiple insertions to the same location */
        PetscInt      j,l;
        MatStashBlock *block;
        ierr = PetscSegBufferGet(stash->segsendblocks,1,&block);CHKERRQ(ierr);
        block->row = row[rowstart];
        block->col = col[colstart];
        ierr = PetscMemcpy(block->vals,valptr[perm[colstart]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr);
        for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
          if (insertmode == ADD_VALUES) {
            for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l];
          } else {
            ierr = PetscMemcpy(block->vals,valptr[perm[j]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr);
          }
        }
        colstart = j;
      }
      rowstart = i;
    }
  }
  ierr = PetscFree4(row,col,valptr,perm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatStashBlockTypeSetUp"
static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (stash->blocktype == MPI_DATATYPE_NULL) {
    PetscInt     bs2 = PetscSqr(stash->bs);
    PetscMPIInt  blocklens[2];
    MPI_Aint     displs[2];
    MPI_Datatype types[2],stype;
    /* C++ std::complex is not my favorite datatype.  Since it is not POD, we cannot use offsetof to find the offset of
     * vals.  But the layout is actually guaranteed by the standard, so we do a little dance here with struct
     * DummyBlock, substituting PetscReal for PetscComplex so that we can determine the offset.
     */
    struct DummyBlock {PetscInt row,col; PetscReal vals;};

    stash->blocktype_size = offsetof(struct DummyBlock,vals) + bs2*sizeof(PetscScalar);
    if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
      stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
    }
    ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segsendblocks);CHKERRQ(ierr);
    ierr = PetscSegBufferCreate(stash->blocktype_size,1,&stash->segrecvblocks);CHKERRQ(ierr);
    ierr = PetscSegBufferCreate(sizeof(MatStashFrame),1,&stash->segrecvframe);CHKERRQ(ierr);
    blocklens[0] = 2;
    blocklens[1] = bs2;
    displs[0]    = offsetof(struct DummyBlock,row);
    displs[1]    = offsetof(struct DummyBlock,vals);
    types[0]     = MPIU_INT;
    types[1]     = MPIU_SCALAR;
    ierr = MPI_Type_create_struct(2,blocklens,displs,types,&stype);CHKERRQ(ierr);
    ierr = MPI_Type_commit(&stype);CHKERRQ(ierr);
    ierr = MPI_Type_create_resized(stype,0,stash->blocktype_size,&stash->blocktype);CHKERRQ(ierr); /* MPI-2 */
    ierr = MPI_Type_commit(&stash->blocktype);CHKERRQ(ierr);
    ierr = MPI_Type_free(&stype);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
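
/*
  Worked example of the datatype sizing above (assuming 4-byte PetscInt and
  8-byte real PetscScalar; other configurations differ): for bs = 2,

      offsetof(struct DummyBlock,vals) = 8     (row,col plus alignment to 8 bytes)
      blocktype_size = 8 + 4*8 = 40 bytes

  which is already a multiple of sizeof(PetscInt), so no extra padding is added.
  MPI_Type_create_resized then makes consecutive blocks in a send/receive buffer
  start every blocktype_size bytes, matching the packing used by the
  PetscSegBuffers created above.
*/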

#undef __FUNCT__
#define __FUNCT__ "MatStashBTSSend_Private"
/* Callback invoked after the target rank has initiated its receive of the rendezvous message.
 * Here we post the main sends.
 */
static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void *sdata,MPI_Request req[],void *ctx)
{
  MatStash       *stash = (MatStash*)ctx;
  MatStashHeader *hdr   = (MatStashHeader*)sdata;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (rank != stash->sendranks[rankid]) SETERRQ3(comm,PETSC_ERR_PLIB,"BTS Send rank %d does not match sendranks[%d] %d",rank,rankid,stash->sendranks[rankid]);
  ierr = MPI_Isend(stash->sendframes[rankid].buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
  stash->sendframes[rankid].count   = hdr->count;
  stash->sendframes[rankid].pending = 1;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatStashBTSRecv_Private"
/* Callback invoked by the target after receiving the rendezvous message.
 * Here we post the main recvs.
 */
static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *rdata,MPI_Request req[],void *ctx)
{
  MatStash       *stash = (MatStash*)ctx;
  MatStashHeader *hdr   = (MatStashHeader*)rdata;
  MatStashFrame  *frame;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscSegBufferGet(stash->segrecvframe,1,&frame);CHKERRQ(ierr);
  ierr = PetscSegBufferGet(stash->segrecvblocks,hdr->count,&frame->buffer);CHKERRQ(ierr);
  ierr = MPI_Irecv(frame->buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
  frame->count   = hdr->count;
  frame->pending = 1;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatStashScatterBegin_BTS"
/*
 * owners[] contains the ownership ranges; may be indexed by either blocks or scalars
 */
static PetscErrorCode MatStashScatterBegin_BTS(Mat mat,MatStash *stash,PetscInt owners[])
{
  PetscErrorCode ierr;
  size_t         nblocks;
  char           *sendblocks;

  PetscFunctionBegin;
#if defined(PETSC_USE_DEBUG)
  { /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
#endif

  if (stash->subset_off_proc && !mat->subsetoffprocentries) { /* We won't use the old scatter context. */
    ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr);
  }

  ierr = MatStashBlockTypeSetUp(stash);CHKERRQ(ierr);
  ierr = MatStashSortCompress_Private(stash,mat->insertmode);CHKERRQ(ierr);
  ierr = PetscSegBufferGetSize(stash->segsendblocks,&nblocks);CHKERRQ(ierr);
  ierr = PetscSegBufferExtractInPlace(stash->segsendblocks,&sendblocks);CHKERRQ(ierr);
  if (stash->subset_off_proc && mat->subsetoffprocentries) { /* Set up sendhdrs and sendframes for each rank that we sent to before */
    PetscInt i;
    size_t   b;
    for (i=0,b=0; i<stash->nsendranks; i++) {
      stash->sendframes[i].buffer = &sendblocks[b*stash->blocktype_size];
      /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */
      stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */
      for ( ; b<nblocks; b++) {
        MatStashBlock *sendblock_b = (MatStashBlock*)&sendblocks[b*stash->blocktype_size];
        if (PetscUnlikely(sendblock_b->row < owners[stash->sendranks[i]])) SETERRQ2(stash->comm,PETSC_ERR_ARG_WRONG,"MAT_SUBSET_OFF_PROC_ENTRIES set, but row %D owned by %d not communicated in initial assembly",sendblock_b->row,stash->sendranks[i]);
        if (sendblock_b->row >= owners[stash->sendranks[i]+1]) break;
        stash->sendhdr[i].count++;
      }
    }
  } else { /* Dynamically count and pack (first time) */
    PetscInt sendno;
    size_t   i,rowstart;

    /* Count number of send ranks and allocate for sends */
    stash->nsendranks = 0;
    for (rowstart=0; rowstart<nblocks; ) {
      PetscInt      owner;
      MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
      ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
      if (owner < 0) owner = -(owner+2);
      for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
        MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
        if (sendblock_i->row >= owners[owner+1]) break;
      }
      stash->nsendranks++;
      rowstart = i;
    }
    ierr = PetscMalloc3(stash->nsendranks,&stash->sendranks,stash->nsendranks,&stash->sendhdr,stash->nsendranks,&stash->sendframes);CHKERRQ(ierr);

    /* Set up sendhdrs and sendframes */
    sendno = 0;
    for (rowstart=0; rowstart<nblocks; ) {
      PetscInt      owner;
      MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
      ierr = PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);CHKERRQ(ierr);
      if (owner < 0) owner = -(owner+2);
      stash->sendranks[sendno] = owner;
      for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
        MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
        if (sendblock_i->row >= owners[owner+1]) break;
      }
      stash->sendframes[sendno].buffer  = sendblock_rowstart;
      stash->sendframes[sendno].pending = 0;
      stash->sendhdr[sendno].count      = i - rowstart;
      sendno++;
      rowstart = i;
    }
    if (sendno != stash->nsendranks) SETERRQ2(stash->comm,PETSC_ERR_PLIB,"BTS counted %D sendranks, but %D sends",stash->nsendranks,sendno);
  }

  /* Encode the insertmode on the outgoing messages. If we want to support more than two options, we would need a new
   * message or a dummy entry of some sort. */
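  /*
     Worked example of the encoding below: under INSERT_VALUES a stashed row 7
     is transmitted as -(7+1) = -8; the receiver recognises the negative value
     as INSERT_VALUES and decodes it back with -(-8+1) = 7 in
     MatStashScatterGetMesg_BTS().  Row indices are always >= 0, so the sign
     is a safe place to carry the one-bit insert/add flag.
  */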
  if (mat->insertmode == INSERT_VALUES) {
    size_t i;
    for (i=0; i<nblocks; i++) {
      MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
      sendblock_i->row = -(sendblock_i->row+1);
    }
  }

  if (stash->subset_off_proc && mat->subsetoffprocentries) {
    PetscMPIInt i,tag;
    ierr = PetscCommGetNewTag(stash->comm,&tag);CHKERRQ(ierr);
    for (i=0; i<stash->nrecvranks; i++) {
      ierr = MatStashBTSRecv_Private(stash->comm,&tag,stash->recvranks[i],&stash->recvhdr[i],&stash->recvreqs[i],stash);CHKERRQ(ierr);
    }
    for (i=0; i<stash->nsendranks; i++) {
      ierr = MatStashBTSSend_Private(stash->comm,&tag,i,stash->sendranks[i],&stash->sendhdr[i],&stash->sendreqs[i],stash);CHKERRQ(ierr);
    }
    stash->use_status = PETSC_TRUE; /* Use count from message status. */
  } else {
    ierr = PetscCommBuildTwoSidedFReq(stash->comm,1,MPIU_INT,stash->nsendranks,stash->sendranks,(PetscInt*)stash->sendhdr,
                                      &stash->nrecvranks,&stash->recvranks,(PetscInt*)&stash->recvhdr,1,&stash->sendreqs,&stash->recvreqs,
                                      MatStashBTSSend_Private,MatStashBTSRecv_Private,stash);CHKERRQ(ierr);
    ierr = PetscMalloc2(stash->nrecvranks,&stash->some_indices,stash->nrecvranks,&stash->some_statuses);CHKERRQ(ierr);
    stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */
  }

  ierr = PetscSegBufferExtractInPlace(stash->segrecvframe,&stash->recvframes);CHKERRQ(ierr);
  stash->recvframe_active = NULL;
  stash->recvframe_i      = 0;
  stash->some_i           = 0;
  stash->some_count       = 0;
  stash->recvcount        = 0;
  stash->subset_off_proc  = mat->subsetoffprocentries;
  stash->insertmode       = &mat->insertmode;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatStashScatterGetMesg_BTS"
static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash,PetscMPIInt *n,PetscInt **row,PetscInt **col,PetscScalar **val,PetscInt *flg)
{
  PetscErrorCode ierr;
  MatStashBlock  *block;

  PetscFunctionBegin;
  *flg = 0;
  while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) {
    if (stash->some_i == stash->some_count) {
      if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(0); /* Done */
      ierr = MPI_Waitsome(stash->nrecvranks,stash->recvreqs,&stash->some_count,stash->some_indices,stash->use_status?stash->some_statuses:MPI_STATUSES_IGNORE);CHKERRQ(ierr);
      stash->some_i = 0;
    }
    stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
    stash->recvframe_count  = stash->recvframe_active->count; /* From header; maximum count */
    if (stash->use_status) { /* Count what was actually sent */
      ierr = MPI_Get_count(&stash->some_statuses[stash->some_i],stash->blocktype,&stash->recvframe_count);CHKERRQ(ierr);
    }
    if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */
      block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[0];
      if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES;
      if (PetscUnlikely(*stash->insertmode == INSERT_VALUES && block->row >= 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling INSERT_VALUES, but rank %d requested ADD_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
      if (PetscUnlikely(*stash->insertmode == ADD_VALUES && block->row < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling ADD_VALUES, but rank %d requested INSERT_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
    }
    stash->some_i++;
    stash->recvcount++;
    stash->recvframe_i = 0;
  }
  *n = 1;
  block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[stash->recvframe_i*stash->blocktype_size];
  if (block->row < 0) block->row = -(block->row + 1);
  *row = &block->row;
  *col = &block->col;
  *val = block->vals;
  stash->recvframe_i++;
  *flg = 1;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatStashScatterEnd_BTS"
static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Waitall(stash->nsendranks,stash->sendreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
  if (stash->subset_off_proc) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks */
    void *dummy;
    ierr = PetscSegBufferExtractInPlace(stash->segrecvblocks,&dummy);CHKERRQ(ierr);
  } else { /* No reuse, so collect everything. */
    ierr = MatStashScatterDestroy_BTS(stash);CHKERRQ(ierr);
  }

  /* Now update oldnmax to be approximately 10% more than the maximum n used;
     this way the wastage of space is reduced the next time this stash is used.
     Update oldnmax only if it increases. */
  if (stash->n) {
    PetscInt bs2     = stash->bs*stash->bs;
    PetscInt oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
    if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
  }

  stash->nmax       = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->nprocessed = 0;

  ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr);

  stash->space = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatStashScatterDestroy_BTS"
static PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscSegBufferDestroy(&stash->segsendblocks);CHKERRQ(ierr);
  ierr = PetscSegBufferDestroy(&stash->segrecvframe);CHKERRQ(ierr);
  stash->recvframes = NULL;
  ierr = PetscSegBufferDestroy(&stash->segrecvblocks);CHKERRQ(ierr);
  if (stash->blocktype != MPI_DATATYPE_NULL) {
    ierr = MPI_Type_free(&stash->blocktype);CHKERRQ(ierr);
  }
  stash->nsendranks = 0;
  stash->nrecvranks = 0;
  ierr = PetscFree3(stash->sendranks,stash->sendhdr,stash->sendframes);CHKERRQ(ierr);
  ierr = PetscFree(stash->sendreqs);CHKERRQ(ierr);
  ierr = PetscFree(stash->recvreqs);CHKERRQ(ierr);
  ierr = PetscFree(stash->recvranks);CHKERRQ(ierr);
  ierr = PetscFree(stash->recvhdr);CHKERRQ(ierr);
  ierr = PetscFree2(stash->some_indices,stash->some_statuses);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}