1 2 #include <petsc-private/matimpl.h> 3 4 #define DEFAULT_STASH_SIZE 10000 5 6 /* 7 MatStashCreate_Private - Creates a stash,currently used for all the parallel 8 matrix implementations. The stash is where elements of a matrix destined 9 to be stored on other processors are kept until matrix assembly is done. 10 11 This is a simple minded stash. Simply adds entries to end of stash. 12 13 Input Parameters: 14 comm - communicator, required for scatters. 15 bs - stash block size. used when stashing blocks of values 16 17 Output Parameters: 18 stash - the newly created stash 19 */ 20 #undef __FUNCT__ 21 #define __FUNCT__ "MatStashCreate_Private" 22 PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash) 23 { 24 PetscErrorCode ierr; 25 PetscInt max,*opt,nopt,i; 26 PetscBool flg; 27 28 PetscFunctionBegin; 29 /* Require 2 tags,get the second using PetscCommGetNewTag() */ 30 stash->comm = comm; 31 32 ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr); 33 ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr); 34 ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr); 35 ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr); 36 ierr = PetscMalloc(2*stash->size*sizeof(PetscMPIInt),&stash->flg_v);CHKERRQ(ierr); 37 for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 38 39 40 nopt = stash->size; 41 ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr); 42 ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr); 43 if (flg) { 44 if (nopt == 1) max = opt[0]; 45 else if (nopt == stash->size) max = opt[stash->rank]; 46 else if (stash->rank < nopt) max = opt[stash->rank]; 47 else max = 0; /* Use default */ 48 stash->umax = max; 49 } else { 50 stash->umax = 0; 51 } 52 ierr = PetscFree(opt);CHKERRQ(ierr); 53 if (bs <= 0) bs = 1; 54 55 stash->bs = bs; 56 stash->nmax = 0; 57 stash->oldnmax = 0; 58 stash->n = 0; 59 stash->reallocs = -1; 60 stash->space_head = 0; 61 stash->space = 0; 62 63 stash->send_waits = 0; 64 stash->recv_waits = 0; 65 stash->send_status = 0; 66 stash->nsends = 0; 67 stash->nrecvs = 0; 68 stash->svalues = 0; 69 stash->rvalues = 0; 70 stash->rindices = 0; 71 stash->nprocessed = 0; 72 stash->reproduce = PETSC_FALSE; 73 74 ierr = PetscOptionsGetBool(PETSC_NULL,"-matstash_reproduce",&stash->reproduce,PETSC_NULL);CHKERRQ(ierr); 75 PetscFunctionReturn(0); 76 } 77 78 /* 79 MatStashDestroy_Private - Destroy the stash 80 */ 81 #undef __FUNCT__ 82 #define __FUNCT__ "MatStashDestroy_Private" 83 PetscErrorCode MatStashDestroy_Private(MatStash *stash) 84 { 85 PetscErrorCode ierr; 86 87 PetscFunctionBegin; 88 ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr); 89 90 stash->space = 0; 91 92 ierr = PetscFree(stash->flg_v);CHKERRQ(ierr); 93 PetscFunctionReturn(0); 94 } 95 96 /* 97 MatStashScatterEnd_Private - This is called as the final stage of 98 scatter. The final stages of message passing is done here, and 99 all the memory used for message passing is cleaned up. This 100 routine also resets the stash, and deallocates the memory used 101 for the stash. It also keeps track of the current memory usage 102 so that the same value can be used the next time through. 103 */ 104 #undef __FUNCT__ 105 #define __FUNCT__ "MatStashScatterEnd_Private" 106 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash) 107 { 108 PetscErrorCode ierr; 109 PetscInt nsends=stash->nsends,bs2,oldnmax,i; 110 MPI_Status *send_status; 111 112 PetscFunctionBegin; 113 for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 114 /* wait on sends */ 115 if (nsends) { 116 ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 117 ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 118 ierr = PetscFree(send_status);CHKERRQ(ierr); 119 } 120 121 /* Now update nmaxold to be app 10% more than max n used, this way the 122 wastage of space is reduced the next time this stash is used. 123 Also update the oldmax, only if it increases */ 124 if (stash->n) { 125 bs2 = stash->bs*stash->bs; 126 oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 127 if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 128 } 129 130 stash->nmax = 0; 131 stash->n = 0; 132 stash->reallocs = -1; 133 stash->nprocessed = 0; 134 135 ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr); 136 137 stash->space = 0; 138 139 ierr = PetscFree(stash->send_waits);CHKERRQ(ierr); 140 ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr); 141 ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr); 142 ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr); 143 ierr = PetscFree(stash->rvalues);CHKERRQ(ierr); 144 ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr); 145 ierr = PetscFree(stash->rindices);CHKERRQ(ierr); 146 PetscFunctionReturn(0); 147 } 148 149 /* 150 MatStashGetInfo_Private - Gets the relavant statistics of the stash 151 152 Input Parameters: 153 stash - the stash 154 nstash - the size of the stash. Indicates the number of values stored. 155 reallocs - the number of additional mallocs incurred. 156 157 */ 158 #undef __FUNCT__ 159 #define __FUNCT__ "MatStashGetInfo_Private" 160 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs) 161 { 162 PetscInt bs2 = stash->bs*stash->bs; 163 164 PetscFunctionBegin; 165 if (nstash) *nstash = stash->n*bs2; 166 if (reallocs) { 167 if (stash->reallocs < 0) *reallocs = 0; 168 else *reallocs = stash->reallocs; 169 } 170 PetscFunctionReturn(0); 171 } 172 173 /* 174 MatStashSetInitialSize_Private - Sets the initial size of the stash 175 176 Input Parameters: 177 stash - the stash 178 max - the value that is used as the max size of the stash. 179 this value is used while allocating memory. 180 */ 181 #undef __FUNCT__ 182 #define __FUNCT__ "MatStashSetInitialSize_Private" 183 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max) 184 { 185 PetscFunctionBegin; 186 stash->umax = max; 187 PetscFunctionReturn(0); 188 } 189 190 /* MatStashExpand_Private - Expand the stash. This function is called 191 when the space in the stash is not sufficient to add the new values 192 being inserted into the stash. 193 194 Input Parameters: 195 stash - the stash 196 incr - the minimum increase requested 197 198 Notes: 199 This routine doubles the currently used memory. 200 */ 201 #undef __FUNCT__ 202 #define __FUNCT__ "MatStashExpand_Private" 203 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr) 204 { 205 PetscErrorCode ierr; 206 PetscInt newnmax,bs2= stash->bs*stash->bs; 207 208 PetscFunctionBegin; 209 /* allocate a larger stash */ 210 if (!stash->oldnmax && !stash->nmax) { /* new stash */ 211 if (stash->umax) newnmax = stash->umax/bs2; 212 else newnmax = DEFAULT_STASH_SIZE/bs2; 213 } else if (!stash->nmax) { /* resuing stash */ 214 if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2; 215 else newnmax = stash->oldnmax/bs2; 216 } else newnmax = stash->nmax*2; 217 if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 218 219 /* Get a MatStashSpace and attach it to stash */ 220 ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr); 221 if (!stash->space_head) { /* new stash or resuing stash->oldnmax */ 222 stash->space_head = stash->space; 223 } 224 225 stash->reallocs++; 226 stash->nmax = newnmax; 227 PetscFunctionReturn(0); 228 } 229 /* 230 MatStashValuesRow_Private - inserts values into the stash. This function 231 expects the values to be roworiented. Multiple columns belong to the same row 232 can be inserted with a single call to this function. 233 234 Input Parameters: 235 stash - the stash 236 row - the global row correspoiding to the values 237 n - the number of elements inserted. All elements belong to the above row. 238 idxn - the global column indices corresponding to each of the values. 239 values - the values inserted 240 */ 241 #undef __FUNCT__ 242 #define __FUNCT__ "MatStashValuesRow_Private" 243 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries) 244 { 245 PetscErrorCode ierr; 246 PetscInt i,k,cnt = 0; 247 PetscMatStashSpace space=stash->space; 248 249 PetscFunctionBegin; 250 /* Check and see if we have sufficient memory */ 251 if (!space || space->local_remaining < n) { 252 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 253 } 254 space = stash->space; 255 k = space->local_used; 256 for (i=0; i<n; i++) { 257 if (ignorezeroentries && (values[i] == 0.0)) continue; 258 space->idx[k] = row; 259 space->idy[k] = idxn[i]; 260 space->val[k] = values[i]; 261 k++; 262 cnt++; 263 } 264 stash->n += cnt; 265 space->local_used += cnt; 266 space->local_remaining -= cnt; 267 PetscFunctionReturn(0); 268 } 269 270 /* 271 MatStashValuesCol_Private - inserts values into the stash. This function 272 expects the values to be columnoriented. Multiple columns belong to the same row 273 can be inserted with a single call to this function. 274 275 Input Parameters: 276 stash - the stash 277 row - the global row correspoiding to the values 278 n - the number of elements inserted. All elements belong to the above row. 279 idxn - the global column indices corresponding to each of the values. 280 values - the values inserted 281 stepval - the consecutive values are sepated by a distance of stepval. 282 this happens because the input is columnoriented. 283 */ 284 #undef __FUNCT__ 285 #define __FUNCT__ "MatStashValuesCol_Private" 286 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries) 287 { 288 PetscErrorCode ierr; 289 PetscInt i,k,cnt = 0; 290 PetscMatStashSpace space=stash->space; 291 292 PetscFunctionBegin; 293 /* Check and see if we have sufficient memory */ 294 if (!space || space->local_remaining < n) { 295 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 296 } 297 space = stash->space; 298 k = space->local_used; 299 for (i=0; i<n; i++) { 300 if (ignorezeroentries && (values[i*stepval] == 0.0)) continue; 301 space->idx[k] = row; 302 space->idy[k] = idxn[i]; 303 space->val[k] = values[i*stepval]; 304 k++; 305 cnt++; 306 } 307 stash->n += cnt; 308 space->local_used += cnt; 309 space->local_remaining -= cnt; 310 PetscFunctionReturn(0); 311 } 312 313 /* 314 MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 315 This function expects the values to be roworiented. Multiple columns belong 316 to the same block-row can be inserted with a single call to this function. 317 This function extracts the sub-block of values based on the dimensions of 318 the original input block, and the row,col values corresponding to the blocks. 319 320 Input Parameters: 321 stash - the stash 322 row - the global block-row correspoiding to the values 323 n - the number of elements inserted. All elements belong to the above row. 324 idxn - the global block-column indices corresponding to each of the blocks of 325 values. Each block is of size bs*bs. 326 values - the values inserted 327 rmax - the number of block-rows in the original block. 328 cmax - the number of block-columsn on the original block. 329 idx - the index of the current block-row in the original block. 330 */ 331 #undef __FUNCT__ 332 #define __FUNCT__ "MatStashValuesRowBlocked_Private" 333 PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 334 { 335 PetscErrorCode ierr; 336 PetscInt i,j,k,bs2,bs=stash->bs,l; 337 const PetscScalar *vals; 338 PetscScalar *array; 339 PetscMatStashSpace space=stash->space; 340 341 PetscFunctionBegin; 342 if (!space || space->local_remaining < n) { 343 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 344 } 345 space = stash->space; 346 l = space->local_used; 347 bs2 = bs*bs; 348 for (i=0; i<n; i++) { 349 space->idx[l] = row; 350 space->idy[l] = idxn[i]; 351 /* Now copy over the block of values. Store the values column oriented. 352 This enables inserting multiple blocks belonging to a row with a single 353 funtion call */ 354 array = space->val + bs2*l; 355 vals = values + idx*bs2*n + bs*i; 356 for (j=0; j<bs; j++) { 357 for (k=0; k<bs; k++) array[k*bs] = vals[k]; 358 array++; 359 vals += cmax*bs; 360 } 361 l++; 362 } 363 stash->n += n; 364 space->local_used += n; 365 space->local_remaining -= n; 366 PetscFunctionReturn(0); 367 } 368 369 /* 370 MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 371 This function expects the values to be roworiented. Multiple columns belong 372 to the same block-row can be inserted with a single call to this function. 373 This function extracts the sub-block of values based on the dimensions of 374 the original input block, and the row,col values corresponding to the blocks. 375 376 Input Parameters: 377 stash - the stash 378 row - the global block-row correspoiding to the values 379 n - the number of elements inserted. All elements belong to the above row. 380 idxn - the global block-column indices corresponding to each of the blocks of 381 values. Each block is of size bs*bs. 382 values - the values inserted 383 rmax - the number of block-rows in the original block. 384 cmax - the number of block-columsn on the original block. 385 idx - the index of the current block-row in the original block. 386 */ 387 #undef __FUNCT__ 388 #define __FUNCT__ "MatStashValuesColBlocked_Private" 389 PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 390 { 391 PetscErrorCode ierr; 392 PetscInt i,j,k,bs2,bs=stash->bs,l; 393 const PetscScalar *vals; 394 PetscScalar *array; 395 PetscMatStashSpace space=stash->space; 396 397 PetscFunctionBegin; 398 if (!space || space->local_remaining < n) { 399 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 400 } 401 space = stash->space; 402 l = space->local_used; 403 bs2 = bs*bs; 404 for (i=0; i<n; i++) { 405 space->idx[l] = row; 406 space->idy[l] = idxn[i]; 407 /* Now copy over the block of values. Store the values column oriented. 408 This enables inserting multiple blocks belonging to a row with a single 409 funtion call */ 410 array = space->val + bs2*l; 411 vals = values + idx*bs2*n + bs*i; 412 for (j=0; j<bs; j++) { 413 for (k=0; k<bs; k++) array[k] = vals[k]; 414 array += bs; 415 vals += rmax*bs; 416 } 417 l++; 418 } 419 stash->n += n; 420 space->local_used += n; 421 space->local_remaining -= n; 422 PetscFunctionReturn(0); 423 } 424 /* 425 MatStashScatterBegin_Private - Initiates the transfer of values to the 426 correct owners. This function goes through the stash, and check the 427 owners of each stashed value, and sends the values off to the owner 428 processors. 429 430 Input Parameters: 431 stash - the stash 432 owners - an array of size 'no-of-procs' which gives the ownership range 433 for each node. 434 435 Notes: The 'owners' array in the cased of the blocked-stash has the 436 ranges specified blocked global indices, and for the regular stash in 437 the proper global indices. 438 */ 439 #undef __FUNCT__ 440 #define __FUNCT__ "MatStashScatterBegin_Private" 441 PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners) 442 { 443 PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 444 PetscInt size=stash->size,nsends; 445 PetscErrorCode ierr; 446 PetscInt count,*sindices,**rindices,i,j,idx,lastidx,l; 447 PetscScalar **rvalues,*svalues; 448 MPI_Comm comm = stash->comm; 449 MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 450 PetscMPIInt *nprocs,*nlengths,nreceives; 451 PetscInt *sp_idx,*sp_idy; 452 PetscScalar *sp_val; 453 PetscMatStashSpace space,space_next; 454 455 PetscFunctionBegin; 456 bs2 = stash->bs*stash->bs; 457 458 /* first count number of contributors to each processor */ 459 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr); 460 ierr = PetscMemzero(nprocs,size*sizeof(PetscMPIInt));CHKERRQ(ierr); 461 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&nlengths);CHKERRQ(ierr); 462 ierr = PetscMemzero(nlengths,size*sizeof(PetscMPIInt));CHKERRQ(ierr); 463 ierr = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); 464 465 i = j = 0; 466 lastidx = -1; 467 space = stash->space_head; 468 while (space != PETSC_NULL) { 469 space_next = space->next; 470 sp_idx = space->idx; 471 for (l=0; l<space->local_used; l++) { 472 /* if indices are NOT locally sorted, need to start search at the beginning */ 473 if (lastidx > (idx = sp_idx[l])) j = 0; 474 lastidx = idx; 475 for (; j<size; j++) { 476 if (idx >= owners[j] && idx < owners[j+1]) { 477 nlengths[j]++; owner[i] = j; break; 478 } 479 } 480 i++; 481 } 482 space = space_next; 483 } 484 /* Now check what procs get messages - and compute nsends. */ 485 for (i=0, nsends=0; i<size; i++) { 486 if (nlengths[i]) { 487 nprocs[i] = 1; nsends++; 488 } 489 } 490 491 {PetscMPIInt *onodes,*olengths; 492 /* Determine the number of messages to expect, their lengths, from from-ids */ 493 ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr); 494 ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr); 495 /* since clubbing row,col - lengths are multiplied by 2 */ 496 for (i=0; i<nreceives; i++) olengths[i] *=2; 497 ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr); 498 /* values are size 'bs2' lengths (and remove earlier factor 2 */ 499 for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 500 ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr); 501 ierr = PetscFree(onodes);CHKERRQ(ierr); 502 ierr = PetscFree(olengths);CHKERRQ(ierr);} 503 504 /* do sends: 505 1) starts[i] gives the starting index in svalues for stuff going to 506 the ith processor 507 */ 508 ierr = PetscMalloc2(bs2*stash->n,PetscScalar,&svalues,2*(stash->n+1),PetscInt,&sindices);CHKERRQ(ierr); 509 ierr = PetscMalloc(2*nsends*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 510 ierr = PetscMalloc2(size,PetscInt,&startv,size,PetscInt,&starti);CHKERRQ(ierr); 511 /* use 2 sends the first with all_a, the next with all_i and all_j */ 512 startv[0] = 0; starti[0] = 0; 513 for (i=1; i<size; i++) { 514 startv[i] = startv[i-1] + nlengths[i-1]; 515 starti[i] = starti[i-1] + 2*nlengths[i-1]; 516 } 517 518 i = 0; 519 space = stash->space_head; 520 while (space != PETSC_NULL) { 521 space_next = space->next; 522 sp_idx = space->idx; 523 sp_idy = space->idy; 524 sp_val = space->val; 525 for (l=0; l<space->local_used; l++) { 526 j = owner[i]; 527 if (bs2 == 1) { 528 svalues[startv[j]] = sp_val[l]; 529 } else { 530 PetscInt k; 531 PetscScalar *buf1,*buf2; 532 buf1 = svalues+bs2*startv[j]; 533 buf2 = space->val + bs2*l; 534 for (k=0; k<bs2; k++) buf1[k] = buf2[k]; 535 } 536 sindices[starti[j]] = sp_idx[l]; 537 sindices[starti[j]+nlengths[j]] = sp_idy[l]; 538 startv[j]++; 539 starti[j]++; 540 i++; 541 } 542 space = space_next; 543 } 544 startv[0] = 0; 545 for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1]; 546 547 for (i=0,count=0; i<size; i++) { 548 if (nprocs[i]) { 549 ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr); 550 ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr); 551 } 552 } 553 #if defined(PETSC_USE_INFO) 554 ierr = PetscInfo1(mat,"No of messages: %d \n",nsends);CHKERRQ(ierr); 555 for (i=0; i<size; i++) { 556 if (nprocs[i]) { 557 ierr = PetscInfo2(mat,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(PetscScalar)+2*sizeof(PetscInt));CHKERRQ(ierr); 558 } 559 } 560 #endif 561 ierr = PetscFree(nlengths);CHKERRQ(ierr); 562 ierr = PetscFree(owner);CHKERRQ(ierr); 563 ierr = PetscFree2(startv,starti);CHKERRQ(ierr); 564 ierr = PetscFree(nprocs);CHKERRQ(ierr); 565 566 /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 567 ierr = PetscMalloc(2*nreceives*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 568 569 for (i=0; i<nreceives; i++) { 570 recv_waits[2*i] = recv_waits1[i]; 571 recv_waits[2*i+1] = recv_waits2[i]; 572 } 573 stash->recv_waits = recv_waits; 574 575 ierr = PetscFree(recv_waits1);CHKERRQ(ierr); 576 ierr = PetscFree(recv_waits2);CHKERRQ(ierr); 577 578 stash->svalues = svalues; 579 stash->sindices = sindices; 580 stash->rvalues = rvalues; 581 stash->rindices = rindices; 582 stash->send_waits = send_waits; 583 stash->nsends = nsends; 584 stash->nrecvs = nreceives; 585 stash->reproduce_count = 0; 586 PetscFunctionReturn(0); 587 } 588 589 /* 590 MatStashScatterGetMesg_Private - This function waits on the receives posted 591 in the function MatStashScatterBegin_Private() and returns one message at 592 a time to the calling function. If no messages are left, it indicates this 593 by setting flg = 0, else it sets flg = 1. 594 595 Input Parameters: 596 stash - the stash 597 598 Output Parameters: 599 nvals - the number of entries in the current message. 600 rows - an array of row indices (or blocked indices) corresponding to the values 601 cols - an array of columnindices (or blocked indices) corresponding to the values 602 vals - the values 603 flg - 0 indicates no more message left, and the current call has no values associated. 604 1 indicates that the current call successfully received a message, and the 605 other output parameters nvals,rows,cols,vals are set appropriately. 606 */ 607 #undef __FUNCT__ 608 #define __FUNCT__ "MatStashScatterGetMesg_Private" 609 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg) 610 { 611 PetscErrorCode ierr; 612 PetscMPIInt i,*flg_v = stash->flg_v,i1,i2; 613 PetscInt bs2; 614 MPI_Status recv_status; 615 PetscBool match_found = PETSC_FALSE; 616 617 PetscFunctionBegin; 618 *flg = 0; /* When a message is discovered this is reset to 1 */ 619 /* Return if no more messages to process */ 620 if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(0); 621 622 bs2 = stash->bs*stash->bs; 623 /* If a matching pair of receives are found, process them, and return the data to 624 the calling function. Until then keep receiving messages */ 625 while (!match_found) { 626 CHKMEMQ; 627 if (stash->reproduce) { 628 i = stash->reproduce_count++; 629 ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr); 630 } else { 631 ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 632 } 633 CHKMEMQ; 634 if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!"); 635 636 /* Now pack the received message into a structure which is usable by others */ 637 if (i % 2) { 638 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr); 639 640 flg_v[2*recv_status.MPI_SOURCE] = i/2; 641 642 *nvals = *nvals/bs2; 643 } else { 644 ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr); 645 646 flg_v[2*recv_status.MPI_SOURCE+1] = i/2; 647 648 *nvals = *nvals/2; /* This message has both row indices and col indices */ 649 } 650 651 /* Check if we have both messages from this proc */ 652 i1 = flg_v[2*recv_status.MPI_SOURCE]; 653 i2 = flg_v[2*recv_status.MPI_SOURCE+1]; 654 if (i1 != -1 && i2 != -1) { 655 *rows = stash->rindices[i2]; 656 *cols = *rows + *nvals; 657 *vals = stash->rvalues[i1]; 658 *flg = 1; 659 stash->nprocessed++; 660 match_found = PETSC_TRUE; 661 } 662 } 663 PetscFunctionReturn(0); 664 } 665