1 #define PETSCMAT_DLL 2 3 #include "private/matimpl.h" 4 5 #define DEFAULT_STASH_SIZE 10000 6 7 /* 8 MatStashCreate_Private - Creates a stash,currently used for all the parallel 9 matrix implementations. The stash is where elements of a matrix destined 10 to be stored on other processors are kept until matrix assembly is done. 11 12 This is a simple minded stash. Simply adds entries to end of stash. 13 14 Input Parameters: 15 comm - communicator, required for scatters. 16 bs - stash block size. used when stashing blocks of values 17 18 Output Parameters: 19 stash - the newly created stash 20 */ 21 #undef __FUNCT__ 22 #define __FUNCT__ "MatStashCreate_Private" 23 PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash) 24 { 25 PetscErrorCode ierr; 26 PetscInt max,*opt,nopt; 27 PetscTruth flg; 28 29 PetscFunctionBegin; 30 /* Require 2 tags,get the second using PetscCommGetNewTag() */ 31 stash->comm = comm; 32 ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr); 33 ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr); 34 ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr); 35 ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr); 36 37 nopt = stash->size; 38 ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr); 39 ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr); 40 if (flg) { 41 if (nopt == 1) max = opt[0]; 42 else if (nopt == stash->size) max = opt[stash->rank]; 43 else if (stash->rank < nopt) max = opt[stash->rank]; 44 else max = 0; /* Use default */ 45 stash->umax = max; 46 } else { 47 stash->umax = 0; 48 } 49 ierr = PetscFree(opt);CHKERRQ(ierr); 50 if (bs <= 0) bs = 1; 51 52 stash->bs = bs; 53 stash->nmax = 0; 54 stash->oldnmax = 0; 55 stash->n = 0; 56 stash->reallocs = -1; 57 stash->space_head = 0; 58 stash->space = 0; 59 60 stash->send_waits = 0; 61 stash->recv_waits = 0; 62 stash->send_status = 0; 63 stash->nsends = 0; 64 stash->nrecvs = 0; 65 stash->svalues = 0; 66 stash->rvalues = 0; 67 stash->rindices = 0; 68 stash->nprocs = 0; 69 stash->nprocessed = 0; 70 PetscFunctionReturn(0); 71 } 72 73 /* 74 MatStashDestroy_Private - Destroy the stash 75 */ 76 #undef __FUNCT__ 77 #define __FUNCT__ "MatStashDestroy_Private" 78 PetscErrorCode MatStashDestroy_Private(MatStash *stash) 79 { 80 PetscErrorCode ierr; 81 82 PetscFunctionBegin; 83 if (stash->space_head){ 84 ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr); 85 stash->space_head = 0; 86 stash->space = 0; 87 } 88 PetscFunctionReturn(0); 89 } 90 91 /* 92 MatStashScatterEnd_Private - This is called as the fial stage of 93 scatter. The final stages of messagepassing is done here, and 94 all the memory used for messagepassing is cleanedu up. This 95 routine also resets the stash, and deallocates the memory used 96 for the stash. It also keeps track of the current memory usage 97 so that the same value can be used the next time through. 98 */ 99 #undef __FUNCT__ 100 #define __FUNCT__ "MatStashScatterEnd_Private" 101 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash) 102 { 103 PetscErrorCode ierr; 104 PetscInt nsends=stash->nsends,bs2,oldnmax; 105 MPI_Status *send_status; 106 107 PetscFunctionBegin; 108 /* wait on sends */ 109 if (nsends) { 110 ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 111 ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 112 ierr = PetscFree(send_status);CHKERRQ(ierr); 113 } 114 115 /* Now update nmaxold to be app 10% more than max n used, this way the 116 wastage of space is reduced the next time this stash is used. 117 Also update the oldmax, only if it increases */ 118 if (stash->n) { 119 bs2 = stash->bs*stash->bs; 120 oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 121 if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 122 } 123 124 stash->nmax = 0; 125 stash->n = 0; 126 stash->reallocs = -1; 127 stash->nprocessed = 0; 128 if (stash->space_head){ 129 ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr); 130 stash->space_head = 0; 131 stash->space = 0; 132 } 133 ierr = PetscFree(stash->send_waits);CHKERRQ(ierr); 134 stash->send_waits = 0; 135 ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr); 136 stash->recv_waits = 0; 137 ierr = PetscFree(stash->svalues);CHKERRQ(ierr); 138 stash->svalues = 0; 139 ierr = PetscFree(stash->rvalues);CHKERRQ(ierr); 140 stash->rvalues = 0; 141 ierr = PetscFree(stash->rindices);CHKERRQ(ierr); 142 stash->rindices = 0; 143 ierr = PetscFree(stash->nprocs);CHKERRQ(ierr); 144 stash->nprocs = 0; 145 PetscFunctionReturn(0); 146 } 147 148 /* 149 MatStashGetInfo_Private - Gets the relavant statistics of the stash 150 151 Input Parameters: 152 stash - the stash 153 nstash - the size of the stash. Indicates the number of values stored. 154 reallocs - the number of additional mallocs incurred. 155 156 */ 157 #undef __FUNCT__ 158 #define __FUNCT__ "MatStashGetInfo_Private" 159 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs) 160 { 161 PetscInt bs2 = stash->bs*stash->bs; 162 163 PetscFunctionBegin; 164 if (nstash) *nstash = stash->n*bs2; 165 if (reallocs) { 166 if (stash->reallocs < 0) *reallocs = 0; 167 else *reallocs = stash->reallocs; 168 } 169 PetscFunctionReturn(0); 170 } 171 172 /* 173 MatStashSetInitialSize_Private - Sets the initial size of the stash 174 175 Input Parameters: 176 stash - the stash 177 max - the value that is used as the max size of the stash. 178 this value is used while allocating memory. 179 */ 180 #undef __FUNCT__ 181 #define __FUNCT__ "MatStashSetInitialSize_Private" 182 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max) 183 { 184 PetscFunctionBegin; 185 stash->umax = max; 186 PetscFunctionReturn(0); 187 } 188 189 /* MatStashExpand_Private - Expand the stash. This function is called 190 when the space in the stash is not sufficient to add the new values 191 being inserted into the stash. 192 193 Input Parameters: 194 stash - the stash 195 incr - the minimum increase requested 196 197 Notes: 198 This routine doubles the currently used memory. 199 */ 200 #undef __FUNCT__ 201 #define __FUNCT__ "MatStashExpand_Private" 202 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr) 203 { 204 PetscErrorCode ierr; 205 PetscInt newnmax,bs2= stash->bs*stash->bs; 206 207 PetscFunctionBegin; 208 /* allocate a larger stash */ 209 if (!stash->oldnmax && !stash->nmax) { /* new stash */ 210 if (stash->umax) newnmax = stash->umax/bs2; 211 else newnmax = DEFAULT_STASH_SIZE/bs2; 212 } else if (!stash->nmax) { /* resuing stash */ 213 if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2; 214 else newnmax = stash->oldnmax/bs2; 215 } else newnmax = stash->nmax*2; 216 if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 217 218 /* Get a MatStashSpace and attach it to stash */ 219 ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr); 220 if (!stash->space_head) { /* new stash or resuing stash->oldnmax */ 221 stash->space_head = stash->space; 222 } 223 224 stash->reallocs++; 225 stash->nmax = newnmax; 226 PetscFunctionReturn(0); 227 } 228 /* 229 MatStashValuesRow_Private - inserts values into the stash. This function 230 expects the values to be roworiented. Multiple columns belong to the same row 231 can be inserted with a single call to this function. 232 233 Input Parameters: 234 stash - the stash 235 row - the global row correspoiding to the values 236 n - the number of elements inserted. All elements belong to the above row. 237 idxn - the global column indices corresponding to each of the values. 238 values - the values inserted 239 */ 240 #undef __FUNCT__ 241 #define __FUNCT__ "MatStashValuesRow_Private" 242 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscTruth ignorezeroentries) 243 { 244 PetscErrorCode ierr; 245 PetscInt i,k,cnt = 0; 246 PetscMatStashSpace space=stash->space; 247 248 PetscFunctionBegin; 249 /* Check and see if we have sufficient memory */ 250 if (!space || space->local_remaining < n){ 251 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 252 } 253 space = stash->space; 254 k = space->local_used; 255 for (i=0; i<n; i++) { 256 if (ignorezeroentries && (values[i] == 0.0)) continue; 257 space->idx[k] = row; 258 space->idy[k] = idxn[i]; 259 space->val[k] = values[i]; 260 k++; 261 cnt++; 262 } 263 stash->n += cnt; 264 space->local_used += cnt; 265 space->local_remaining -= cnt; 266 PetscFunctionReturn(0); 267 } 268 269 /* 270 MatStashValuesCol_Private - inserts values into the stash. This function 271 expects the values to be columnoriented. Multiple columns belong to the same row 272 can be inserted with a single call to this function. 273 274 Input Parameters: 275 stash - the stash 276 row - the global row correspoiding to the values 277 n - the number of elements inserted. All elements belong to the above row. 278 idxn - the global column indices corresponding to each of the values. 279 values - the values inserted 280 stepval - the consecutive values are sepated by a distance of stepval. 281 this happens because the input is columnoriented. 282 */ 283 #undef __FUNCT__ 284 #define __FUNCT__ "MatStashValuesCol_Private" 285 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscTruth ignorezeroentries) 286 { 287 PetscErrorCode ierr; 288 PetscInt i,k,cnt = 0; 289 PetscMatStashSpace space=stash->space; 290 291 PetscFunctionBegin; 292 /* Check and see if we have sufficient memory */ 293 if (!space || space->local_remaining < n){ 294 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 295 } 296 space = stash->space; 297 k = space->local_used; 298 for (i=0; i<n; i++) { 299 if (ignorezeroentries && (values[i*stepval] == 0.0)) continue; 300 space->idx[k] = row; 301 space->idy[k] = idxn[i]; 302 space->val[k] = values[i*stepval]; 303 k++; 304 cnt++; 305 } 306 stash->n += cnt; 307 space->local_used += cnt; 308 space->local_remaining -= cnt; 309 PetscFunctionReturn(0); 310 } 311 312 /* 313 MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 314 This function expects the values to be roworiented. Multiple columns belong 315 to the same block-row can be inserted with a single call to this function. 316 This function extracts the sub-block of values based on the dimensions of 317 the original input block, and the row,col values corresponding to the blocks. 318 319 Input Parameters: 320 stash - the stash 321 row - the global block-row correspoiding to the values 322 n - the number of elements inserted. All elements belong to the above row. 323 idxn - the global block-column indices corresponding to each of the blocks of 324 values. Each block is of size bs*bs. 325 values - the values inserted 326 rmax - the number of block-rows in the original block. 327 cmax - the number of block-columsn on the original block. 328 idx - the index of the current block-row in the original block. 329 */ 330 #undef __FUNCT__ 331 #define __FUNCT__ "MatStashValuesRowBlocked_Private" 332 PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 333 { 334 PetscErrorCode ierr; 335 PetscInt i,j,k,bs2,bs=stash->bs,l; 336 const PetscScalar *vals; 337 PetscScalar *array; 338 PetscMatStashSpace space=stash->space; 339 340 PetscFunctionBegin; 341 if (!space || space->local_remaining < n){ 342 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 343 } 344 space = stash->space; 345 l = space->local_used; 346 bs2 = bs*bs; 347 for (i=0; i<n; i++) { 348 space->idx[l] = row; 349 space->idy[l] = idxn[i]; 350 /* Now copy over the block of values. Store the values column oriented. 351 This enables inserting multiple blocks belonging to a row with a single 352 funtion call */ 353 array = space->val + bs2*l; 354 vals = values + idx*bs2*n + bs*i; 355 for (j=0; j<bs; j++) { 356 for (k=0; k<bs; k++) array[k*bs] = vals[k]; 357 array++; 358 vals += cmax*bs; 359 } 360 l++; 361 } 362 stash->n += n; 363 space->local_used += n; 364 space->local_remaining -= n; 365 PetscFunctionReturn(0); 366 } 367 368 /* 369 MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 370 This function expects the values to be roworiented. Multiple columns belong 371 to the same block-row can be inserted with a single call to this function. 372 This function extracts the sub-block of values based on the dimensions of 373 the original input block, and the row,col values corresponding to the blocks. 374 375 Input Parameters: 376 stash - the stash 377 row - the global block-row correspoiding to the values 378 n - the number of elements inserted. All elements belong to the above row. 379 idxn - the global block-column indices corresponding to each of the blocks of 380 values. Each block is of size bs*bs. 381 values - the values inserted 382 rmax - the number of block-rows in the original block. 383 cmax - the number of block-columsn on the original block. 384 idx - the index of the current block-row in the original block. 385 */ 386 #undef __FUNCT__ 387 #define __FUNCT__ "MatStashValuesColBlocked_Private" 388 PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 389 { 390 PetscErrorCode ierr; 391 PetscInt i,j,k,bs2,bs=stash->bs,l; 392 const PetscScalar *vals; 393 PetscScalar *array; 394 PetscMatStashSpace space=stash->space; 395 396 PetscFunctionBegin; 397 if (!space || space->local_remaining < n){ 398 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 399 } 400 space = stash->space; 401 l = space->local_used; 402 bs2 = bs*bs; 403 for (i=0; i<n; i++) { 404 space->idx[l] = row; 405 space->idy[l] = idxn[i]; 406 /* Now copy over the block of values. Store the values column oriented. 407 This enables inserting multiple blocks belonging to a row with a single 408 funtion call */ 409 array = space->val + bs2*l; 410 vals = values + idx*bs2*n + bs*i; 411 for (j=0; j<bs; j++) { 412 for (k=0; k<bs; k++) {array[k] = vals[k];} 413 array += bs; 414 vals += rmax*bs; 415 } 416 l++; 417 } 418 stash->n += n; 419 space->local_used += n; 420 space->local_remaining -= n; 421 PetscFunctionReturn(0); 422 } 423 /* 424 MatStashScatterBegin_Private - Initiates the transfer of values to the 425 correct owners. This function goes through the stash, and check the 426 owners of each stashed value, and sends the values off to the owner 427 processors. 428 429 Input Parameters: 430 stash - the stash 431 owners - an array of size 'no-of-procs' which gives the ownership range 432 for each node. 433 434 Notes: The 'owners' array in the cased of the blocked-stash has the 435 ranges specified blocked global indices, and for the regular stash in 436 the proper global indices. 437 */ 438 #undef __FUNCT__ 439 #define __FUNCT__ "MatStashScatterBegin_Private" 440 PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners) 441 { 442 PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 443 PetscInt size=stash->size,nsends; 444 PetscErrorCode ierr; 445 PetscInt count,*sindices,**rindices,i,j,idx,lastidx,l; 446 PetscScalar **rvalues,*svalues; 447 MPI_Comm comm = stash->comm; 448 MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 449 PetscMPIInt *nprocs,*nlengths,nreceives; 450 PetscInt *sp_idx,*sp_idy; 451 PetscScalar *sp_val; 452 PetscMatStashSpace space,space_next; 453 454 PetscFunctionBegin; 455 bs2 = stash->bs*stash->bs; 456 457 /* first count number of contributors to each processor */ 458 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr); 459 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscMPIInt));CHKERRQ(ierr); 460 ierr = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); 461 462 nlengths = nprocs+size; 463 i = j = 0; 464 lastidx = -1; 465 space = stash->space_head; 466 while (space != PETSC_NULL){ 467 space_next = space->next; 468 sp_idx = space->idx; 469 for (l=0; l<space->local_used; l++){ 470 /* if indices are NOT locally sorted, need to start search at the beginning */ 471 if (lastidx > (idx = sp_idx[l])) j = 0; 472 lastidx = idx; 473 for (; j<size; j++) { 474 if (idx >= owners[j] && idx < owners[j+1]) { 475 nlengths[j]++; owner[i] = j; break; 476 } 477 } 478 i++; 479 } 480 space = space_next; 481 } 482 /* Now check what procs get messages - and compute nsends. */ 483 for (i=0, nsends=0 ; i<size; i++) { 484 if (nlengths[i]) { nprocs[i] = 1; nsends ++;} 485 } 486 487 {PetscMPIInt *onodes,*olengths; 488 /* Determine the number of messages to expect, their lengths, from from-ids */ 489 ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr); 490 ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr); 491 /* since clubbing row,col - lengths are multiplied by 2 */ 492 for (i=0; i<nreceives; i++) olengths[i] *=2; 493 ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr); 494 /* values are size 'bs2' lengths (and remove earlier factor 2 */ 495 for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 496 ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr); 497 ierr = PetscFree(onodes);CHKERRQ(ierr); 498 ierr = PetscFree(olengths);CHKERRQ(ierr); 499 } 500 501 /* do sends: 502 1) starts[i] gives the starting index in svalues for stuff going to 503 the ith processor 504 */ 505 ierr = PetscMalloc((stash->n+1)*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)),&svalues);CHKERRQ(ierr); 506 sindices = (PetscInt*)(svalues + bs2*stash->n); 507 ierr = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 508 ierr = PetscMalloc(2*size*sizeof(PetscInt),&startv);CHKERRQ(ierr); 509 starti = startv + size; 510 /* use 2 sends the first with all_a, the next with all_i and all_j */ 511 startv[0] = 0; starti[0] = 0; 512 for (i=1; i<size; i++) { 513 startv[i] = startv[i-1] + nlengths[i-1]; 514 starti[i] = starti[i-1] + nlengths[i-1]*2; 515 } 516 517 i = 0; 518 space = stash->space_head; 519 while (space != PETSC_NULL){ 520 space_next = space->next; 521 sp_idx = space->idx; 522 sp_idy = space->idy; 523 sp_val = space->val; 524 for (l=0; l<space->local_used; l++){ 525 j = owner[i]; 526 if (bs2 == 1) { 527 svalues[startv[j]] = sp_val[l]; 528 } else { 529 PetscInt k; 530 PetscScalar *buf1,*buf2; 531 buf1 = svalues+bs2*startv[j]; 532 buf2 = space->val + bs2*l; 533 for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; } 534 } 535 sindices[starti[j]] = sp_idx[l]; 536 sindices[starti[j]+nlengths[j]] = sp_idy[l]; 537 startv[j]++; 538 starti[j]++; 539 i++; 540 } 541 space = space_next; 542 } 543 startv[0] = 0; 544 for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];} 545 546 for (i=0,count=0; i<size; i++) { 547 if (nprocs[i]) { 548 ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr); 549 ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr); 550 } 551 } 552 #if defined(PETSC_USE_INFO) 553 ierr = PetscInfo1(mat,"No of messages: %d \n",nsends);CHKERRQ(ierr); 554 for (i=0; i<size; i++) { 555 if (nprocs[i]) { 556 ierr = PetscInfo2(mat,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(PetscScalar)+2*sizeof(PetscInt));CHKERRQ(ierr); 557 } 558 } 559 #endif 560 ierr = PetscFree(owner);CHKERRQ(ierr); 561 ierr = PetscFree(startv);CHKERRQ(ierr); 562 /* This memory is reused in scatter end for a different purpose*/ 563 for (i=0; i<2*size; i++) nprocs[i] = -1; 564 stash->nprocs = nprocs; 565 566 /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 567 ierr = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 568 569 for (i=0; i<nreceives; i++) { 570 recv_waits[2*i] = recv_waits1[i]; 571 recv_waits[2*i+1] = recv_waits2[i]; 572 } 573 stash->recv_waits = recv_waits; 574 ierr = PetscFree(recv_waits1);CHKERRQ(ierr); 575 ierr = PetscFree(recv_waits2);CHKERRQ(ierr); 576 577 stash->svalues = svalues; stash->rvalues = rvalues; 578 stash->rindices = rindices; stash->send_waits = send_waits; 579 stash->nsends = nsends; stash->nrecvs = nreceives; 580 PetscFunctionReturn(0); 581 } 582 583 /* 584 MatStashScatterGetMesg_Private - This function waits on the receives posted 585 in the function MatStashScatterBegin_Private() and returns one message at 586 a time to the calling function. If no messages are left, it indicates this 587 by setting flg = 0, else it sets flg = 1. 588 589 Input Parameters: 590 stash - the stash 591 592 Output Parameters: 593 nvals - the number of entries in the current message. 594 rows - an array of row indices (or blocked indices) corresponding to the values 595 cols - an array of columnindices (or blocked indices) corresponding to the values 596 vals - the values 597 flg - 0 indicates no more message left, and the current call has no values associated. 598 1 indicates that the current call successfully received a message, and the 599 other output parameters nvals,rows,cols,vals are set appropriately. 600 */ 601 #undef __FUNCT__ 602 #define __FUNCT__ "MatStashScatterGetMesg_Private" 603 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,PetscScalar **vals,PetscInt *flg) 604 { 605 PetscErrorCode ierr; 606 PetscMPIInt i,*flg_v,i1,i2; 607 PetscInt bs2; 608 MPI_Status recv_status; 609 PetscTruth match_found = PETSC_FALSE; 610 611 PetscFunctionBegin; 612 613 *flg = 0; /* When a message is discovered this is reset to 1 */ 614 /* Return if no more messages to process */ 615 if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); } 616 617 flg_v = stash->nprocs; 618 bs2 = stash->bs*stash->bs; 619 /* If a matching pair of receieves are found, process them, and return the data to 620 the calling function. Until then keep receiving messages */ 621 while (!match_found) { 622 ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 623 /* Now pack the received message into a structure which is useable by others */ 624 if (i % 2) { 625 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr); 626 flg_v[2*recv_status.MPI_SOURCE] = i/2; 627 *nvals = *nvals/bs2; 628 } else { 629 ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr); 630 flg_v[2*recv_status.MPI_SOURCE+1] = i/2; 631 *nvals = *nvals/2; /* This message has both row indices and col indices */ 632 } 633 634 /* Check if we have both messages from this proc */ 635 i1 = flg_v[2*recv_status.MPI_SOURCE]; 636 i2 = flg_v[2*recv_status.MPI_SOURCE+1]; 637 if (i1 != -1 && i2 != -1) { 638 *rows = stash->rindices[i2]; 639 *cols = *rows + *nvals; 640 *vals = stash->rvalues[i1]; 641 *flg = 1; 642 stash->nprocessed ++; 643 match_found = PETSC_TRUE; 644 } 645 } 646 PetscFunctionReturn(0); 647 } 648