1 #define PETSCMAT_DLL 2 3 #include "private/matimpl.h" 4 5 #define DEFAULT_STASH_SIZE 10000 6 7 /* 8 MatStashCreate_Private - Creates a stash,currently used for all the parallel 9 matrix implementations. The stash is where elements of a matrix destined 10 to be stored on other processors are kept until matrix assembly is done. 11 12 This is a simple minded stash. Simply adds entries to end of stash. 13 14 Input Parameters: 15 comm - communicator, required for scatters. 16 bs - stash block size. used when stashing blocks of values 17 18 Output Parameters: 19 stash - the newly created stash 20 */ 21 #undef __FUNCT__ 22 #define __FUNCT__ "MatStashCreate_Private" 23 PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash) 24 { 25 PetscErrorCode ierr; 26 PetscInt max,*opt,nopt; 27 PetscTruth flg; 28 29 PetscFunctionBegin; 30 /* Require 2 tags,get the second using PetscCommGetNewTag() */ 31 stash->comm = comm; 32 ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr); 33 ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr); 34 ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr); 35 ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr); 36 37 nopt = stash->size; 38 ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr); 39 ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr); 40 if (flg) { 41 if (nopt == 1) max = opt[0]; 42 else if (nopt == stash->size) max = opt[stash->rank]; 43 else if (stash->rank < nopt) max = opt[stash->rank]; 44 else max = 0; /* Use default */ 45 stash->umax = max; 46 } else { 47 stash->umax = 0; 48 } 49 ierr = PetscFree(opt);CHKERRQ(ierr); 50 if (bs <= 0) bs = 1; 51 52 stash->bs = bs; 53 stash->nmax = 0; 54 stash->oldnmax = 0; 55 stash->n = 0; 56 stash->reallocs = -1; 57 stash->space_head = 0; 58 stash->space = 0; 59 60 stash->send_waits = 0; 61 stash->recv_waits = 0; 62 stash->send_status = 0; 63 stash->nsends = 0; 64 stash->nrecvs = 0; 65 stash->svalues = 0; 66 stash->rvalues = 0; 67 stash->rindices = 0; 68 stash->nprocessed = 0; 69 PetscFunctionReturn(0); 70 } 71 72 /* 73 MatStashDestroy_Private - Destroy the stash 74 */ 75 #undef __FUNCT__ 76 #define __FUNCT__ "MatStashDestroy_Private" 77 PetscErrorCode MatStashDestroy_Private(MatStash *stash) 78 { 79 PetscErrorCode ierr; 80 81 PetscFunctionBegin; 82 if (stash->space_head){ 83 ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr); 84 stash->space_head = 0; 85 stash->space = 0; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 /* 91 MatStashScatterEnd_Private - This is called as the fial stage of 92 scatter. The final stages of messagepassing is done here, and 93 all the memory used for messagepassing is cleanedu up. This 94 routine also resets the stash, and deallocates the memory used 95 for the stash. It also keeps track of the current memory usage 96 so that the same value can be used the next time through. 97 */ 98 #undef __FUNCT__ 99 #define __FUNCT__ "MatStashScatterEnd_Private" 100 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash) 101 { 102 PetscErrorCode ierr; 103 PetscInt nsends=stash->nsends,bs2,oldnmax; 104 MPI_Status *send_status; 105 106 PetscFunctionBegin; 107 /* wait on sends */ 108 if (nsends) { 109 ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 110 ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 111 ierr = PetscFree(send_status);CHKERRQ(ierr); 112 } 113 114 /* Now update nmaxold to be app 10% more than max n used, this way the 115 wastage of space is reduced the next time this stash is used. 116 Also update the oldmax, only if it increases */ 117 if (stash->n) { 118 bs2 = stash->bs*stash->bs; 119 oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 120 if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 121 } 122 123 stash->nmax = 0; 124 stash->n = 0; 125 stash->reallocs = -1; 126 stash->nprocessed = 0; 127 if (stash->space_head){ 128 ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr); 129 stash->space_head = 0; 130 stash->space = 0; 131 } 132 ierr = PetscFree(stash->send_waits);CHKERRQ(ierr); 133 stash->send_waits = 0; 134 ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr); 135 stash->recv_waits = 0; 136 ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr); 137 stash->svalues = 0; 138 ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr); 139 ierr = PetscFree(stash->rvalues);CHKERRQ(ierr); 140 stash->rvalues = 0; 141 ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr); 142 ierr = PetscFree(stash->rindices);CHKERRQ(ierr); 143 stash->rindices = 0; 144 PetscFunctionReturn(0); 145 } 146 147 /* 148 MatStashGetInfo_Private - Gets the relavant statistics of the stash 149 150 Input Parameters: 151 stash - the stash 152 nstash - the size of the stash. Indicates the number of values stored. 153 reallocs - the number of additional mallocs incurred. 154 155 */ 156 #undef __FUNCT__ 157 #define __FUNCT__ "MatStashGetInfo_Private" 158 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs) 159 { 160 PetscInt bs2 = stash->bs*stash->bs; 161 162 PetscFunctionBegin; 163 if (nstash) *nstash = stash->n*bs2; 164 if (reallocs) { 165 if (stash->reallocs < 0) *reallocs = 0; 166 else *reallocs = stash->reallocs; 167 } 168 PetscFunctionReturn(0); 169 } 170 171 /* 172 MatStashSetInitialSize_Private - Sets the initial size of the stash 173 174 Input Parameters: 175 stash - the stash 176 max - the value that is used as the max size of the stash. 177 this value is used while allocating memory. 178 */ 179 #undef __FUNCT__ 180 #define __FUNCT__ "MatStashSetInitialSize_Private" 181 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max) 182 { 183 PetscFunctionBegin; 184 stash->umax = max; 185 PetscFunctionReturn(0); 186 } 187 188 /* MatStashExpand_Private - Expand the stash. This function is called 189 when the space in the stash is not sufficient to add the new values 190 being inserted into the stash. 191 192 Input Parameters: 193 stash - the stash 194 incr - the minimum increase requested 195 196 Notes: 197 This routine doubles the currently used memory. 198 */ 199 #undef __FUNCT__ 200 #define __FUNCT__ "MatStashExpand_Private" 201 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr) 202 { 203 PetscErrorCode ierr; 204 PetscInt newnmax,bs2= stash->bs*stash->bs; 205 206 PetscFunctionBegin; 207 /* allocate a larger stash */ 208 if (!stash->oldnmax && !stash->nmax) { /* new stash */ 209 if (stash->umax) newnmax = stash->umax/bs2; 210 else newnmax = DEFAULT_STASH_SIZE/bs2; 211 } else if (!stash->nmax) { /* resuing stash */ 212 if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2; 213 else newnmax = stash->oldnmax/bs2; 214 } else newnmax = stash->nmax*2; 215 if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 216 217 /* Get a MatStashSpace and attach it to stash */ 218 ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr); 219 if (!stash->space_head) { /* new stash or resuing stash->oldnmax */ 220 stash->space_head = stash->space; 221 } 222 223 stash->reallocs++; 224 stash->nmax = newnmax; 225 PetscFunctionReturn(0); 226 } 227 /* 228 MatStashValuesRow_Private - inserts values into the stash. This function 229 expects the values to be roworiented. Multiple columns belong to the same row 230 can be inserted with a single call to this function. 231 232 Input Parameters: 233 stash - the stash 234 row - the global row correspoiding to the values 235 n - the number of elements inserted. All elements belong to the above row. 236 idxn - the global column indices corresponding to each of the values. 237 values - the values inserted 238 */ 239 #undef __FUNCT__ 240 #define __FUNCT__ "MatStashValuesRow_Private" 241 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscTruth ignorezeroentries) 242 { 243 PetscErrorCode ierr; 244 PetscInt i,k,cnt = 0; 245 PetscMatStashSpace space=stash->space; 246 247 PetscFunctionBegin; 248 /* Check and see if we have sufficient memory */ 249 if (!space || space->local_remaining < n){ 250 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 251 } 252 space = stash->space; 253 k = space->local_used; 254 for (i=0; i<n; i++) { 255 if (ignorezeroentries && (values[i] == 0.0)) continue; 256 space->idx[k] = row; 257 space->idy[k] = idxn[i]; 258 space->val[k] = values[i]; 259 k++; 260 cnt++; 261 } 262 stash->n += cnt; 263 space->local_used += cnt; 264 space->local_remaining -= cnt; 265 PetscFunctionReturn(0); 266 } 267 268 /* 269 MatStashValuesCol_Private - inserts values into the stash. This function 270 expects the values to be columnoriented. Multiple columns belong to the same row 271 can be inserted with a single call to this function. 272 273 Input Parameters: 274 stash - the stash 275 row - the global row correspoiding to the values 276 n - the number of elements inserted. All elements belong to the above row. 277 idxn - the global column indices corresponding to each of the values. 278 values - the values inserted 279 stepval - the consecutive values are sepated by a distance of stepval. 280 this happens because the input is columnoriented. 281 */ 282 #undef __FUNCT__ 283 #define __FUNCT__ "MatStashValuesCol_Private" 284 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscTruth ignorezeroentries) 285 { 286 PetscErrorCode ierr; 287 PetscInt i,k,cnt = 0; 288 PetscMatStashSpace space=stash->space; 289 290 PetscFunctionBegin; 291 /* Check and see if we have sufficient memory */ 292 if (!space || space->local_remaining < n){ 293 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 294 } 295 space = stash->space; 296 k = space->local_used; 297 for (i=0; i<n; i++) { 298 if (ignorezeroentries && (values[i*stepval] == 0.0)) continue; 299 space->idx[k] = row; 300 space->idy[k] = idxn[i]; 301 space->val[k] = values[i*stepval]; 302 k++; 303 cnt++; 304 } 305 stash->n += cnt; 306 space->local_used += cnt; 307 space->local_remaining -= cnt; 308 PetscFunctionReturn(0); 309 } 310 311 /* 312 MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 313 This function expects the values to be roworiented. Multiple columns belong 314 to the same block-row can be inserted with a single call to this function. 315 This function extracts the sub-block of values based on the dimensions of 316 the original input block, and the row,col values corresponding to the blocks. 317 318 Input Parameters: 319 stash - the stash 320 row - the global block-row correspoiding to the values 321 n - the number of elements inserted. All elements belong to the above row. 322 idxn - the global block-column indices corresponding to each of the blocks of 323 values. Each block is of size bs*bs. 324 values - the values inserted 325 rmax - the number of block-rows in the original block. 326 cmax - the number of block-columsn on the original block. 327 idx - the index of the current block-row in the original block. 328 */ 329 #undef __FUNCT__ 330 #define __FUNCT__ "MatStashValuesRowBlocked_Private" 331 PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 332 { 333 PetscErrorCode ierr; 334 PetscInt i,j,k,bs2,bs=stash->bs,l; 335 const PetscScalar *vals; 336 PetscScalar *array; 337 PetscMatStashSpace space=stash->space; 338 339 PetscFunctionBegin; 340 if (!space || space->local_remaining < n){ 341 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 342 } 343 space = stash->space; 344 l = space->local_used; 345 bs2 = bs*bs; 346 for (i=0; i<n; i++) { 347 space->idx[l] = row; 348 space->idy[l] = idxn[i]; 349 /* Now copy over the block of values. Store the values column oriented. 350 This enables inserting multiple blocks belonging to a row with a single 351 funtion call */ 352 array = space->val + bs2*l; 353 vals = values + idx*bs2*n + bs*i; 354 for (j=0; j<bs; j++) { 355 for (k=0; k<bs; k++) array[k*bs] = vals[k]; 356 array++; 357 vals += cmax*bs; 358 } 359 l++; 360 } 361 stash->n += n; 362 space->local_used += n; 363 space->local_remaining -= n; 364 PetscFunctionReturn(0); 365 } 366 367 /* 368 MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 369 This function expects the values to be roworiented. Multiple columns belong 370 to the same block-row can be inserted with a single call to this function. 371 This function extracts the sub-block of values based on the dimensions of 372 the original input block, and the row,col values corresponding to the blocks. 373 374 Input Parameters: 375 stash - the stash 376 row - the global block-row correspoiding to the values 377 n - the number of elements inserted. All elements belong to the above row. 378 idxn - the global block-column indices corresponding to each of the blocks of 379 values. Each block is of size bs*bs. 380 values - the values inserted 381 rmax - the number of block-rows in the original block. 382 cmax - the number of block-columsn on the original block. 383 idx - the index of the current block-row in the original block. 384 */ 385 #undef __FUNCT__ 386 #define __FUNCT__ "MatStashValuesColBlocked_Private" 387 PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 388 { 389 PetscErrorCode ierr; 390 PetscInt i,j,k,bs2,bs=stash->bs,l; 391 const PetscScalar *vals; 392 PetscScalar *array; 393 PetscMatStashSpace space=stash->space; 394 395 PetscFunctionBegin; 396 if (!space || space->local_remaining < n){ 397 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 398 } 399 space = stash->space; 400 l = space->local_used; 401 bs2 = bs*bs; 402 for (i=0; i<n; i++) { 403 space->idx[l] = row; 404 space->idy[l] = idxn[i]; 405 /* Now copy over the block of values. Store the values column oriented. 406 This enables inserting multiple blocks belonging to a row with a single 407 funtion call */ 408 array = space->val + bs2*l; 409 vals = values + idx*bs2*n + bs*i; 410 for (j=0; j<bs; j++) { 411 for (k=0; k<bs; k++) {array[k] = vals[k];} 412 array += bs; 413 vals += rmax*bs; 414 } 415 l++; 416 } 417 stash->n += n; 418 space->local_used += n; 419 space->local_remaining -= n; 420 PetscFunctionReturn(0); 421 } 422 /* 423 MatStashScatterBegin_Private - Initiates the transfer of values to the 424 correct owners. This function goes through the stash, and check the 425 owners of each stashed value, and sends the values off to the owner 426 processors. 427 428 Input Parameters: 429 stash - the stash 430 owners - an array of size 'no-of-procs' which gives the ownership range 431 for each node. 432 433 Notes: The 'owners' array in the cased of the blocked-stash has the 434 ranges specified blocked global indices, and for the regular stash in 435 the proper global indices. 436 */ 437 #undef __FUNCT__ 438 #define __FUNCT__ "MatStashScatterBegin_Private" 439 PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners) 440 { 441 PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 442 PetscInt size=stash->size,nsends; 443 PetscErrorCode ierr; 444 PetscInt count,*sindices,**rindices,i,j,idx,lastidx,l; 445 PetscScalar **rvalues,*svalues; 446 MPI_Comm comm = stash->comm; 447 MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 448 PetscMPIInt *nprocs,*nlengths,nreceives; 449 PetscInt *sp_idx,*sp_idy; 450 PetscScalar *sp_val; 451 PetscMatStashSpace space,space_next; 452 453 PetscFunctionBegin; 454 bs2 = stash->bs*stash->bs; 455 456 /* first count number of contributors to each processor */ 457 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr); 458 ierr = PetscMemzero(nprocs,size*sizeof(PetscMPIInt));CHKERRQ(ierr); 459 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&nlengths);CHKERRQ(ierr); 460 ierr = PetscMemzero(nlengths,size*sizeof(PetscMPIInt));CHKERRQ(ierr); 461 ierr = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); 462 463 i = j = 0; 464 lastidx = -1; 465 space = stash->space_head; 466 while (space != PETSC_NULL){ 467 space_next = space->next; 468 sp_idx = space->idx; 469 for (l=0; l<space->local_used; l++){ 470 /* if indices are NOT locally sorted, need to start search at the beginning */ 471 if (lastidx > (idx = sp_idx[l])) j = 0; 472 lastidx = idx; 473 for (; j<size; j++) { 474 if (idx >= owners[j] && idx < owners[j+1]) { 475 nlengths[j]++; owner[i] = j; break; 476 } 477 } 478 i++; 479 } 480 space = space_next; 481 } 482 /* Now check what procs get messages - and compute nsends. */ 483 for (i=0, nsends=0 ; i<size; i++) { 484 if (nlengths[i]) { nprocs[i] = 1; nsends ++;} 485 } 486 487 {PetscMPIInt *onodes,*olengths; 488 /* Determine the number of messages to expect, their lengths, from from-ids */ 489 ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr); 490 ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr); 491 /* since clubbing row,col - lengths are multiplied by 2 */ 492 for (i=0; i<nreceives; i++) olengths[i] *=2; 493 ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr); 494 /* values are size 'bs2' lengths (and remove earlier factor 2 */ 495 for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 496 ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr); 497 ierr = PetscFree(onodes);CHKERRQ(ierr); 498 ierr = PetscFree(olengths);CHKERRQ(ierr); 499 } 500 501 /* do sends: 502 1) starts[i] gives the starting index in svalues for stuff going to 503 the ith processor 504 */ 505 ierr = PetscMalloc2(bs2*stash->n,PetscScalar,&svalues,2*(stash->n+1),PetscInt,&sindices);CHKERRQ(ierr); 506 ierr = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 507 ierr = PetscMalloc2(size,PetscInt,&startv,size,PetscInt,&starti);CHKERRQ(ierr); 508 /* use 2 sends the first with all_a, the next with all_i and all_j */ 509 startv[0] = 0; starti[0] = 0; 510 for (i=1; i<size; i++) { 511 startv[i] = startv[i-1] + nlengths[i-1]; 512 starti[i] = starti[i-1] + nlengths[i-1]*2; 513 } 514 515 i = 0; 516 space = stash->space_head; 517 while (space != PETSC_NULL){ 518 space_next = space->next; 519 sp_idx = space->idx; 520 sp_idy = space->idy; 521 sp_val = space->val; 522 for (l=0; l<space->local_used; l++){ 523 j = owner[i]; 524 if (bs2 == 1) { 525 svalues[startv[j]] = sp_val[l]; 526 } else { 527 PetscInt k; 528 PetscScalar *buf1,*buf2; 529 buf1 = svalues+bs2*startv[j]; 530 buf2 = space->val + bs2*l; 531 for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; } 532 } 533 sindices[starti[j]] = sp_idx[l]; 534 sindices[starti[j]+nlengths[j]] = sp_idy[l]; 535 startv[j]++; 536 starti[j]++; 537 i++; 538 } 539 space = space_next; 540 } 541 startv[0] = 0; 542 for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];} 543 544 for (i=0,count=0; i<size; i++) { 545 if (nprocs[i]) { 546 ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr); 547 ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr); 548 } 549 } 550 #if defined(PETSC_USE_INFO) 551 ierr = PetscInfo1(mat,"No of messages: %d \n",nsends);CHKERRQ(ierr); 552 for (i=0; i<size; i++) { 553 if (nprocs[i]) { 554 ierr = PetscInfo2(mat,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(PetscScalar)+2*sizeof(PetscInt));CHKERRQ(ierr); 555 } 556 } 557 #endif 558 ierr = PetscFree(nlengths);CHKERRQ(ierr); 559 ierr = PetscFree(owner);CHKERRQ(ierr); 560 ierr = PetscFree2(startv,starti);CHKERRQ(ierr); 561 ierr = PetscFree(nprocs);CHKERRQ(ierr); 562 563 /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 564 ierr = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 565 566 for (i=0; i<nreceives; i++) { 567 recv_waits[2*i] = recv_waits1[i]; 568 recv_waits[2*i+1] = recv_waits2[i]; 569 } 570 stash->recv_waits = recv_waits; 571 ierr = PetscFree(recv_waits1);CHKERRQ(ierr); 572 ierr = PetscFree(recv_waits2);CHKERRQ(ierr); 573 574 stash->svalues = svalues; 575 stash->sindices = sindices; 576 stash->rvalues = rvalues; 577 stash->rindices = rindices; 578 stash->send_waits = send_waits; 579 stash->nsends = nsends; 580 stash->nrecvs = nreceives; 581 PetscFunctionReturn(0); 582 } 583 584 /* 585 MatStashScatterGetMesg_Private - This function waits on the receives posted 586 in the function MatStashScatterBegin_Private() and returns one message at 587 a time to the calling function. If no messages are left, it indicates this 588 by setting flg = 0, else it sets flg = 1. 589 590 Input Parameters: 591 stash - the stash 592 593 Output Parameters: 594 nvals - the number of entries in the current message. 595 rows - an array of row indices (or blocked indices) corresponding to the values 596 cols - an array of columnindices (or blocked indices) corresponding to the values 597 vals - the values 598 flg - 0 indicates no more message left, and the current call has no values associated. 599 1 indicates that the current call successfully received a message, and the 600 other output parameters nvals,rows,cols,vals are set appropriately. 601 */ 602 #undef __FUNCT__ 603 #define __FUNCT__ "MatStashScatterGetMesg_Private" 604 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,PetscScalar **vals,PetscInt *flg) 605 { 606 PetscErrorCode ierr; 607 PetscMPIInt i,*flg_v,i1,i2,size; 608 PetscInt bs2; 609 MPI_Status recv_status; 610 PetscTruth match_found = PETSC_FALSE; 611 612 PetscFunctionBegin; 613 614 *flg = 0; /* When a message is discovered this is reset to 1 */ 615 /* Return if no more messages to process */ 616 if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); } 617 618 ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr); 619 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&flg_v);CHKERRQ(ierr); 620 for (i=0; i<2*size; i++) flg_v[i] = -1; 621 622 bs2 = stash->bs*stash->bs; 623 /* If a matching pair of receieves are found, process them, and return the data to 624 the calling function. Until then keep receiving messages */ 625 while (!match_found) { 626 ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 627 /* Now pack the received message into a structure which is useable by others */ 628 if (i % 2) { 629 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr); 630 flg_v[2*recv_status.MPI_SOURCE] = i/2; 631 *nvals = *nvals/bs2; 632 } else { 633 ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr); 634 flg_v[2*recv_status.MPI_SOURCE+1] = i/2; 635 *nvals = *nvals/2; /* This message has both row indices and col indices */ 636 } 637 638 /* Check if we have both messages from this proc */ 639 i1 = flg_v[2*recv_status.MPI_SOURCE]; 640 i2 = flg_v[2*recv_status.MPI_SOURCE+1]; 641 if (i1 != -1 && i2 != -1) { 642 *rows = stash->rindices[i2]; 643 *cols = *rows + *nvals; 644 *vals = stash->rvalues[i1]; 645 *flg = 1; 646 stash->nprocessed ++; 647 match_found = PETSC_TRUE; 648 } 649 } 650 ierr = PetscFree(flg_v);CHKERRQ(ierr); 651 PetscFunctionReturn(0); 652 } 653