1 #define PETSCMAT_DLL 2 3 #include "private/matimpl.h" 4 5 #define DEFAULT_STASH_SIZE 10000 6 7 /* 8 MatStashCreate_Private - Creates a stash,currently used for all the parallel 9 matrix implementations. The stash is where elements of a matrix destined 10 to be stored on other processors are kept until matrix assembly is done. 11 12 This is a simple minded stash. Simply adds entries to end of stash. 13 14 Input Parameters: 15 comm - communicator, required for scatters. 16 bs - stash block size. used when stashing blocks of values 17 18 Output Parameters: 19 stash - the newly created stash 20 */ 21 #undef __FUNCT__ 22 #define __FUNCT__ "MatStashCreate_Private" 23 PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash) 24 { 25 PetscErrorCode ierr; 26 PetscInt max,*opt,nopt,i; 27 PetscBool flg; 28 29 PetscFunctionBegin; 30 /* Require 2 tags,get the second using PetscCommGetNewTag() */ 31 stash->comm = comm; 32 ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr); 33 ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr); 34 ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr); 35 ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr); 36 ierr = PetscMalloc(2*stash->size*sizeof(PetscMPIInt),&stash->flg_v);CHKERRQ(ierr); 37 for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 38 39 40 nopt = stash->size; 41 ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr); 42 ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr); 43 if (flg) { 44 if (nopt == 1) max = opt[0]; 45 else if (nopt == stash->size) max = opt[stash->rank]; 46 else if (stash->rank < nopt) max = opt[stash->rank]; 47 else max = 0; /* Use default */ 48 stash->umax = max; 49 } else { 50 stash->umax = 0; 51 } 52 ierr = PetscFree(opt);CHKERRQ(ierr); 53 if (bs <= 0) bs = 1; 54 55 stash->bs = bs; 56 stash->nmax = 0; 57 stash->oldnmax = 0; 58 stash->n = 0; 59 stash->reallocs = -1; 60 stash->space_head = 0; 61 stash->space = 0; 62 63 stash->send_waits = 0; 64 stash->recv_waits = 0; 65 stash->send_status = 0; 66 stash->nsends = 0; 67 stash->nrecvs = 0; 68 stash->svalues = 0; 69 stash->rvalues = 0; 70 stash->rindices = 0; 71 stash->nprocessed = 0; 72 73 stash->reproduce = PETSC_FALSE; 74 ierr = PetscOptionsGetBool(PETSC_NULL,"-matstash_reproduce",&stash->reproduce,PETSC_NULL);CHKERRQ(ierr); 75 PetscFunctionReturn(0); 76 } 77 78 /* 79 MatStashDestroy_Private - Destroy the stash 80 */ 81 #undef __FUNCT__ 82 #define __FUNCT__ "MatStashDestroy_Private" 83 PetscErrorCode MatStashDestroy_Private(MatStash *stash) 84 { 85 PetscErrorCode ierr; 86 87 PetscFunctionBegin; 88 if (stash->space_head){ 89 ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr); 90 stash->space_head = 0; 91 stash->space = 0; 92 } 93 ierr = PetscFree(stash->flg_v);CHKERRQ(ierr); 94 PetscFunctionReturn(0); 95 } 96 97 /* 98 MatStashScatterEnd_Private - This is called as the final stage of 99 scatter. The final stages of message passing is done here, and 100 all the memory used for message passing is cleaned up. This 101 routine also resets the stash, and deallocates the memory used 102 for the stash. It also keeps track of the current memory usage 103 so that the same value can be used the next time through. 104 */ 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatStashScatterEnd_Private" 107 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash) 108 { 109 PetscErrorCode ierr; 110 PetscInt nsends=stash->nsends,bs2,oldnmax,i; 111 MPI_Status *send_status; 112 113 PetscFunctionBegin; 114 for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 115 /* wait on sends */ 116 if (nsends) { 117 ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 118 ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 119 ierr = PetscFree(send_status);CHKERRQ(ierr); 120 } 121 122 /* Now update nmaxold to be app 10% more than max n used, this way the 123 wastage of space is reduced the next time this stash is used. 124 Also update the oldmax, only if it increases */ 125 if (stash->n) { 126 bs2 = stash->bs*stash->bs; 127 oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 128 if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 129 } 130 131 stash->nmax = 0; 132 stash->n = 0; 133 stash->reallocs = -1; 134 stash->nprocessed = 0; 135 if (stash->space_head){ 136 ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr); 137 stash->space_head = 0; 138 stash->space = 0; 139 } 140 ierr = PetscFree(stash->send_waits);CHKERRQ(ierr); 141 ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr); 142 ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr); 143 ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr); 144 ierr = PetscFree(stash->rvalues);CHKERRQ(ierr); 145 ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr); 146 ierr = PetscFree(stash->rindices);CHKERRQ(ierr); 147 PetscFunctionReturn(0); 148 } 149 150 /* 151 MatStashGetInfo_Private - Gets the relavant statistics of the stash 152 153 Input Parameters: 154 stash - the stash 155 nstash - the size of the stash. Indicates the number of values stored. 156 reallocs - the number of additional mallocs incurred. 157 158 */ 159 #undef __FUNCT__ 160 #define __FUNCT__ "MatStashGetInfo_Private" 161 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs) 162 { 163 PetscInt bs2 = stash->bs*stash->bs; 164 165 PetscFunctionBegin; 166 if (nstash) *nstash = stash->n*bs2; 167 if (reallocs) { 168 if (stash->reallocs < 0) *reallocs = 0; 169 else *reallocs = stash->reallocs; 170 } 171 PetscFunctionReturn(0); 172 } 173 174 /* 175 MatStashSetInitialSize_Private - Sets the initial size of the stash 176 177 Input Parameters: 178 stash - the stash 179 max - the value that is used as the max size of the stash. 180 this value is used while allocating memory. 181 */ 182 #undef __FUNCT__ 183 #define __FUNCT__ "MatStashSetInitialSize_Private" 184 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max) 185 { 186 PetscFunctionBegin; 187 stash->umax = max; 188 PetscFunctionReturn(0); 189 } 190 191 /* MatStashExpand_Private - Expand the stash. This function is called 192 when the space in the stash is not sufficient to add the new values 193 being inserted into the stash. 194 195 Input Parameters: 196 stash - the stash 197 incr - the minimum increase requested 198 199 Notes: 200 This routine doubles the currently used memory. 201 */ 202 #undef __FUNCT__ 203 #define __FUNCT__ "MatStashExpand_Private" 204 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr) 205 { 206 PetscErrorCode ierr; 207 PetscInt newnmax,bs2= stash->bs*stash->bs; 208 209 PetscFunctionBegin; 210 /* allocate a larger stash */ 211 if (!stash->oldnmax && !stash->nmax) { /* new stash */ 212 if (stash->umax) newnmax = stash->umax/bs2; 213 else newnmax = DEFAULT_STASH_SIZE/bs2; 214 } else if (!stash->nmax) { /* resuing stash */ 215 if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2; 216 else newnmax = stash->oldnmax/bs2; 217 } else newnmax = stash->nmax*2; 218 if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 219 220 /* Get a MatStashSpace and attach it to stash */ 221 ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr); 222 if (!stash->space_head) { /* new stash or resuing stash->oldnmax */ 223 stash->space_head = stash->space; 224 } 225 226 stash->reallocs++; 227 stash->nmax = newnmax; 228 PetscFunctionReturn(0); 229 } 230 /* 231 MatStashValuesRow_Private - inserts values into the stash. This function 232 expects the values to be roworiented. Multiple columns belong to the same row 233 can be inserted with a single call to this function. 234 235 Input Parameters: 236 stash - the stash 237 row - the global row correspoiding to the values 238 n - the number of elements inserted. All elements belong to the above row. 239 idxn - the global column indices corresponding to each of the values. 240 values - the values inserted 241 */ 242 #undef __FUNCT__ 243 #define __FUNCT__ "MatStashValuesRow_Private" 244 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries) 245 { 246 PetscErrorCode ierr; 247 PetscInt i,k,cnt = 0; 248 PetscMatStashSpace space=stash->space; 249 250 PetscFunctionBegin; 251 /* Check and see if we have sufficient memory */ 252 if (!space || space->local_remaining < n){ 253 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 254 } 255 space = stash->space; 256 k = space->local_used; 257 for (i=0; i<n; i++) { 258 if (ignorezeroentries && (values[i] == 0.0)) continue; 259 space->idx[k] = row; 260 space->idy[k] = idxn[i]; 261 space->val[k] = values[i]; 262 k++; 263 cnt++; 264 } 265 stash->n += cnt; 266 space->local_used += cnt; 267 space->local_remaining -= cnt; 268 PetscFunctionReturn(0); 269 } 270 271 /* 272 MatStashValuesCol_Private - inserts values into the stash. This function 273 expects the values to be columnoriented. Multiple columns belong to the same row 274 can be inserted with a single call to this function. 275 276 Input Parameters: 277 stash - the stash 278 row - the global row correspoiding to the values 279 n - the number of elements inserted. All elements belong to the above row. 280 idxn - the global column indices corresponding to each of the values. 281 values - the values inserted 282 stepval - the consecutive values are sepated by a distance of stepval. 283 this happens because the input is columnoriented. 284 */ 285 #undef __FUNCT__ 286 #define __FUNCT__ "MatStashValuesCol_Private" 287 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries) 288 { 289 PetscErrorCode ierr; 290 PetscInt i,k,cnt = 0; 291 PetscMatStashSpace space=stash->space; 292 293 PetscFunctionBegin; 294 /* Check and see if we have sufficient memory */ 295 if (!space || space->local_remaining < n){ 296 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 297 } 298 space = stash->space; 299 k = space->local_used; 300 for (i=0; i<n; i++) { 301 if (ignorezeroentries && (values[i*stepval] == 0.0)) continue; 302 space->idx[k] = row; 303 space->idy[k] = idxn[i]; 304 space->val[k] = values[i*stepval]; 305 k++; 306 cnt++; 307 } 308 stash->n += cnt; 309 space->local_used += cnt; 310 space->local_remaining -= cnt; 311 PetscFunctionReturn(0); 312 } 313 314 /* 315 MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 316 This function expects the values to be roworiented. Multiple columns belong 317 to the same block-row can be inserted with a single call to this function. 318 This function extracts the sub-block of values based on the dimensions of 319 the original input block, and the row,col values corresponding to the blocks. 320 321 Input Parameters: 322 stash - the stash 323 row - the global block-row correspoiding to the values 324 n - the number of elements inserted. All elements belong to the above row. 325 idxn - the global block-column indices corresponding to each of the blocks of 326 values. Each block is of size bs*bs. 327 values - the values inserted 328 rmax - the number of block-rows in the original block. 329 cmax - the number of block-columsn on the original block. 330 idx - the index of the current block-row in the original block. 331 */ 332 #undef __FUNCT__ 333 #define __FUNCT__ "MatStashValuesRowBlocked_Private" 334 PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 335 { 336 PetscErrorCode ierr; 337 PetscInt i,j,k,bs2,bs=stash->bs,l; 338 const PetscScalar *vals; 339 PetscScalar *array; 340 PetscMatStashSpace space=stash->space; 341 342 PetscFunctionBegin; 343 if (!space || space->local_remaining < n){ 344 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 345 } 346 space = stash->space; 347 l = space->local_used; 348 bs2 = bs*bs; 349 for (i=0; i<n; i++) { 350 space->idx[l] = row; 351 space->idy[l] = idxn[i]; 352 /* Now copy over the block of values. Store the values column oriented. 353 This enables inserting multiple blocks belonging to a row with a single 354 funtion call */ 355 array = space->val + bs2*l; 356 vals = values + idx*bs2*n + bs*i; 357 for (j=0; j<bs; j++) { 358 for (k=0; k<bs; k++) array[k*bs] = vals[k]; 359 array++; 360 vals += cmax*bs; 361 } 362 l++; 363 } 364 stash->n += n; 365 space->local_used += n; 366 space->local_remaining -= n; 367 PetscFunctionReturn(0); 368 } 369 370 /* 371 MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 372 This function expects the values to be roworiented. Multiple columns belong 373 to the same block-row can be inserted with a single call to this function. 374 This function extracts the sub-block of values based on the dimensions of 375 the original input block, and the row,col values corresponding to the blocks. 376 377 Input Parameters: 378 stash - the stash 379 row - the global block-row correspoiding to the values 380 n - the number of elements inserted. All elements belong to the above row. 381 idxn - the global block-column indices corresponding to each of the blocks of 382 values. Each block is of size bs*bs. 383 values - the values inserted 384 rmax - the number of block-rows in the original block. 385 cmax - the number of block-columsn on the original block. 386 idx - the index of the current block-row in the original block. 387 */ 388 #undef __FUNCT__ 389 #define __FUNCT__ "MatStashValuesColBlocked_Private" 390 PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 391 { 392 PetscErrorCode ierr; 393 PetscInt i,j,k,bs2,bs=stash->bs,l; 394 const PetscScalar *vals; 395 PetscScalar *array; 396 PetscMatStashSpace space=stash->space; 397 398 PetscFunctionBegin; 399 if (!space || space->local_remaining < n){ 400 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 401 } 402 space = stash->space; 403 l = space->local_used; 404 bs2 = bs*bs; 405 for (i=0; i<n; i++) { 406 space->idx[l] = row; 407 space->idy[l] = idxn[i]; 408 /* Now copy over the block of values. Store the values column oriented. 409 This enables inserting multiple blocks belonging to a row with a single 410 funtion call */ 411 array = space->val + bs2*l; 412 vals = values + idx*bs2*n + bs*i; 413 for (j=0; j<bs; j++) { 414 for (k=0; k<bs; k++) {array[k] = vals[k];} 415 array += bs; 416 vals += rmax*bs; 417 } 418 l++; 419 } 420 stash->n += n; 421 space->local_used += n; 422 space->local_remaining -= n; 423 PetscFunctionReturn(0); 424 } 425 /* 426 MatStashScatterBegin_Private - Initiates the transfer of values to the 427 correct owners. This function goes through the stash, and check the 428 owners of each stashed value, and sends the values off to the owner 429 processors. 430 431 Input Parameters: 432 stash - the stash 433 owners - an array of size 'no-of-procs' which gives the ownership range 434 for each node. 435 436 Notes: The 'owners' array in the cased of the blocked-stash has the 437 ranges specified blocked global indices, and for the regular stash in 438 the proper global indices. 439 */ 440 #undef __FUNCT__ 441 #define __FUNCT__ "MatStashScatterBegin_Private" 442 PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners) 443 { 444 PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 445 PetscInt size=stash->size,nsends; 446 PetscErrorCode ierr; 447 PetscInt count,*sindices,**rindices,i,j,idx,lastidx,l; 448 PetscScalar **rvalues,*svalues; 449 MPI_Comm comm = stash->comm; 450 MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 451 PetscMPIInt *nprocs,*nlengths,nreceives; 452 PetscInt *sp_idx,*sp_idy; 453 PetscScalar *sp_val; 454 PetscMatStashSpace space,space_next; 455 456 PetscFunctionBegin; 457 bs2 = stash->bs*stash->bs; 458 459 /* first count number of contributors to each processor */ 460 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr); 461 ierr = PetscMemzero(nprocs,size*sizeof(PetscMPIInt));CHKERRQ(ierr); 462 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&nlengths);CHKERRQ(ierr); 463 ierr = PetscMemzero(nlengths,size*sizeof(PetscMPIInt));CHKERRQ(ierr); 464 ierr = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); 465 466 i = j = 0; 467 lastidx = -1; 468 space = stash->space_head; 469 while (space != PETSC_NULL){ 470 space_next = space->next; 471 sp_idx = space->idx; 472 for (l=0; l<space->local_used; l++){ 473 /* if indices are NOT locally sorted, need to start search at the beginning */ 474 if (lastidx > (idx = sp_idx[l])) j = 0; 475 lastidx = idx; 476 for (; j<size; j++) { 477 if (idx >= owners[j] && idx < owners[j+1]) { 478 nlengths[j]++; owner[i] = j; break; 479 } 480 } 481 i++; 482 } 483 space = space_next; 484 } 485 /* Now check what procs get messages - and compute nsends. */ 486 for (i=0, nsends=0 ; i<size; i++) { 487 if (nlengths[i]) { nprocs[i] = 1; nsends ++;} 488 } 489 490 {PetscMPIInt *onodes,*olengths; 491 /* Determine the number of messages to expect, their lengths, from from-ids */ 492 ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr); 493 ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr); 494 /* since clubbing row,col - lengths are multiplied by 2 */ 495 for (i=0; i<nreceives; i++) olengths[i] *=2; 496 ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr); 497 /* values are size 'bs2' lengths (and remove earlier factor 2 */ 498 for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 499 ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr); 500 ierr = PetscFree(onodes);CHKERRQ(ierr); 501 ierr = PetscFree(olengths);CHKERRQ(ierr); 502 } 503 504 /* do sends: 505 1) starts[i] gives the starting index in svalues for stuff going to 506 the ith processor 507 */ 508 ierr = PetscMalloc2(bs2*stash->n,PetscScalar,&svalues,2*(stash->n+1),PetscInt,&sindices);CHKERRQ(ierr); 509 ierr = PetscMalloc(2*nsends*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 510 ierr = PetscMalloc2(size,PetscInt,&startv,size,PetscInt,&starti);CHKERRQ(ierr); 511 /* use 2 sends the first with all_a, the next with all_i and all_j */ 512 startv[0] = 0; starti[0] = 0; 513 for (i=1; i<size; i++) { 514 startv[i] = startv[i-1] + nlengths[i-1]; 515 starti[i] = starti[i-1] + 2*nlengths[i-1]; 516 } 517 518 i = 0; 519 space = stash->space_head; 520 while (space != PETSC_NULL){ 521 space_next = space->next; 522 sp_idx = space->idx; 523 sp_idy = space->idy; 524 sp_val = space->val; 525 for (l=0; l<space->local_used; l++){ 526 j = owner[i]; 527 if (bs2 == 1) { 528 svalues[startv[j]] = sp_val[l]; 529 } else { 530 PetscInt k; 531 PetscScalar *buf1,*buf2; 532 buf1 = svalues+bs2*startv[j]; 533 buf2 = space->val + bs2*l; 534 for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; } 535 } 536 sindices[starti[j]] = sp_idx[l]; 537 sindices[starti[j]+nlengths[j]] = sp_idy[l]; 538 startv[j]++; 539 starti[j]++; 540 i++; 541 } 542 space = space_next; 543 } 544 startv[0] = 0; 545 for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];} 546 547 for (i=0,count=0; i<size; i++) { 548 if (nprocs[i]) { 549 ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr); 550 ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr); 551 } 552 } 553 #if defined(PETSC_USE_INFO) 554 ierr = PetscInfo1(mat,"No of messages: %d \n",nsends);CHKERRQ(ierr); 555 for (i=0; i<size; i++) { 556 if (nprocs[i]) { 557 ierr = PetscInfo2(mat,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(PetscScalar)+2*sizeof(PetscInt));CHKERRQ(ierr); 558 } 559 } 560 #endif 561 ierr = PetscFree(nlengths);CHKERRQ(ierr); 562 ierr = PetscFree(owner);CHKERRQ(ierr); 563 ierr = PetscFree2(startv,starti);CHKERRQ(ierr); 564 ierr = PetscFree(nprocs);CHKERRQ(ierr); 565 566 /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 567 ierr = PetscMalloc(2*nreceives*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 568 569 for (i=0; i<nreceives; i++) { 570 recv_waits[2*i] = recv_waits1[i]; 571 recv_waits[2*i+1] = recv_waits2[i]; 572 } 573 stash->recv_waits = recv_waits; 574 ierr = PetscFree(recv_waits1);CHKERRQ(ierr); 575 ierr = PetscFree(recv_waits2);CHKERRQ(ierr); 576 577 stash->svalues = svalues; 578 stash->sindices = sindices; 579 stash->rvalues = rvalues; 580 stash->rindices = rindices; 581 stash->send_waits = send_waits; 582 stash->nsends = nsends; 583 stash->nrecvs = nreceives; 584 stash->reproduce_count = 0; 585 PetscFunctionReturn(0); 586 } 587 588 /* 589 MatStashScatterGetMesg_Private - This function waits on the receives posted 590 in the function MatStashScatterBegin_Private() and returns one message at 591 a time to the calling function. If no messages are left, it indicates this 592 by setting flg = 0, else it sets flg = 1. 593 594 Input Parameters: 595 stash - the stash 596 597 Output Parameters: 598 nvals - the number of entries in the current message. 599 rows - an array of row indices (or blocked indices) corresponding to the values 600 cols - an array of columnindices (or blocked indices) corresponding to the values 601 vals - the values 602 flg - 0 indicates no more message left, and the current call has no values associated. 603 1 indicates that the current call successfully received a message, and the 604 other output parameters nvals,rows,cols,vals are set appropriately. 605 */ 606 #undef __FUNCT__ 607 #define __FUNCT__ "MatStashScatterGetMesg_Private" 608 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,PetscScalar **vals,PetscInt *flg) 609 { 610 PetscErrorCode ierr; 611 PetscMPIInt i,*flg_v = stash->flg_v,i1,i2; 612 PetscInt bs2; 613 MPI_Status recv_status; 614 PetscBool match_found = PETSC_FALSE; 615 616 PetscFunctionBegin; 617 618 *flg = 0; /* When a message is discovered this is reset to 1 */ 619 /* Return if no more messages to process */ 620 if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); } 621 622 bs2 = stash->bs*stash->bs; 623 /* If a matching pair of receives are found, process them, and return the data to 624 the calling function. Until then keep receiving messages */ 625 while (!match_found) { 626 CHKMEMQ; 627 if (stash->reproduce) { 628 i = stash->reproduce_count++; 629 ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr); 630 } else { 631 ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 632 } 633 CHKMEMQ; 634 if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!"); 635 636 /* Now pack the received message into a structure which is usable by others */ 637 if (i % 2) { 638 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr); 639 flg_v[2*recv_status.MPI_SOURCE] = i/2; 640 *nvals = *nvals/bs2; 641 } else { 642 ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr); 643 flg_v[2*recv_status.MPI_SOURCE+1] = i/2; 644 *nvals = *nvals/2; /* This message has both row indices and col indices */ 645 } 646 647 /* Check if we have both messages from this proc */ 648 i1 = flg_v[2*recv_status.MPI_SOURCE]; 649 i2 = flg_v[2*recv_status.MPI_SOURCE+1]; 650 if (i1 != -1 && i2 != -1) { 651 *rows = stash->rindices[i2]; 652 *cols = *rows + *nvals; 653 *vals = stash->rvalues[i1]; 654 *flg = 1; 655 stash->nprocessed ++; 656 match_found = PETSC_TRUE; 657 } 658 } 659 PetscFunctionReturn(0); 660 } 661