1 #define PETSCMAT_DLL 2 3 #include "private/matimpl.h" 4 5 #define DEFAULT_STASH_SIZE 10000 6 7 /* 8 MatStashCreate_Private - Creates a stash,currently used for all the parallel 9 matrix implementations. The stash is where elements of a matrix destined 10 to be stored on other processors are kept until matrix assembly is done. 11 12 This is a simple minded stash. Simply adds entries to end of stash. 13 14 Input Parameters: 15 comm - communicator, required for scatters. 16 bs - stash block size. used when stashing blocks of values 17 18 Output Parameters: 19 stash - the newly created stash 20 */ 21 #undef __FUNCT__ 22 #define __FUNCT__ "MatStashCreate_Private" 23 PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash) 24 { 25 PetscErrorCode ierr; 26 PetscInt max,*opt,nopt,i; 27 PetscTruth flg; 28 29 PetscFunctionBegin; 30 /* Require 2 tags,get the second using PetscCommGetNewTag() */ 31 stash->comm = comm; 32 ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr); 33 ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr); 34 ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr); 35 ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr); 36 ierr = PetscMalloc(2*stash->size*sizeof(PetscMPIInt),&stash->flg_v);CHKERRQ(ierr); 37 for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 38 39 40 nopt = stash->size; 41 ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr); 42 ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr); 43 if (flg) { 44 if (nopt == 1) max = opt[0]; 45 else if (nopt == stash->size) max = opt[stash->rank]; 46 else if (stash->rank < nopt) max = opt[stash->rank]; 47 else max = 0; /* Use default */ 48 stash->umax = max; 49 } else { 50 stash->umax = 0; 51 } 52 ierr = PetscFree(opt);CHKERRQ(ierr); 53 if (bs <= 0) bs = 1; 54 55 stash->bs = bs; 56 stash->nmax = 0; 57 stash->oldnmax = 0; 58 stash->n = 0; 59 stash->reallocs = -1; 60 stash->space_head = 0; 61 stash->space = 0; 62 63 stash->send_waits = 0; 64 stash->recv_waits = 0; 65 stash->send_status = 0; 66 stash->nsends = 0; 67 stash->nrecvs = 0; 68 stash->svalues = 0; 69 stash->rvalues = 0; 70 stash->rindices = 0; 71 stash->nprocessed = 0; 72 PetscFunctionReturn(0); 73 } 74 75 /* 76 MatStashDestroy_Private - Destroy the stash 77 */ 78 #undef __FUNCT__ 79 #define __FUNCT__ "MatStashDestroy_Private" 80 PetscErrorCode MatStashDestroy_Private(MatStash *stash) 81 { 82 PetscErrorCode ierr; 83 84 PetscFunctionBegin; 85 if (stash->space_head){ 86 ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr); 87 stash->space_head = 0; 88 stash->space = 0; 89 } 90 ierr = PetscFree(stash->flg_v);CHKERRQ(ierr); 91 PetscFunctionReturn(0); 92 } 93 94 /* 95 MatStashScatterEnd_Private - This is called as the fial stage of 96 scatter. The final stages of messagepassing is done here, and 97 all the memory used for messagepassing is cleanedu up. This 98 routine also resets the stash, and deallocates the memory used 99 for the stash. It also keeps track of the current memory usage 100 so that the same value can be used the next time through. 101 */ 102 #undef __FUNCT__ 103 #define __FUNCT__ "MatStashScatterEnd_Private" 104 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash) 105 { 106 PetscErrorCode ierr; 107 PetscInt nsends=stash->nsends,bs2,oldnmax,i; 108 MPI_Status *send_status; 109 110 PetscFunctionBegin; 111 for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 112 /* wait on sends */ 113 if (nsends) { 114 ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 115 ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 116 ierr = PetscFree(send_status);CHKERRQ(ierr); 117 } 118 119 /* Now update nmaxold to be app 10% more than max n used, this way the 120 wastage of space is reduced the next time this stash is used. 121 Also update the oldmax, only if it increases */ 122 if (stash->n) { 123 bs2 = stash->bs*stash->bs; 124 oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 125 if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 126 } 127 128 stash->nmax = 0; 129 stash->n = 0; 130 stash->reallocs = -1; 131 stash->nprocessed = 0; 132 if (stash->space_head){ 133 ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr); 134 stash->space_head = 0; 135 stash->space = 0; 136 } 137 ierr = PetscFree(stash->send_waits);CHKERRQ(ierr); 138 stash->send_waits = 0; 139 ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr); 140 stash->recv_waits = 0; 141 ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr); 142 stash->svalues = 0; 143 ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr); 144 ierr = PetscFree(stash->rvalues);CHKERRQ(ierr); 145 stash->rvalues = 0; 146 ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr); 147 ierr = PetscFree(stash->rindices);CHKERRQ(ierr); 148 stash->rindices = 0; 149 PetscFunctionReturn(0); 150 } 151 152 /* 153 MatStashGetInfo_Private - Gets the relavant statistics of the stash 154 155 Input Parameters: 156 stash - the stash 157 nstash - the size of the stash. Indicates the number of values stored. 158 reallocs - the number of additional mallocs incurred. 159 160 */ 161 #undef __FUNCT__ 162 #define __FUNCT__ "MatStashGetInfo_Private" 163 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs) 164 { 165 PetscInt bs2 = stash->bs*stash->bs; 166 167 PetscFunctionBegin; 168 if (nstash) *nstash = stash->n*bs2; 169 if (reallocs) { 170 if (stash->reallocs < 0) *reallocs = 0; 171 else *reallocs = stash->reallocs; 172 } 173 PetscFunctionReturn(0); 174 } 175 176 /* 177 MatStashSetInitialSize_Private - Sets the initial size of the stash 178 179 Input Parameters: 180 stash - the stash 181 max - the value that is used as the max size of the stash. 182 this value is used while allocating memory. 183 */ 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatStashSetInitialSize_Private" 186 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max) 187 { 188 PetscFunctionBegin; 189 stash->umax = max; 190 PetscFunctionReturn(0); 191 } 192 193 /* MatStashExpand_Private - Expand the stash. This function is called 194 when the space in the stash is not sufficient to add the new values 195 being inserted into the stash. 196 197 Input Parameters: 198 stash - the stash 199 incr - the minimum increase requested 200 201 Notes: 202 This routine doubles the currently used memory. 203 */ 204 #undef __FUNCT__ 205 #define __FUNCT__ "MatStashExpand_Private" 206 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr) 207 { 208 PetscErrorCode ierr; 209 PetscInt newnmax,bs2= stash->bs*stash->bs; 210 211 PetscFunctionBegin; 212 /* allocate a larger stash */ 213 if (!stash->oldnmax && !stash->nmax) { /* new stash */ 214 if (stash->umax) newnmax = stash->umax/bs2; 215 else newnmax = DEFAULT_STASH_SIZE/bs2; 216 } else if (!stash->nmax) { /* resuing stash */ 217 if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2; 218 else newnmax = stash->oldnmax/bs2; 219 } else newnmax = stash->nmax*2; 220 if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 221 222 /* Get a MatStashSpace and attach it to stash */ 223 ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr); 224 if (!stash->space_head) { /* new stash or resuing stash->oldnmax */ 225 stash->space_head = stash->space; 226 } 227 228 stash->reallocs++; 229 stash->nmax = newnmax; 230 PetscFunctionReturn(0); 231 } 232 /* 233 MatStashValuesRow_Private - inserts values into the stash. This function 234 expects the values to be roworiented. Multiple columns belong to the same row 235 can be inserted with a single call to this function. 236 237 Input Parameters: 238 stash - the stash 239 row - the global row correspoiding to the values 240 n - the number of elements inserted. All elements belong to the above row. 241 idxn - the global column indices corresponding to each of the values. 242 values - the values inserted 243 */ 244 #undef __FUNCT__ 245 #define __FUNCT__ "MatStashValuesRow_Private" 246 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscTruth ignorezeroentries) 247 { 248 PetscErrorCode ierr; 249 PetscInt i,k,cnt = 0; 250 PetscMatStashSpace space=stash->space; 251 252 PetscFunctionBegin; 253 /* Check and see if we have sufficient memory */ 254 if (!space || space->local_remaining < n){ 255 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 256 } 257 space = stash->space; 258 k = space->local_used; 259 for (i=0; i<n; i++) { 260 if (ignorezeroentries && (values[i] == 0.0)) continue; 261 space->idx[k] = row; 262 space->idy[k] = idxn[i]; 263 space->val[k] = values[i]; 264 k++; 265 cnt++; 266 } 267 stash->n += cnt; 268 space->local_used += cnt; 269 space->local_remaining -= cnt; 270 PetscFunctionReturn(0); 271 } 272 273 /* 274 MatStashValuesCol_Private - inserts values into the stash. This function 275 expects the values to be columnoriented. Multiple columns belong to the same row 276 can be inserted with a single call to this function. 277 278 Input Parameters: 279 stash - the stash 280 row - the global row correspoiding to the values 281 n - the number of elements inserted. All elements belong to the above row. 282 idxn - the global column indices corresponding to each of the values. 283 values - the values inserted 284 stepval - the consecutive values are sepated by a distance of stepval. 285 this happens because the input is columnoriented. 286 */ 287 #undef __FUNCT__ 288 #define __FUNCT__ "MatStashValuesCol_Private" 289 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscTruth ignorezeroentries) 290 { 291 PetscErrorCode ierr; 292 PetscInt i,k,cnt = 0; 293 PetscMatStashSpace space=stash->space; 294 295 PetscFunctionBegin; 296 /* Check and see if we have sufficient memory */ 297 if (!space || space->local_remaining < n){ 298 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 299 } 300 space = stash->space; 301 k = space->local_used; 302 for (i=0; i<n; i++) { 303 if (ignorezeroentries && (values[i*stepval] == 0.0)) continue; 304 space->idx[k] = row; 305 space->idy[k] = idxn[i]; 306 space->val[k] = values[i*stepval]; 307 k++; 308 cnt++; 309 } 310 stash->n += cnt; 311 space->local_used += cnt; 312 space->local_remaining -= cnt; 313 PetscFunctionReturn(0); 314 } 315 316 /* 317 MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 318 This function expects the values to be roworiented. Multiple columns belong 319 to the same block-row can be inserted with a single call to this function. 320 This function extracts the sub-block of values based on the dimensions of 321 the original input block, and the row,col values corresponding to the blocks. 322 323 Input Parameters: 324 stash - the stash 325 row - the global block-row correspoiding to the values 326 n - the number of elements inserted. All elements belong to the above row. 327 idxn - the global block-column indices corresponding to each of the blocks of 328 values. Each block is of size bs*bs. 329 values - the values inserted 330 rmax - the number of block-rows in the original block. 331 cmax - the number of block-columsn on the original block. 332 idx - the index of the current block-row in the original block. 333 */ 334 #undef __FUNCT__ 335 #define __FUNCT__ "MatStashValuesRowBlocked_Private" 336 PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 337 { 338 PetscErrorCode ierr; 339 PetscInt i,j,k,bs2,bs=stash->bs,l; 340 const PetscScalar *vals; 341 PetscScalar *array; 342 PetscMatStashSpace space=stash->space; 343 344 PetscFunctionBegin; 345 if (!space || space->local_remaining < n){ 346 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 347 } 348 space = stash->space; 349 l = space->local_used; 350 bs2 = bs*bs; 351 for (i=0; i<n; i++) { 352 space->idx[l] = row; 353 space->idy[l] = idxn[i]; 354 /* Now copy over the block of values. Store the values column oriented. 355 This enables inserting multiple blocks belonging to a row with a single 356 funtion call */ 357 array = space->val + bs2*l; 358 vals = values + idx*bs2*n + bs*i; 359 for (j=0; j<bs; j++) { 360 for (k=0; k<bs; k++) array[k*bs] = vals[k]; 361 array++; 362 vals += cmax*bs; 363 } 364 l++; 365 } 366 stash->n += n; 367 space->local_used += n; 368 space->local_remaining -= n; 369 PetscFunctionReturn(0); 370 } 371 372 /* 373 MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 374 This function expects the values to be roworiented. Multiple columns belong 375 to the same block-row can be inserted with a single call to this function. 376 This function extracts the sub-block of values based on the dimensions of 377 the original input block, and the row,col values corresponding to the blocks. 378 379 Input Parameters: 380 stash - the stash 381 row - the global block-row correspoiding to the values 382 n - the number of elements inserted. All elements belong to the above row. 383 idxn - the global block-column indices corresponding to each of the blocks of 384 values. Each block is of size bs*bs. 385 values - the values inserted 386 rmax - the number of block-rows in the original block. 387 cmax - the number of block-columsn on the original block. 388 idx - the index of the current block-row in the original block. 389 */ 390 #undef __FUNCT__ 391 #define __FUNCT__ "MatStashValuesColBlocked_Private" 392 PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 393 { 394 PetscErrorCode ierr; 395 PetscInt i,j,k,bs2,bs=stash->bs,l; 396 const PetscScalar *vals; 397 PetscScalar *array; 398 PetscMatStashSpace space=stash->space; 399 400 PetscFunctionBegin; 401 if (!space || space->local_remaining < n){ 402 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 403 } 404 space = stash->space; 405 l = space->local_used; 406 bs2 = bs*bs; 407 for (i=0; i<n; i++) { 408 space->idx[l] = row; 409 space->idy[l] = idxn[i]; 410 /* Now copy over the block of values. Store the values column oriented. 411 This enables inserting multiple blocks belonging to a row with a single 412 funtion call */ 413 array = space->val + bs2*l; 414 vals = values + idx*bs2*n + bs*i; 415 for (j=0; j<bs; j++) { 416 for (k=0; k<bs; k++) {array[k] = vals[k];} 417 array += bs; 418 vals += rmax*bs; 419 } 420 l++; 421 } 422 stash->n += n; 423 space->local_used += n; 424 space->local_remaining -= n; 425 PetscFunctionReturn(0); 426 } 427 /* 428 MatStashScatterBegin_Private - Initiates the transfer of values to the 429 correct owners. This function goes through the stash, and check the 430 owners of each stashed value, and sends the values off to the owner 431 processors. 432 433 Input Parameters: 434 stash - the stash 435 owners - an array of size 'no-of-procs' which gives the ownership range 436 for each node. 437 438 Notes: The 'owners' array in the cased of the blocked-stash has the 439 ranges specified blocked global indices, and for the regular stash in 440 the proper global indices. 441 */ 442 #undef __FUNCT__ 443 #define __FUNCT__ "MatStashScatterBegin_Private" 444 PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners) 445 { 446 PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 447 PetscInt size=stash->size,nsends; 448 PetscErrorCode ierr; 449 PetscInt count,*sindices,**rindices,i,j,idx,lastidx,l; 450 PetscScalar **rvalues,*svalues; 451 MPI_Comm comm = stash->comm; 452 MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 453 PetscMPIInt *nprocs,*nlengths,nreceives; 454 PetscInt *sp_idx,*sp_idy; 455 PetscScalar *sp_val; 456 PetscMatStashSpace space,space_next; 457 458 PetscFunctionBegin; 459 bs2 = stash->bs*stash->bs; 460 461 /* first count number of contributors to each processor */ 462 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr); 463 ierr = PetscMemzero(nprocs,size*sizeof(PetscMPIInt));CHKERRQ(ierr); 464 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&nlengths);CHKERRQ(ierr); 465 ierr = PetscMemzero(nlengths,size*sizeof(PetscMPIInt));CHKERRQ(ierr); 466 ierr = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); 467 468 i = j = 0; 469 lastidx = -1; 470 space = stash->space_head; 471 while (space != PETSC_NULL){ 472 space_next = space->next; 473 sp_idx = space->idx; 474 for (l=0; l<space->local_used; l++){ 475 /* if indices are NOT locally sorted, need to start search at the beginning */ 476 if (lastidx > (idx = sp_idx[l])) j = 0; 477 lastidx = idx; 478 for (; j<size; j++) { 479 if (idx >= owners[j] && idx < owners[j+1]) { 480 nlengths[j]++; owner[i] = j; break; 481 } 482 } 483 i++; 484 } 485 space = space_next; 486 } 487 /* Now check what procs get messages - and compute nsends. */ 488 for (i=0, nsends=0 ; i<size; i++) { 489 if (nlengths[i]) { nprocs[i] = 1; nsends ++;} 490 } 491 492 {PetscMPIInt *onodes,*olengths; 493 /* Determine the number of messages to expect, their lengths, from from-ids */ 494 ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr); 495 ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr); 496 /* since clubbing row,col - lengths are multiplied by 2 */ 497 for (i=0; i<nreceives; i++) olengths[i] *=2; 498 ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr); 499 /* values are size 'bs2' lengths (and remove earlier factor 2 */ 500 for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 501 ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr); 502 ierr = PetscFree(onodes);CHKERRQ(ierr); 503 ierr = PetscFree(olengths);CHKERRQ(ierr); 504 } 505 506 /* do sends: 507 1) starts[i] gives the starting index in svalues for stuff going to 508 the ith processor 509 */ 510 ierr = PetscMalloc2(bs2*stash->n,PetscScalar,&svalues,2*(stash->n+1),PetscInt,&sindices);CHKERRQ(ierr); 511 ierr = PetscMalloc(2*nsends*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 512 ierr = PetscMalloc2(size,PetscInt,&startv,size,PetscInt,&starti);CHKERRQ(ierr); 513 /* use 2 sends the first with all_a, the next with all_i and all_j */ 514 startv[0] = 0; starti[0] = 0; 515 for (i=1; i<size; i++) { 516 startv[i] = startv[i-1] + nlengths[i-1]; 517 starti[i] = starti[i-1] + 2*nlengths[i-1]; 518 } 519 520 i = 0; 521 space = stash->space_head; 522 while (space != PETSC_NULL){ 523 space_next = space->next; 524 sp_idx = space->idx; 525 sp_idy = space->idy; 526 sp_val = space->val; 527 for (l=0; l<space->local_used; l++){ 528 j = owner[i]; 529 if (bs2 == 1) { 530 svalues[startv[j]] = sp_val[l]; 531 } else { 532 PetscInt k; 533 PetscScalar *buf1,*buf2; 534 buf1 = svalues+bs2*startv[j]; 535 buf2 = space->val + bs2*l; 536 for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; } 537 } 538 sindices[starti[j]] = sp_idx[l]; 539 sindices[starti[j]+nlengths[j]] = sp_idy[l]; 540 startv[j]++; 541 starti[j]++; 542 i++; 543 } 544 space = space_next; 545 } 546 startv[0] = 0; 547 for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];} 548 549 for (i=0,count=0; i<size; i++) { 550 if (nprocs[i]) { 551 ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr); 552 ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr); 553 } 554 } 555 #if defined(PETSC_USE_INFO) 556 ierr = PetscInfo1(mat,"No of messages: %d \n",nsends);CHKERRQ(ierr); 557 for (i=0; i<size; i++) { 558 if (nprocs[i]) { 559 ierr = PetscInfo2(mat,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(PetscScalar)+2*sizeof(PetscInt));CHKERRQ(ierr); 560 } 561 } 562 #endif 563 ierr = PetscFree(nlengths);CHKERRQ(ierr); 564 ierr = PetscFree(owner);CHKERRQ(ierr); 565 ierr = PetscFree2(startv,starti);CHKERRQ(ierr); 566 ierr = PetscFree(nprocs);CHKERRQ(ierr); 567 568 /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 569 ierr = PetscMalloc(2*nreceives*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 570 571 for (i=0; i<nreceives; i++) { 572 recv_waits[2*i] = recv_waits1[i]; 573 recv_waits[2*i+1] = recv_waits2[i]; 574 } 575 stash->recv_waits = recv_waits; 576 ierr = PetscFree(recv_waits1);CHKERRQ(ierr); 577 ierr = PetscFree(recv_waits2);CHKERRQ(ierr); 578 579 stash->svalues = svalues; 580 stash->sindices = sindices; 581 stash->rvalues = rvalues; 582 stash->rindices = rindices; 583 stash->send_waits = send_waits; 584 stash->nsends = nsends; 585 stash->nrecvs = nreceives; 586 PetscFunctionReturn(0); 587 } 588 589 /* 590 MatStashScatterGetMesg_Private - This function waits on the receives posted 591 in the function MatStashScatterBegin_Private() and returns one message at 592 a time to the calling function. If no messages are left, it indicates this 593 by setting flg = 0, else it sets flg = 1. 594 595 Input Parameters: 596 stash - the stash 597 598 Output Parameters: 599 nvals - the number of entries in the current message. 600 rows - an array of row indices (or blocked indices) corresponding to the values 601 cols - an array of columnindices (or blocked indices) corresponding to the values 602 vals - the values 603 flg - 0 indicates no more message left, and the current call has no values associated. 604 1 indicates that the current call successfully received a message, and the 605 other output parameters nvals,rows,cols,vals are set appropriately. 606 */ 607 #undef __FUNCT__ 608 #define __FUNCT__ "MatStashScatterGetMesg_Private" 609 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,PetscScalar **vals,PetscInt *flg) 610 { 611 PetscErrorCode ierr; 612 PetscMPIInt i,*flg_v = stash->flg_v,i1,i2; 613 PetscInt bs2; 614 MPI_Status recv_status; 615 PetscTruth match_found = PETSC_FALSE; 616 617 PetscFunctionBegin; 618 619 *flg = 0; /* When a message is discovered this is reset to 1 */ 620 /* Return if no more messages to process */ 621 if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); } 622 623 bs2 = stash->bs*stash->bs; 624 /* If a matching pair of receieves are found, process them, and return the data to 625 the calling function. Until then keep receiving messages */ 626 while (!match_found) { 627 CHKMEMQ; 628 ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 629 CHKMEMQ; 630 if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!"); 631 632 /* Now pack the received message into a structure which is useable by others */ 633 if (i % 2) { 634 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr); 635 flg_v[2*recv_status.MPI_SOURCE] = i/2; 636 *nvals = *nvals/bs2; 637 } else { 638 ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr); 639 flg_v[2*recv_status.MPI_SOURCE+1] = i/2; 640 *nvals = *nvals/2; /* This message has both row indices and col indices */ 641 } 642 643 /* Check if we have both messages from this proc */ 644 i1 = flg_v[2*recv_status.MPI_SOURCE]; 645 i2 = flg_v[2*recv_status.MPI_SOURCE+1]; 646 if (i1 != -1 && i2 != -1) { 647 *rows = stash->rindices[i2]; 648 *cols = *rows + *nvals; 649 *vals = stash->rvalues[i1]; 650 *flg = 1; 651 stash->nprocessed ++; 652 match_found = PETSC_TRUE; 653 } 654 } 655 PetscFunctionReturn(0); 656 } 657