1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: stash.c,v 1.26 1999/03/17 21:14:34 balay Exp balay $"; 3 #endif 4 5 #include "src/mat/matimpl.h" 6 7 #define DEFAULT_STASH_SIZE 10000 8 9 /* 10 MatStashCreate_Private - Creates a stash ,currently used for all the parallel 11 matrix implementations. The stash is where elements of a matrix destined 12 to be stored on other processors are kept until matrix assembly is done. 13 14 This is a simple minded stash. Simply adds entries to end of stash. 15 16 Input Parameters: 17 comm - communicator, required for scatters. 18 bs - stash block size. used when stashing blocks of values 19 20 Output Parameters: 21 stash - the newly created stash 22 */ 23 #undef __FUNC__ 24 #define __FUNC__ "MatStashCreate_Private" 25 int MatStashCreate_Private(MPI_Comm comm,int bs, MatStash *stash) 26 { 27 int ierr,flg,max=DEFAULT_STASH_SIZE/(bs*bs); 28 29 PetscFunctionBegin; 30 /* Require 2 tags, get the second using PetscCommGetNewTag() */ 31 ierr = PetscCommDuplicate_Private(comm,&stash->comm,&stash->tag1);CHKERRQ(ierr); 32 ierr = PetscCommGetNewTag(stash->comm,&stash->tag2); CHKERRQ(ierr); 33 ierr = OptionsGetInt(PETSC_NULL,"-matstash_initial_size",&max,&flg);CHKERRQ(ierr); 34 ierr = MatStashSetInitialSize_Private(stash,max); CHKERRQ(ierr); 35 ierr = MPI_Comm_size(stash->comm,&stash->size); CHKERRQ(ierr); 36 ierr = MPI_Comm_rank(stash->comm,&stash->rank); CHKERRQ(ierr); 37 38 if (bs <= 0) bs = 1; 39 40 stash->bs = bs; 41 stash->nmax = 0; 42 stash->n = 0; 43 stash->reallocs = -1; 44 stash->idx = 0; 45 stash->idy = 0; 46 stash->array = 0; 47 48 stash->send_waits = 0; 49 stash->recv_waits = 0; 50 stash->send_status = 0; 51 stash->nsends = 0; 52 stash->nrecvs = 0; 53 stash->svalues = 0; 54 stash->rvalues = 0; 55 stash->rmax = 0; 56 stash->nprocs = 0; 57 stash->nprocessed = 0; 58 PetscFunctionReturn(0); 59 } 60 61 /* 62 MatStashDestroy_Private - Destroy the stash 63 */ 64 #undef __FUNC__ 65 #define __FUNC__ "MatStashDestroy_Private" 66 int MatStashDestroy_Private(MatStash *stash) 67 { 68 int ierr; 69 70 PetscFunctionBegin; 71 ierr = PetscCommDestroy_Private(&stash->comm); CHKERRQ(ierr); 72 if (stash->array) {PetscFree(stash->array); stash->array = 0;} 73 PetscFunctionReturn(0); 74 } 75 76 /* 77 MatStashScatterEnd_Private - This is called as the fial stage of 78 scatter. The final stages of messagepassing is done here, and 79 all the memory used for messagepassing is cleanedu up. This 80 routine also resets the stash, and deallocates the memory used 81 for the stash. It also keeps track of the current memory usage 82 so that the same value can be used the next time through. 83 */ 84 #undef __FUNC__ 85 #define __FUNC__ "MatStashScatterEnd_Private" 86 int MatStashScatterEnd_Private(MatStash *stash) 87 { 88 int nsends=stash->nsends,ierr; 89 MPI_Status *send_status; 90 91 PetscFunctionBegin; 92 /* wait on sends */ 93 if (nsends) { 94 send_status = (MPI_Status *)PetscMalloc(2*nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 95 ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 96 PetscFree(send_status); 97 } 98 99 /* Now update nmaxold to be app 10% more than max n used, this way the 100 wastage of space is reduced the next time this stash is used */ 101 stash->oldnmax = (int)(stash->n * 1.1) + 5; 102 stash->nmax = 0; 103 stash->n = 0; 104 stash->reallocs = -1; 105 stash->rmax = 0; 106 stash->nprocessed = 0; 107 108 if (stash->array) { 109 PetscFree(stash->array); 110 stash->array = 0; 111 stash->idx = 0; 112 stash->idy = 0; 113 } 114 if (stash->send_waits) {PetscFree(stash->send_waits);stash->send_waits = 0;} 115 if (stash->recv_waits) {PetscFree(stash->recv_waits);stash->recv_waits = 0;} 116 if (stash->svalues) {PetscFree(stash->svalues);stash->svalues = 0;} 117 if (stash->rvalues) {PetscFree(stash->rvalues); stash->rvalues = 0;} 118 if (stash->nprocs) {PetscFree(stash->nprocs); stash->nprocs = 0;} 119 120 PetscFunctionReturn(0); 121 } 122 123 /* 124 MatStashGetInfo_Private - Gets the relavant statistics of the stash 125 126 Input Parameters: 127 stash - the stash 128 nstash - the size of the stash 129 reallocs - the number of additional mallocs incurred. 130 131 */ 132 #undef __FUNC__ 133 #define __FUNC__ "MatStashGetInfo_Private" 134 int MatStashGetInfo_Private(MatStash *stash,int *nstash, int *reallocs) 135 { 136 PetscFunctionBegin; 137 *nstash = stash->n; 138 *reallocs = stash->reallocs; 139 PetscFunctionReturn(0); 140 } 141 142 143 /* 144 MatStashSetInitialSize_Private - Sets the initial size of the stash 145 146 Input Parameters: 147 stash - the stash 148 max - the value that is used as the max size of the stash. 149 this value is used while allocating memory. 150 */ 151 #undef __FUNC__ 152 #define __FUNC__ "MatStashSetInitialSize_Private" 153 int MatStashSetInitialSize_Private(MatStash *stash,int max) 154 { 155 PetscFunctionBegin; 156 stash->oldnmax = max; 157 stash->nmax = 0; 158 PetscFunctionReturn(0); 159 } 160 161 /* MatStashExpand_Private - Expand the stash. This function is called 162 when the space in the stash is not sufficient to add the new values 163 being inserted into the stash. 164 165 Input Parameters: 166 stash - the stash 167 incr - the minimum increase requested 168 169 Notes: 170 This routine doubles the currently used memory. 171 */ 172 #undef __FUNC__ 173 #define __FUNC__ "MatStashExpand_Private" 174 static int MatStashExpand_Private(MatStash *stash,int incr) 175 { 176 int *n_idx,*n_idy,newnmax,bs2; 177 Scalar *n_array; 178 179 PetscFunctionBegin; 180 /* allocate a larger stash */ 181 if (stash->nmax == 0) newnmax = stash->oldnmax; 182 else newnmax = stash->nmax*2; 183 if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 184 185 bs2 = stash->bs*stash->bs; 186 n_array = (Scalar *)PetscMalloc((newnmax)*(2*sizeof(int)+bs2*sizeof(Scalar)));CHKPTRQ(n_array); 187 n_idx = (int *) (n_array + bs2*newnmax); 188 n_idy = (int *) (n_idx + newnmax); 189 PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(Scalar)); 190 PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int)); 191 PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(int)); 192 if (stash->array) PetscFree(stash->array); 193 stash->array = n_array; 194 stash->idx = n_idx; 195 stash->idy = n_idy; 196 stash->nmax = newnmax; 197 stash->oldnmax = newnmax; 198 stash->reallocs++; 199 PetscFunctionReturn(0); 200 } 201 /* 202 MatStashValuesRow_Private - inserts values into the stash. This function 203 expects the values to be roworiented. Multiple columns belong to the same row 204 can be inserted with a single call to this function. 205 206 Input Parameters: 207 stash - the stash 208 row - the global row correspoiding to the values 209 n - the number of elements inserted. All elements belong to the above row. 210 idxn - the global column indices corresponding to each of the values. 211 values - the values inserted 212 */ 213 #undef __FUNC__ 214 #define __FUNC__ "MatStashValuesRow_Private" 215 int MatStashValuesRow_Private(MatStash *stash,int row,int n, int *idxn,Scalar *values) 216 { 217 int ierr,i; 218 219 PetscFunctionBegin; 220 /* Check and see if we have sufficient memory */ 221 if ((stash->n + n) > stash->nmax) { 222 ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr); 223 } 224 for ( i=0; i<n; i++ ) { 225 stash->idx[stash->n] = row; 226 stash->idy[stash->n] = idxn[i]; 227 stash->array[stash->n] = values[i]; 228 stash->n++; 229 } 230 PetscFunctionReturn(0); 231 } 232 /* 233 MatStashValuesCol_Private - inserts values into the stash. This function 234 expects the values to be columnoriented. Multiple columns belong to the same row 235 can be inserted with a single call to this function. 236 237 Input Parameters: 238 stash - the stash 239 row - the global row correspoiding to the values 240 n - the number of elements inserted. All elements belong to the above row. 241 idxn - the global column indices corresponding to each of the values. 242 values - the values inserted 243 stepval - the consecutive values are sepated by a distance of stepval. 244 this happens because the input is columnoriented. 245 */ 246 #undef __FUNC__ 247 #define __FUNC__ "MatStashValuesCol_Private" 248 int MatStashValuesCol_Private(MatStash *stash,int row,int n, int *idxn, 249 Scalar *values,int stepval) 250 { 251 int ierr,i; 252 253 PetscFunctionBegin; 254 /* Check and see if we have sufficient memory */ 255 if ((stash->n + n) > stash->nmax) { 256 ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr); 257 } 258 for ( i=0; i<n; i++ ) { 259 stash->idx[stash->n] = row; 260 stash->idy[stash->n] = idxn[i]; 261 stash->array[stash->n] = values[i*stepval]; 262 stash->n++; 263 } 264 PetscFunctionReturn(0); 265 } 266 267 /* 268 MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 269 This function expects the values to be roworiented. Multiple columns belong 270 to the same block-row can be inserted with a single call to this function. 271 This function extracts the sub-block of values based on the dimensions of 272 the original input block, and the row,col values corresponding to the blocks. 273 274 Input Parameters: 275 stash - the stash 276 row - the global block-row correspoiding to the values 277 n - the number of elements inserted. All elements belong to the above row. 278 idxn - the global block-column indices corresponding to each of the blocks of 279 values. Each block is of size bs*bs. 280 values - the values inserted 281 rmax - the number of block-rows in the original block. 282 cmax - the number of block-columsn on the original block. 283 idx - the index of the current block-row in the original block. 284 */ 285 #undef __FUNC__ 286 #define __FUNC__ "MatStashValuesRowBlocked_Private" 287 int MatStashValuesRowBlocked_Private(MatStash *stash,int row,int n,int *idxn,Scalar *values, 288 int rmax,int cmax,int idx) 289 { 290 int ierr,i,j,k,bs2,bs=stash->bs; 291 Scalar *vals,*array; 292 293 PetscFunctionBegin; 294 bs2 = bs*bs; 295 if ((stash->n+n) > stash->nmax) { 296 ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr); 297 } 298 for ( i=0; i<n; i++ ) { 299 stash->idx[stash->n] = row; 300 stash->idy[stash->n] = idxn[i]; 301 /* Now copy over the block of values. Store the values column oriented. 302 This enables inserting multiple blocks belonging to a row with a single 303 funtion call */ 304 array = stash->array + bs2*stash->n; 305 vals = values + idx*bs2*n + bs*i; 306 for ( j=0; j<bs; j++ ) { 307 for ( k=0; k<bs; k++ ) {array[k*bs] = vals[k];} 308 array += 1; 309 vals += cmax*bs; 310 } 311 stash->n++; 312 } 313 PetscFunctionReturn(0); 314 } 315 316 /* 317 MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 318 This function expects the values to be roworiented. Multiple columns belong 319 to the same block-row can be inserted with a single call to this function. 320 This function extracts the sub-block of values based on the dimensions of 321 the original input block, and the row,col values corresponding to the blocks. 322 323 Input Parameters: 324 stash - the stash 325 row - the global block-row correspoiding to the values 326 n - the number of elements inserted. All elements belong to the above row. 327 idxn - the global block-column indices corresponding to each of the blocks of 328 values. Each block is of size bs*bs. 329 values - the values inserted 330 rmax - the number of block-rows in the original block. 331 cmax - the number of block-columsn on the original block. 332 idx - the index of the current block-row in the original block. 333 */ 334 #undef __FUNC__ 335 #define __FUNC__ "MatStashValuesColBlocked_Private" 336 int MatStashValuesColBlocked_Private(MatStash *stash,int row,int n,int *idxn, 337 Scalar *values,int rmax,int cmax,int idx) 338 { 339 int ierr,i,j,k,bs2,bs=stash->bs; 340 Scalar *vals,*array; 341 342 PetscFunctionBegin; 343 bs2 = bs*bs; 344 if ((stash->n+n) > stash->nmax) { 345 ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr); 346 } 347 for ( i=0; i<n; i++ ) { 348 stash->idx[stash->n] = row; 349 stash->idy[stash->n] = idxn[i]; 350 /* Now copy over the block of values. Store the values column oriented. 351 This enables inserting multiple blocks belonging to a row with a single 352 funtion call */ 353 array = stash->array + bs2*stash->n; 354 vals = values + idx*bs + bs2*rmax*i; 355 for ( j=0; j<bs; j++ ) { 356 for ( k=0; k<bs; k++ ) {array[k] = vals[k];} 357 array += bs; 358 vals += rmax*bs; 359 } 360 stash->n++; 361 } 362 PetscFunctionReturn(0); 363 } 364 /* 365 MatStashScatterBegin_Private - Initiates the transfer of values to the 366 correct owners. This function goes through the stash, and check the 367 owners of each stashed value, and sends the values off to the owner 368 processors. 369 370 Input Parameters: 371 stash - the stash 372 owners - an array of size 'no-of-procs' which gives the ownership range 373 for each node. 374 375 Notes: The 'owners' array in the cased of the blocked-stash has the 376 ranges specified blocked global indices, and for the regular stash in 377 the proper global indices. 378 */ 379 #undef __FUNC__ 380 #define __FUNC__ "MatStashScatterBegin_Private" 381 int MatStashScatterBegin_Private(MatStash *stash,int *owners) 382 { 383 int *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 384 int rank=stash->rank,size=stash->size,*nprocs,*procs,nsends,nreceives; 385 int nmax,*work,count,ierr,*sindices,*rindices,i,j,idx; 386 Scalar *rvalues,*svalues; 387 MPI_Comm comm = stash->comm; 388 MPI_Request *send_waits,*recv_waits; 389 390 PetscFunctionBegin; 391 392 bs2 = stash->bs*stash->bs; 393 /* first count number of contributors to each processor */ 394 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 395 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 396 owner = (int *) PetscMalloc( (stash->n+1)*sizeof(int) ); CHKPTRQ(owner); 397 398 for ( i=0; i<stash->n; i++ ) { 399 idx = stash->idx[i]; 400 for ( j=0; j<size; j++ ) { 401 if (idx >= owners[j] && idx < owners[j+1]) { 402 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 403 } 404 } 405 } 406 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 407 408 /* inform other processors of number of messages and max length*/ 409 work = (int *)PetscMalloc(size*sizeof(int)); CHKPTRQ(work); 410 ierr = MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 411 nreceives = work[rank]; 412 ierr = MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 413 nmax = work[rank]; 414 PetscFree(work); 415 /* post receives: 416 since we don't know how long each individual message is we 417 allocate the largest needed buffer for each receive. Potentially 418 this is a lot of wasted space. 419 */ 420 rvalues = (Scalar *)PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(rvalues); 421 rindices = (int *) (rvalues + bs2*nreceives*nmax); 422 recv_waits = (MPI_Request *)PetscMalloc((nreceives+1)*2*sizeof(MPI_Request));CHKPTRQ(recv_waits); 423 for ( i=0,count=0; i<nreceives; i++ ) { 424 ierr = MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm, 425 recv_waits+count++); CHKERRQ(ierr); 426 ierr = MPI_Irecv(rindices+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm, 427 recv_waits+count++); CHKERRQ(ierr); 428 } 429 430 /* do sends: 431 1) starts[i] gives the starting index in svalues for stuff going to 432 the ith processor 433 */ 434 svalues = (Scalar *)PetscMalloc((stash->n+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(svalues); 435 sindices = (int *) (svalues + bs2*stash->n); 436 send_waits = (MPI_Request *) PetscMalloc(2*(nsends+1)*sizeof(MPI_Request)); 437 CHKPTRQ(send_waits); 438 startv = (int *) PetscMalloc(2*size*sizeof(int) ); CHKPTRQ(startv); 439 starti = startv + size; 440 /* use 2 sends the first with all_a, the next with all_i and all_j */ 441 startv[0] = 0; starti[0] = 0; 442 for ( i=1; i<size; i++ ) { 443 startv[i] = startv[i-1] + nprocs[i-1]; 444 starti[i] = starti[i-1] + nprocs[i-1]*2; 445 } 446 for ( i=0; i<stash->n; i++ ) { 447 j = owner[i]; 448 if (bs2 == 1) { 449 svalues[startv[j]] = stash->array[i]; 450 } else { 451 int k; 452 Scalar *buf1,*buf2; 453 buf1 = svalues+bs2*startv[j]; 454 buf2 = stash->array+bs2*i; 455 for ( k=0; k<bs2; k++ ){ buf1[k] = buf2[k]; } 456 } 457 sindices[starti[j]] = stash->idx[i]; 458 sindices[starti[j]+nprocs[j]] = stash->idy[i]; 459 startv[j]++; 460 starti[j]++; 461 } 462 startv[0] = 0; 463 for ( i=1; i<size; i++ ) { startv[i] = startv[i-1] + nprocs[i-1];} 464 for ( i=0,count=0; i<size; i++ ) { 465 if (procs[i]) { 466 ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[i],MPIU_SCALAR,i,tag1,comm, 467 send_waits+count++);CHKERRQ(ierr); 468 ierr = MPI_Isend(sindices+2*startv[i],2*nprocs[i],MPI_INT,i,tag2,comm, 469 send_waits+count++);CHKERRQ(ierr); 470 } 471 } 472 PetscFree(owner); 473 PetscFree(startv); 474 /* This memory is reused in scatter end for a different purpose*/ 475 for (i=0; i<2*size; i++ ) nprocs[i] = -1; 476 stash->nprocs = nprocs; 477 478 stash->svalues = svalues; stash->rvalues = rvalues; 479 stash->nsends = nsends; stash->nrecvs = nreceives; 480 stash->send_waits = send_waits; stash->recv_waits = recv_waits; 481 stash->rmax = nmax; 482 PetscFunctionReturn(0); 483 } 484 485 /* 486 MatStashScatterGetMesg_Private - This function waits on the receives posted 487 in the function MatStashScatterBegin_Private() and returns one message at 488 a time to the calling function. If no messages are left, it indicates this 489 by setting flg = 0, else it sets flg = 1. 490 491 Input Parameters: 492 stash - the stash 493 494 Output Parameters: 495 nvals - the number of entries in the current message. 496 rows - an array of row indices (or blocked indices) corresponding to the values 497 cols - an array of columnindices (or blocked indices) corresponding to the values 498 vals - the values 499 flg - 0 indicates no more message left, and the current call has no values associated. 500 1 indicates that the current call successfully received a message, and the 501 other output parameters nvals,rows,cols,vals are set appropriately. 502 */ 503 #undef __FUNC__ 504 #define __FUNC__ "MatStashScatterGetMesg_Private" 505 int MatStashScatterGetMesg_Private(MatStash *stash,int *nvals,int **rows,int** cols,Scalar **vals,int *flg) 506 { 507 int i,ierr,size=stash->size,*flg_v,*flg_i; 508 int i1,i2,*rindices,match_found=0,bs2; 509 MPI_Status recv_status; 510 511 PetscFunctionBegin; 512 513 *flg = 0; /* When a message is discovered this is reset to 1 */ 514 /* Return if no more messages to process */ 515 if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); } 516 517 flg_v = stash->nprocs; 518 flg_i = flg_v + size; 519 bs2 = stash->bs*stash->bs; 520 /* If a matching pair of receieves are found, process them, and return the data to 521 the calling function. Until then keep receiving messages */ 522 while (!match_found) { 523 ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 524 /* Now pack the received message into a structure which is useable by others */ 525 if (i % 2) { 526 ierr = MPI_Get_count(&recv_status,MPI_INT,nvals);CHKERRQ(ierr); 527 flg_i[recv_status.MPI_SOURCE] = i/2; 528 *nvals = *nvals/2; /* This message has both row indices and col indices */ 529 } else { 530 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr); 531 flg_v[recv_status.MPI_SOURCE] = i/2; 532 *nvals = *nvals/bs2; 533 } 534 535 /* Check if we have both the messages from this proc */ 536 i1 = flg_v[recv_status.MPI_SOURCE]; 537 i2 = flg_i[recv_status.MPI_SOURCE]; 538 if (i1 != -1 && i2 != -1) { 539 rindices = (int *) (stash->rvalues + bs2*stash->rmax*stash->nrecvs); 540 *rows = rindices + 2*i2*stash->rmax; 541 *cols = *rows + *nvals; 542 *vals = stash->rvalues + i1*bs2*stash->rmax; 543 *flg = 1; 544 stash->nprocessed ++; 545 match_found = 1; 546 } 547 } 548 PetscFunctionReturn(0); 549 } 550