1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: stash.c,v 1.23 1999/03/11 22:51:05 balay Exp balay $"; 3 #endif 4 5 #include "src/mat/matimpl.h" 6 7 #define DEFAULT_STASH_SIZE 10000 8 /* 9 This stash is currently used for all the parallel matrix implementations. 10 The stash is where elements of a matrix destined to be stored on other 11 processors are kept until matrix assembly is done. 12 13 This is a simple minded stash. Simply add entry to end of stash. 14 */ 15 16 #undef __FUNC__ 17 #define __FUNC__ "StashCreate_Private" 18 int StashCreate_Private(MPI_Comm comm,int bs_stash, int bs_mat, Stash *stash) 19 { 20 int ierr,flg,max=DEFAULT_STASH_SIZE/(bs_stash*bs_stash); 21 22 PetscFunctionBegin; 23 /* Require 2 tags, get the second using PetscCommGetNewTag() */ 24 ierr = PetscCommDuplicate_Private(comm,&stash->comm,&stash->tag1);CHKERRQ(ierr); 25 ierr = PetscCommGetNewTag(stash->comm,&stash->tag2); CHKERRQ(ierr); 26 ierr = OptionsGetInt(PETSC_NULL,"-stash_initial_size",&max,&flg);CHKERRQ(ierr); 27 ierr = StashSetInitialSize_Private(stash,max); CHKERRQ(ierr); 28 ierr = MPI_Comm_size(stash->comm,&stash->size); CHKERRQ(ierr); 29 ierr = MPI_Comm_rank(stash->comm,&stash->rank); CHKERRQ(ierr); 30 31 if (bs_stash <= 0) bs_stash = 1; 32 if (bs_mat <= 0) bs_mat = 1; 33 34 stash->bs_stash = bs_stash; 35 stash->bs_mat = bs_mat; 36 stash->nmax = 0; 37 stash->n = 0; 38 stash->reallocs = 0; 39 stash->idx = 0; 40 stash->idy = 0; 41 stash->array = 0; 42 43 stash->send_waits = 0; 44 stash->recv_waits = 0; 45 stash->send_status = 0; 46 stash->nsends = 0; 47 stash->nrecvs = 0; 48 stash->svalues = 0; 49 stash->rvalues = 0; 50 stash->rmax = 0; 51 stash->nprocs = 0; 52 stash->nprocessed = 0; 53 PetscFunctionReturn(0); 54 } 55 56 #undef __FUNC__ 57 #define __FUNC__ "StashDestroy_Private" 58 int StashDestroy_Private(Stash *stash) 59 { 60 int ierr; 61 62 PetscFunctionBegin; 63 ierr = PetscCommDestroy_Private(&stash->comm); CHKERRQ(ierr); 64 if (stash->array) {PetscFree(stash->array); stash->array = 0;} 65 PetscFunctionReturn(0); 66 } 67 68 #undef __FUNC__ 69 #define __FUNC__ "StashScatterEnd_Private" 70 int StashScatterEnd_Private(Stash *stash) 71 { 72 int nsends=stash->nsends,ierr; 73 MPI_Status *send_status; 74 75 PetscFunctionBegin; 76 /* wait on sends */ 77 if (nsends) { 78 send_status = (MPI_Status *)PetscMalloc(2*nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 79 ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 80 PetscFree(send_status); 81 } 82 83 /* Now update nmaxold to be app 10% more than nmax, this way the 84 wastage of space is reduced the next time this stash is used */ 85 stash->oldnmax = (int)(stash->nmax * 1.1) + 5; 86 stash->nmax = 0; 87 stash->n = 0; 88 stash->reallocs = 0; 89 stash->rmax = 0; 90 stash->nprocessed = 0; 91 92 if (stash->array) { 93 PetscFree(stash->array); 94 stash->array = 0; 95 stash->idx = 0; 96 stash->idy = 0; 97 } 98 if (stash->send_waits) {PetscFree(stash->send_waits);stash->send_waits = 0;} 99 if (stash->recv_waits) {PetscFree(stash->recv_waits);stash->recv_waits = 0;} 100 if (stash->svalues) {PetscFree(stash->svalues);stash->svalues = 0;} 101 if (stash->rvalues) {PetscFree(stash->rvalues); stash->rvalues = 0;} 102 if (stash->nprocs) {PetscFree(stash->nprocs); stash->nprocs = 0;} 103 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNC__ 108 #define __FUNC__ "StashInfo_Private" 109 int StashInfo_Private(Stash *stash) 110 { 111 PetscFunctionBegin; 112 PLogInfo(0,"StashInfo_Private:Stash size %d, mallocs incured %d\n",stash->n,stash->reallocs); 113 PetscFunctionReturn(0); 114 } 115 #undef __FUNC__ 116 #define __FUNC__ "StashSetInitialSize_Private" 117 int StashSetInitialSize_Private(Stash *stash,int max) 118 { 119 PetscFunctionBegin; 120 stash->oldnmax = max; 121 stash->nmax = 0; 122 PetscFunctionReturn(0); 123 } 124 125 #undef __FUNC__ 126 #define __FUNC__ "StashExpand_Private" 127 static int StashExpand_Private(Stash *stash) 128 { 129 int *n_idx,*n_idy,newnmax,bs2; 130 Scalar *n_array; 131 132 PetscFunctionBegin; 133 /* allocate a larger stash */ 134 if (stash->nmax == 0) newnmax = stash->oldnmax; 135 else newnmax = stash->nmax*2; 136 137 bs2 = stash->bs_stash*stash->bs_stash; 138 n_array = (Scalar *)PetscMalloc((newnmax)*(2*sizeof(int)+bs2*sizeof(Scalar)));CHKPTRQ(n_array); 139 n_idx = (int *) (n_array + bs2*newnmax); 140 n_idy = (int *) (n_idx + newnmax); 141 PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(Scalar)); 142 PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int)); 143 PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(int)); 144 if (stash->array) PetscFree(stash->array); 145 stash->array = n_array; 146 stash->idx = n_idx; 147 stash->idy = n_idy; 148 stash->nmax = newnmax; 149 stash->oldnmax = newnmax; 150 stash->reallocs++; 151 PetscFunctionReturn(0); 152 } 153 154 /* 155 Should do this properly. With a sorted array. 156 */ 157 #undef __FUNC__ 158 #define __FUNC__ "StashValues_Private" 159 int StashValues_Private(Stash *stash,int row,int n, int *idxn,Scalar *values) 160 { 161 int ierr,i; 162 163 PetscFunctionBegin; 164 for ( i=0; i<n; i++ ) { 165 if ( stash->n == stash->nmax ) { 166 ierr = StashExpand_Private(stash); CHKERRQ(ierr); 167 } 168 stash->idx[stash->n] = row; 169 stash->idy[stash->n] = idxn[i]; 170 stash->array[stash->n] = values[i]; 171 stash->n++; 172 } 173 PetscFunctionReturn(0); 174 } 175 176 #undef __FUNC__ 177 #define __FUNC__ "StashValuesBlocked_Private" 178 int StashValuesBlocked_Private(Stash *stash,int row,int n,int *idxn,Scalar *values, 179 int rmax,int cmax,int roworiented,int idx) 180 { 181 int ierr,i,j,k,bs2,bs=stash->bs_stash; 182 Scalar *vals,*array; 183 184 /* stepval gives the offset that one should use to access the next line of 185 a given block of values */ 186 187 PetscFunctionBegin; 188 bs2 = bs*bs; 189 for ( i=0; i<n; i++ ) { 190 if ( stash->n == stash->nmax ) { 191 ierr = StashExpand_Private(stash); CHKERRQ(ierr); 192 } 193 stash->idx[stash->n] = row; 194 stash->idy[stash->n] = idxn[i]; 195 /* Now copy over the block of values. Store the values column oriented. 196 This enables inserting multiple blocks belonging to a row with a single 197 funtion call */ 198 if (roworiented) { 199 array = stash->array + bs2*stash->n; 200 vals = values + idx*bs2*n + bs*i; 201 for ( j=0; j<bs; j++ ) { 202 for ( k=0; k<bs; k++ ) {array[k*bs] = vals[k];} 203 array += 1; 204 vals += cmax*bs; 205 } 206 } else { 207 array = stash->array + bs2*stash->n; 208 vals = values + idx*bs + bs2*rmax*i; 209 for ( j=0; j<bs; j++ ) { 210 for ( k=0; k<bs; k++ ) {array[k] = vals[k];} 211 array += bs; 212 vals += rmax*bs; 213 } 214 } 215 stash->n++; 216 } 217 PetscFunctionReturn(0); 218 } 219 220 #undef __FUNC__ 221 #define __FUNC__ "StashScatterBegin_Private" 222 int StashScatterBegin_Private(Stash *stash,int *owners) 223 { 224 int *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 225 int rank=stash->rank,size=stash->size,*nprocs,*procs,nsends,nreceives; 226 int nmax,*work,count,ierr,*sindices,*rindices,i,j,idx,mult; 227 Scalar *rvalues,*svalues; 228 MPI_Comm comm = stash->comm; 229 MPI_Request *send_waits,*recv_waits; 230 231 PetscFunctionBegin; 232 233 bs2 = stash->bs_stash*stash->bs_stash; 234 /* first count number of contributors to each processor */ 235 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 236 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 237 owner = (int *) PetscMalloc( (stash->n+1)*sizeof(int) ); CHKPTRQ(owner); 238 239 /* if blockstash, then the the owners and row,col indices 240 correspond to reduced indices */ 241 if (stash->bs_stash == 1) mult = stash->bs_mat; 242 else mult = 1; 243 244 for ( i=0; i<stash->n; i++ ) { 245 idx = stash->idx[i]; 246 for ( j=0; j<size; j++ ) { 247 if (idx >= mult*owners[j] && idx < mult*owners[j+1]) { 248 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 249 } 250 } 251 } 252 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 253 254 /* inform other processors of number of messages and max length*/ 255 work = (int *)PetscMalloc(size*sizeof(int)); CHKPTRQ(work); 256 ierr = MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 257 nreceives = work[rank]; 258 ierr = MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 259 nmax = work[rank]; 260 PetscFree(work); 261 /* post receives: 262 since we don't know how long each individual message is we 263 allocate the largest needed buffer for each receive. Potentially 264 this is a lot of wasted space. 265 */ 266 rvalues = (Scalar *)PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(rvalues); 267 rindices = (int *) (rvalues + bs2*nreceives*nmax); 268 recv_waits = (MPI_Request *)PetscMalloc((nreceives+1)*2*sizeof(MPI_Request));CHKPTRQ(recv_waits); 269 for ( i=0,count=0; i<nreceives; i++ ) { 270 ierr = MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm, 271 recv_waits+count++); CHKERRQ(ierr); 272 ierr = MPI_Irecv(rindices+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm, 273 recv_waits+count++); CHKERRQ(ierr); 274 } 275 276 /* do sends: 277 1) starts[i] gives the starting index in svalues for stuff going to 278 the ith processor 279 */ 280 svalues = (Scalar *)PetscMalloc((stash->n+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(svalues); 281 sindices = (int *) (svalues + bs2*stash->n); 282 send_waits = (MPI_Request *) PetscMalloc(2*(nsends+1)*sizeof(MPI_Request)); 283 CHKPTRQ(send_waits); 284 startv = (int *) PetscMalloc(2*size*sizeof(int) ); CHKPTRQ(startv); 285 starti = startv + size; 286 /* use 2 sends the first with all_a, the next with all_i and all_j */ 287 startv[0] = 0; starti[0] = 0; 288 for ( i=1; i<size; i++ ) { 289 startv[i] = startv[i-1] + nprocs[i-1]; 290 starti[i] = starti[i-1] + nprocs[i-1]*2; 291 } 292 for ( i=0; i<stash->n; i++ ) { 293 j = owner[i]; 294 if (bs2 == 1) { 295 svalues[startv[j]] = stash->array[i]; 296 } else { 297 PetscMemcpy(svalues+bs2*startv[j],stash->array+bs2*i,bs2*sizeof(Scalar)); 298 } 299 sindices[starti[j]] = stash->idx[i]; 300 sindices[starti[j]+nprocs[j]] = stash->idy[i]; 301 startv[j]++; 302 starti[j]++; 303 } 304 startv[0] = 0; 305 for ( i=1; i<size; i++ ) { startv[i] = startv[i-1] + nprocs[i-1];} 306 for ( i=0,count=0; i<size; i++ ) { 307 if (procs[i]) { 308 ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[i],MPIU_SCALAR,i,tag1,comm, 309 send_waits+count++);CHKERRQ(ierr); 310 ierr = MPI_Isend(sindices+2*startv[i],2*nprocs[i],MPI_INT,i,tag2,comm, 311 send_waits+count++);CHKERRQ(ierr); 312 } 313 } 314 PetscFree(owner); 315 PetscFree(startv); 316 /* This memory is reused in scatter end for a different purpose*/ 317 for (i=0; i<2*size; i++ ) nprocs[i] = -1; 318 stash->nprocs = nprocs; 319 320 stash->svalues = svalues; stash->rvalues = rvalues; 321 stash->nsends = nsends; stash->nrecvs = nreceives; 322 stash->send_waits = send_waits; stash->recv_waits = recv_waits; 323 stash->rmax = nmax; 324 PetscFunctionReturn(0); 325 } 326 327 /* 328 This function waits on the receives posted in the function 329 StashScatterBegin_Private() and returns one message at a time to 330 the calling function. If no messages are left, it indicates this by 331 setting flg = 0, else it sets flg = 1. 332 */ 333 #undef __FUNC__ 334 #define __FUNC__ "StashScatterGetMesg_Private" 335 int StashScatterGetMesg_Private(Stash *stash,int *nvals,int **rows,int** cols,Scalar **vals,int *flg) 336 { 337 int i,ierr,size=stash->size,*flg_v,*flg_i; 338 int i1,i2,*rindices,match_found=0,bs2; 339 MPI_Status recv_status; 340 341 PetscFunctionBegin; 342 343 *flg = 0; /* When a message is discovered this is reset to 1 */ 344 /* Return if no more messages to process */ 345 if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); } 346 347 flg_v = stash->nprocs; 348 flg_i = flg_v + size; 349 bs2 = stash->bs_stash*stash->bs_stash; 350 /* If a matching pair of receieves are found, process them, and return the data to 351 the calling function. Until then keep receiving messages */ 352 while (!match_found) { 353 ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 354 /* Now pack the received message into a structure which is useable by others */ 355 if (i % 2) { 356 ierr = MPI_Get_count(&recv_status,MPI_INT,nvals);CHKERRQ(ierr); 357 flg_i[recv_status.MPI_SOURCE] = i/2; 358 *nvals = *nvals/2; /* This message has both row indices and col indices */ 359 } else { 360 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr); 361 flg_v[recv_status.MPI_SOURCE] = i/2; 362 *nvals = *nvals/bs2; 363 } 364 365 /* Check if we have both the messages from this proc */ 366 i1 = flg_v[recv_status.MPI_SOURCE]; 367 i2 = flg_i[recv_status.MPI_SOURCE]; 368 if (i1 != -1 && i2 != -1) { 369 rindices = (int *) (stash->rvalues + bs2*stash->rmax*stash->nrecvs); 370 *rows = rindices + 2*i2*stash->rmax; 371 *cols = *rows + *nvals; 372 *vals = stash->rvalues + i1*bs2*stash->rmax; 373 *flg = 1; 374 stash->nprocessed ++; 375 match_found = 1; 376 } 377 } 378 PetscFunctionReturn(0); 379 } 380