xref: /petsc/src/mat/utils/matstash.c (revision 8798bf22c13d6b0f19082f7079c241038c01d971)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: stash.c,v 1.26 1999/03/17 21:14:34 balay Exp balay $";
3 #endif
4 
5 #include "src/mat/matimpl.h"
6 
7 #define DEFAULT_STASH_SIZE   10000
8 
9 /*
10   MatStashCreate_Private - Creates a stash ,currently used for all the parallel
11   matrix implementations. The stash is where elements of a matrix destined
12   to be stored on other processors are kept until matrix assembly is done.
13 
14   This is a simple minded stash. Simply adds entries to end of stash.
15 
16   Input Parameters:
17   comm - communicator, required for scatters.
18   bs   - stash block size. used when stashing blocks of values
19 
20   Output Parameters:
21   stash    - the newly created stash
22 */
23 #undef __FUNC__
24 #define __FUNC__ "MatStashCreate_Private"
25 int MatStashCreate_Private(MPI_Comm comm,int bs, MatStash *stash)
26 {
27   int ierr,flg,max=DEFAULT_STASH_SIZE/(bs*bs);
28 
29   PetscFunctionBegin;
30   /* Require 2 tags, get the second using PetscCommGetNewTag() */
31   ierr = PetscCommDuplicate_Private(comm,&stash->comm,&stash->tag1);CHKERRQ(ierr);
32   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2); CHKERRQ(ierr);
33   ierr = OptionsGetInt(PETSC_NULL,"-matstash_initial_size",&max,&flg);CHKERRQ(ierr);
34   ierr = MatStashSetInitialSize_Private(stash,max); CHKERRQ(ierr);
35   ierr = MPI_Comm_size(stash->comm,&stash->size); CHKERRQ(ierr);
36   ierr = MPI_Comm_rank(stash->comm,&stash->rank); CHKERRQ(ierr);
37 
38   if (bs <= 0) bs = 1;
39 
40   stash->bs       = bs;
41   stash->nmax     = 0;
42   stash->n        = 0;
43   stash->reallocs = -1;
44   stash->idx      = 0;
45   stash->idy      = 0;
46   stash->array    = 0;
47 
48   stash->send_waits  = 0;
49   stash->recv_waits  = 0;
50   stash->send_status = 0;
51   stash->nsends      = 0;
52   stash->nrecvs      = 0;
53   stash->svalues     = 0;
54   stash->rvalues     = 0;
55   stash->rmax        = 0;
56   stash->nprocs      = 0;
57   stash->nprocessed  = 0;
58   PetscFunctionReturn(0);
59 }
60 
61 /*
62    MatStashDestroy_Private - Destroy the stash
63 */
64 #undef __FUNC__
65 #define __FUNC__ "MatStashDestroy_Private"
66 int MatStashDestroy_Private(MatStash *stash)
67 {
68   int ierr;
69 
70   PetscFunctionBegin;
71   ierr = PetscCommDestroy_Private(&stash->comm); CHKERRQ(ierr);
72   if (stash->array) {PetscFree(stash->array); stash->array = 0;}
73   PetscFunctionReturn(0);
74 }
75 
76 /*
77    MatStashScatterEnd_Private - This is called as the fial stage of
78    scatter. The final stages of messagepassing is done here, and
79    all the memory used for messagepassing is cleanedu up. This
80    routine also resets the stash, and deallocates the memory used
81    for the stash. It also keeps track of the current memory usage
82    so that the same value can be used the next time through.
83 */
84 #undef __FUNC__
85 #define __FUNC__ "MatStashScatterEnd_Private"
86 int MatStashScatterEnd_Private(MatStash *stash)
87 {
88   int         nsends=stash->nsends,ierr;
89   MPI_Status  *send_status;
90 
91   PetscFunctionBegin;
92   /* wait on sends */
93   if (nsends) {
94     send_status = (MPI_Status *)PetscMalloc(2*nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
95     ierr        = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
96     PetscFree(send_status);
97   }
98 
99   /* Now update nmaxold to be app 10% more than max n used, this way the
100      wastage of space is reduced the next time this stash is used */
101   stash->oldnmax    = (int)(stash->n * 1.1) + 5;
102   stash->nmax       = 0;
103   stash->n          = 0;
104   stash->reallocs   = -1;
105   stash->rmax       = 0;
106   stash->nprocessed = 0;
107 
108   if (stash->array) {
109     PetscFree(stash->array);
110     stash->array = 0;
111     stash->idx   = 0;
112     stash->idy   = 0;
113   }
114   if (stash->send_waits)  {PetscFree(stash->send_waits);stash->send_waits = 0;}
115   if (stash->recv_waits)  {PetscFree(stash->recv_waits);stash->recv_waits = 0;}
116   if (stash->svalues)     {PetscFree(stash->svalues);stash->svalues = 0;}
117   if (stash->rvalues)     {PetscFree(stash->rvalues); stash->rvalues = 0;}
118   if (stash->nprocs)      {PetscFree(stash->nprocs); stash->nprocs = 0;}
119 
120   PetscFunctionReturn(0);
121 }
122 
123 /*
124    MatStashGetInfo_Private - Gets the relavant statistics of the stash
125 
126    Input Parameters:
127    stash    - the stash
128    nstash   - the size of the stash
129    reallocs - the number of additional mallocs incurred.
130 
131 */
132 #undef __FUNC__
133 #define __FUNC__ "MatStashGetInfo_Private"
134 int MatStashGetInfo_Private(MatStash *stash,int *nstash, int *reallocs)
135 {
136   PetscFunctionBegin;
137   *nstash   = stash->n;
138   *reallocs = stash->reallocs;
139   PetscFunctionReturn(0);
140 }
141 
142 
143 /*
144    MatStashSetInitialSize_Private - Sets the initial size of the stash
145 
146    Input Parameters:
147    stash  - the stash
148    max    - the value that is used as the max size of the stash.
149             this value is used while allocating memory.
150 */
151 #undef __FUNC__
152 #define __FUNC__ "MatStashSetInitialSize_Private"
153 int MatStashSetInitialSize_Private(MatStash *stash,int max)
154 {
155   PetscFunctionBegin;
156   stash->oldnmax = max;
157   stash->nmax    = 0;
158   PetscFunctionReturn(0);
159 }
160 
161 /* MatStashExpand_Private - Expand the stash. This function is called
162    when the space in the stash is not sufficient to add the new values
163    being inserted into the stash.
164 
165    Input Parameters:
166    stash - the stash
167    incr  - the minimum increase requested
168 
169    Notes:
170    This routine doubles the currently used memory.
171  */
172 #undef __FUNC__
173 #define __FUNC__ "MatStashExpand_Private"
174 static int MatStashExpand_Private(MatStash *stash,int incr)
175 {
176   int    *n_idx,*n_idy,newnmax,bs2;
177   Scalar *n_array;
178 
179   PetscFunctionBegin;
180   /* allocate a larger stash */
181   if (stash->nmax == 0) newnmax = stash->oldnmax;
182   else                  newnmax = stash->nmax*2;
183   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
184 
185   bs2     = stash->bs*stash->bs;
186   n_array = (Scalar *)PetscMalloc((newnmax)*(2*sizeof(int)+bs2*sizeof(Scalar)));CHKPTRQ(n_array);
187   n_idx   = (int *) (n_array + bs2*newnmax);
188   n_idy   = (int *) (n_idx + newnmax);
189   PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(Scalar));
190   PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int));
191   PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(int));
192   if (stash->array) PetscFree(stash->array);
193   stash->array   = n_array;
194   stash->idx     = n_idx;
195   stash->idy     = n_idy;
196   stash->nmax    = newnmax;
197   stash->oldnmax = newnmax;
198   stash->reallocs++;
199   PetscFunctionReturn(0);
200 }
201 /*
202   MatStashValuesRow_Private - inserts values into the stash. This function
203   expects the values to be roworiented. Multiple columns belong to the same row
204   can be inserted with a single call to this function.
205 
206   Input Parameters:
207   stash  - the stash
208   row    - the global row correspoiding to the values
209   n      - the number of elements inserted. All elements belong to the above row.
210   idxn   - the global column indices corresponding to each of the values.
211   values - the values inserted
212 */
213 #undef __FUNC__
214 #define __FUNC__ "MatStashValuesRow_Private"
215 int MatStashValuesRow_Private(MatStash *stash,int row,int n, int *idxn,Scalar *values)
216 {
217   int    ierr,i;
218 
219   PetscFunctionBegin;
220   /* Check and see if we have sufficient memory */
221   if ((stash->n + n) > stash->nmax) {
222     ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr);
223   }
224   for ( i=0; i<n; i++ ) {
225     stash->idx[stash->n]   = row;
226     stash->idy[stash->n]   = idxn[i];
227     stash->array[stash->n] = values[i];
228     stash->n++;
229   }
230   PetscFunctionReturn(0);
231 }
232 /*
233   MatStashValuesCol_Private - inserts values into the stash. This function
234   expects the values to be columnoriented. Multiple columns belong to the same row
235   can be inserted with a single call to this function.
236 
237   Input Parameters:
238   stash   - the stash
239   row     - the global row correspoiding to the values
240   n       - the number of elements inserted. All elements belong to the above row.
241   idxn    - the global column indices corresponding to each of the values.
242   values  - the values inserted
243   stepval - the consecutive values are sepated by a distance of stepval.
244             this happens because the input is columnoriented.
245 */
246 #undef __FUNC__
247 #define __FUNC__ "MatStashValuesCol_Private"
248 int MatStashValuesCol_Private(MatStash *stash,int row,int n, int *idxn,
249                                       Scalar *values,int stepval)
250 {
251   int    ierr,i;
252 
253   PetscFunctionBegin;
254   /* Check and see if we have sufficient memory */
255   if ((stash->n + n) > stash->nmax) {
256     ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr);
257   }
258   for ( i=0; i<n; i++ ) {
259     stash->idx[stash->n]   = row;
260     stash->idy[stash->n]   = idxn[i];
261     stash->array[stash->n] = values[i*stepval];
262     stash->n++;
263   }
264   PetscFunctionReturn(0);
265 }
266 
267 /*
268   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
269   This function expects the values to be roworiented. Multiple columns belong
270   to the same block-row can be inserted with a single call to this function.
271   This function extracts the sub-block of values based on the dimensions of
272   the original input block, and the row,col values corresponding to the blocks.
273 
274   Input Parameters:
275   stash  - the stash
276   row    - the global block-row correspoiding to the values
277   n      - the number of elements inserted. All elements belong to the above row.
278   idxn   - the global block-column indices corresponding to each of the blocks of
279            values. Each block is of size bs*bs.
280   values - the values inserted
281   rmax   - the number of block-rows in the original block.
282   cmax   - the number of block-columsn on the original block.
283   idx    - the index of the current block-row in the original block.
284 */
285 #undef __FUNC__
286 #define __FUNC__ "MatStashValuesRowBlocked_Private"
287 int MatStashValuesRowBlocked_Private(MatStash *stash,int row,int n,int *idxn,Scalar *values,
288                                int rmax,int cmax,int idx)
289 {
290   int    ierr,i,j,k,bs2,bs=stash->bs;
291   Scalar *vals,*array;
292 
293   PetscFunctionBegin;
294   bs2 = bs*bs;
295   if ((stash->n+n) > stash->nmax) {
296     ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr);
297   }
298   for ( i=0; i<n; i++ ) {
299     stash->idx[stash->n]   = row;
300     stash->idy[stash->n] = idxn[i];
301     /* Now copy over the block of values. Store the values column oriented.
302        This enables inserting multiple blocks belonging to a row with a single
303        funtion call */
304     array = stash->array + bs2*stash->n;
305     vals  = values + idx*bs2*n + bs*i;
306     for ( j=0; j<bs; j++ ) {
307       for ( k=0; k<bs; k++ ) {array[k*bs] = vals[k];}
308       array += 1;
309       vals  += cmax*bs;
310     }
311     stash->n++;
312   }
313   PetscFunctionReturn(0);
314 }
315 
316 /*
317   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
318   This function expects the values to be roworiented. Multiple columns belong
319   to the same block-row can be inserted with a single call to this function.
320   This function extracts the sub-block of values based on the dimensions of
321   the original input block, and the row,col values corresponding to the blocks.
322 
323   Input Parameters:
324   stash  - the stash
325   row    - the global block-row correspoiding to the values
326   n      - the number of elements inserted. All elements belong to the above row.
327   idxn   - the global block-column indices corresponding to each of the blocks of
328            values. Each block is of size bs*bs.
329   values - the values inserted
330   rmax   - the number of block-rows in the original block.
331   cmax   - the number of block-columsn on the original block.
332   idx    - the index of the current block-row in the original block.
333 */
334 #undef __FUNC__
335 #define __FUNC__ "MatStashValuesColBlocked_Private"
336 int MatStashValuesColBlocked_Private(MatStash *stash,int row,int n,int *idxn,
337                                              Scalar *values,int rmax,int cmax,int idx)
338 {
339   int    ierr,i,j,k,bs2,bs=stash->bs;
340   Scalar *vals,*array;
341 
342   PetscFunctionBegin;
343   bs2 = bs*bs;
344   if ((stash->n+n) > stash->nmax) {
345     ierr = MatStashExpand_Private(stash,n); CHKERRQ(ierr);
346   }
347   for ( i=0; i<n; i++ ) {
348     stash->idx[stash->n]   = row;
349     stash->idy[stash->n] = idxn[i];
350     /* Now copy over the block of values. Store the values column oriented.
351      This enables inserting multiple blocks belonging to a row with a single
352      funtion call */
353     array = stash->array + bs2*stash->n;
354     vals  = values + idx*bs + bs2*rmax*i;
355     for ( j=0; j<bs; j++ ) {
356       for ( k=0; k<bs; k++ ) {array[k] = vals[k];}
357       array += bs;
358       vals  += rmax*bs;
359     }
360     stash->n++;
361   }
362   PetscFunctionReturn(0);
363 }
364 /*
365   MatStashScatterBegin_Private - Initiates the transfer of values to the
366   correct owners. This function goes through the stash, and check the
367   owners of each stashed value, and sends the values off to the owner
368   processors.
369 
370   Input Parameters:
371   stash  - the stash
372   owners - an array of size 'no-of-procs' which gives the ownership range
373            for each node.
374 
375   Notes: The 'owners' array in the cased of the blocked-stash has the
376   ranges specified blocked global indices, and for the regular stash in
377   the proper global indices.
378 */
379 #undef __FUNC__
380 #define __FUNC__ "MatStashScatterBegin_Private"
381 int MatStashScatterBegin_Private(MatStash *stash,int *owners)
382 {
383   int         *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
384   int         rank=stash->rank,size=stash->size,*nprocs,*procs,nsends,nreceives;
385   int         nmax,*work,count,ierr,*sindices,*rindices,i,j,idx;
386   Scalar      *rvalues,*svalues;
387   MPI_Comm    comm = stash->comm;
388   MPI_Request *send_waits,*recv_waits;
389 
390   PetscFunctionBegin;
391 
392   bs2 = stash->bs*stash->bs;
393   /*  first count number of contributors to each processor */
394   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
395   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
396   owner = (int *) PetscMalloc( (stash->n+1)*sizeof(int) ); CHKPTRQ(owner);
397 
398   for ( i=0; i<stash->n; i++ ) {
399     idx = stash->idx[i];
400     for ( j=0; j<size; j++ ) {
401       if (idx >= owners[j] && idx < owners[j+1]) {
402         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
403       }
404     }
405   }
406   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
407 
408   /* inform other processors of number of messages and max length*/
409   work = (int *)PetscMalloc(size*sizeof(int)); CHKPTRQ(work);
410   ierr = MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
411   nreceives = work[rank];
412   ierr = MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
413   nmax = work[rank];
414   PetscFree(work);
415   /* post receives:
416      since we don't know how long each individual message is we
417      allocate the largest needed buffer for each receive. Potentially
418      this is a lot of wasted space.
419   */
420   rvalues    = (Scalar *)PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(rvalues);
421   rindices   = (int *) (rvalues + bs2*nreceives*nmax);
422   recv_waits = (MPI_Request *)PetscMalloc((nreceives+1)*2*sizeof(MPI_Request));CHKPTRQ(recv_waits);
423   for ( i=0,count=0; i<nreceives; i++ ) {
424     ierr = MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm,
425                      recv_waits+count++); CHKERRQ(ierr);
426     ierr = MPI_Irecv(rindices+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm,
427                      recv_waits+count++); CHKERRQ(ierr);
428   }
429 
430   /* do sends:
431       1) starts[i] gives the starting index in svalues for stuff going to
432          the ith processor
433   */
434   svalues    = (Scalar *)PetscMalloc((stash->n+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(svalues);
435   sindices   = (int *) (svalues + bs2*stash->n);
436   send_waits = (MPI_Request *) PetscMalloc(2*(nsends+1)*sizeof(MPI_Request));
437   CHKPTRQ(send_waits);
438   startv     = (int *) PetscMalloc(2*size*sizeof(int) ); CHKPTRQ(startv);
439   starti     = startv + size;
440   /* use 2 sends the first with all_a, the next with all_i and all_j */
441   startv[0]  = 0; starti[0] = 0;
442   for ( i=1; i<size; i++ ) {
443     startv[i] = startv[i-1] + nprocs[i-1];
444     starti[i] = starti[i-1] + nprocs[i-1]*2;
445   }
446   for ( i=0; i<stash->n; i++ ) {
447     j = owner[i];
448     if (bs2 == 1) {
449       svalues[startv[j]]              = stash->array[i];
450     } else {
451       int    k;
452       Scalar *buf1,*buf2;
453       buf1 = svalues+bs2*startv[j];
454       buf2 = stash->array+bs2*i;
455       for ( k=0; k<bs2; k++ ){ buf1[k] = buf2[k]; }
456     }
457     sindices[starti[j]]             = stash->idx[i];
458     sindices[starti[j]+nprocs[j]]   = stash->idy[i];
459     startv[j]++;
460     starti[j]++;
461   }
462   startv[0] = 0;
463   for ( i=1; i<size; i++ ) { startv[i] = startv[i-1] + nprocs[i-1];}
464   for ( i=0,count=0; i<size; i++ ) {
465     if (procs[i]) {
466       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[i],MPIU_SCALAR,i,tag1,comm,
467                        send_waits+count++);CHKERRQ(ierr);
468       ierr = MPI_Isend(sindices+2*startv[i],2*nprocs[i],MPI_INT,i,tag2,comm,
469                        send_waits+count++);CHKERRQ(ierr);
470     }
471   }
472   PetscFree(owner);
473   PetscFree(startv);
474   /* This memory is reused in scatter end  for a different purpose*/
475   for (i=0; i<2*size; i++ ) nprocs[i] = -1;
476   stash->nprocs      = nprocs;
477 
478   stash->svalues    = svalues;    stash->rvalues    = rvalues;
479   stash->nsends     = nsends;     stash->nrecvs     = nreceives;
480   stash->send_waits = send_waits; stash->recv_waits = recv_waits;
481   stash->rmax       = nmax;
482   PetscFunctionReturn(0);
483 }
484 
485 /*
486    MatStashScatterGetMesg_Private - This function waits on the receives posted
487    in the function MatStashScatterBegin_Private() and returns one message at
488    a time to the calling function. If no messages are left, it indicates this
489    by setting flg = 0, else it sets flg = 1.
490 
491    Input Parameters:
492    stash - the stash
493 
494    Output Parameters:
495    nvals - the number of entries in the current message.
496    rows  - an array of row indices (or blocked indices) corresponding to the values
497    cols  - an array of columnindices (or blocked indices) corresponding to the values
498    vals  - the values
499    flg   - 0 indicates no more message left, and the current call has no values associated.
500            1 indicates that the current call successfully received a message, and the
501              other output parameters nvals,rows,cols,vals are set appropriately.
502 */
503 #undef __FUNC__
504 #define __FUNC__ "MatStashScatterGetMesg_Private"
505 int MatStashScatterGetMesg_Private(MatStash *stash,int *nvals,int **rows,int** cols,Scalar **vals,int *flg)
506 {
507   int         i,ierr,size=stash->size,*flg_v,*flg_i;
508   int         i1,i2,*rindices,match_found=0,bs2;
509   MPI_Status  recv_status;
510 
511   PetscFunctionBegin;
512 
513   *flg = 0; /* When a message is discovered this is reset to 1 */
514   /* Return if no more messages to process */
515   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
516 
517   flg_v = stash->nprocs;
518   flg_i = flg_v + size;
519   bs2   = stash->bs*stash->bs;
520   /* If a matching pair of receieves are found, process them, and return the data to
521      the calling function. Until then keep receiving messages */
522   while (!match_found) {
523     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
524     /* Now pack the received message into a structure which is useable by others */
525     if (i % 2) {
526       ierr = MPI_Get_count(&recv_status,MPI_INT,nvals);CHKERRQ(ierr);
527       flg_i[recv_status.MPI_SOURCE] = i/2;
528       *nvals = *nvals/2; /* This message has both row indices and col indices */
529     } else {
530       ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);
531       flg_v[recv_status.MPI_SOURCE] = i/2;
532       *nvals = *nvals/bs2;
533     }
534 
535     /* Check if we have both the messages from this proc */
536     i1 = flg_v[recv_status.MPI_SOURCE];
537     i2 = flg_i[recv_status.MPI_SOURCE];
538     if (i1 != -1 && i2 != -1) {
539       rindices    = (int *) (stash->rvalues + bs2*stash->rmax*stash->nrecvs);
540       *rows       = rindices + 2*i2*stash->rmax;
541       *cols       = *rows + *nvals;
542       *vals       = stash->rvalues + i1*bs2*stash->rmax;
543       *flg        = 1;
544       stash->nprocessed ++;
545       match_found = 1;
546     }
547   }
548   PetscFunctionReturn(0);
549 }
550