xref: /petsc/src/mat/utils/matstash.c (revision af2d14d21c1ae1f12e83141719c13b01f0eba9cd)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: stash.c,v 1.23 1999/03/11 22:51:05 balay Exp balay $";
3 #endif
4 
5 #include "src/mat/matimpl.h"
6 
7 #define DEFAULT_STASH_SIZE   10000
8 /*
9    This stash is currently used for all the parallel matrix implementations.
10    The stash is where elements of a matrix destined to be stored on other
11    processors are kept until matrix assembly is done.
12 
13    This is a simple minded stash. Simply add entry to end of stash.
14 */
15 
16 #undef __FUNC__
17 #define __FUNC__ "StashCreate_Private"
18 int StashCreate_Private(MPI_Comm comm,int bs_stash, int bs_mat, Stash *stash)
19 {
20   int ierr,flg,max=DEFAULT_STASH_SIZE/(bs_stash*bs_stash);
21 
22   PetscFunctionBegin;
23   /* Require 2 tags, get the second using PetscCommGetNewTag() */
24   ierr = PetscCommDuplicate_Private(comm,&stash->comm,&stash->tag1);CHKERRQ(ierr);
25   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2); CHKERRQ(ierr);
26   ierr = OptionsGetInt(PETSC_NULL,"-stash_initial_size",&max,&flg);CHKERRQ(ierr);
27   ierr = StashSetInitialSize_Private(stash,max); CHKERRQ(ierr);
28   ierr = MPI_Comm_size(stash->comm,&stash->size); CHKERRQ(ierr);
29   ierr = MPI_Comm_rank(stash->comm,&stash->rank); CHKERRQ(ierr);
30 
31   if (bs_stash <= 0) bs_stash = 1;
32   if (bs_mat   <= 0) bs_mat   = 1;
33 
34   stash->bs_stash = bs_stash;
35   stash->bs_mat   = bs_mat;
36   stash->nmax     = 0;
37   stash->n        = 0;
38   stash->reallocs = 0;
39   stash->idx      = 0;
40   stash->idy      = 0;
41   stash->array    = 0;
42 
43   stash->send_waits  = 0;
44   stash->recv_waits  = 0;
45   stash->send_status = 0;
46   stash->nsends      = 0;
47   stash->nrecvs      = 0;
48   stash->svalues     = 0;
49   stash->rvalues     = 0;
50   stash->rmax        = 0;
51   stash->nprocs      = 0;
52   stash->nprocessed  = 0;
53   PetscFunctionReturn(0);
54 }
55 
56 #undef __FUNC__
57 #define __FUNC__ "StashDestroy_Private"
58 int StashDestroy_Private(Stash *stash)
59 {
60   int ierr;
61 
62   PetscFunctionBegin;
63   ierr = PetscCommDestroy_Private(&stash->comm); CHKERRQ(ierr);
64   if (stash->array) {PetscFree(stash->array); stash->array = 0;}
65   PetscFunctionReturn(0);
66 }
67 
68 #undef __FUNC__
69 #define __FUNC__ "StashScatterEnd_Private"
70 int StashScatterEnd_Private(Stash *stash)
71 {
72   int         nsends=stash->nsends,ierr;
73   MPI_Status  *send_status;
74 
75   PetscFunctionBegin;
76   /* wait on sends */
77   if (nsends) {
78     send_status = (MPI_Status *)PetscMalloc(2*nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
79     ierr        = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
80     PetscFree(send_status);
81   }
82 
83   /* Now update nmaxold to be app 10% more than nmax, this way the
84      wastage of space is reduced the next time this stash is used */
85   stash->oldnmax    = (int)(stash->nmax * 1.1) + 5;
86   stash->nmax       = 0;
87   stash->n          = 0;
88   stash->reallocs   = 0;
89   stash->rmax       = 0;
90   stash->nprocessed = 0;
91 
92   if (stash->array) {
93     PetscFree(stash->array);
94     stash->array = 0;
95     stash->idx   = 0;
96     stash->idy   = 0;
97   }
98   if (stash->send_waits)  {PetscFree(stash->send_waits);stash->send_waits = 0;}
99   if (stash->recv_waits)  {PetscFree(stash->recv_waits);stash->recv_waits = 0;}
100   if (stash->svalues)     {PetscFree(stash->svalues);stash->svalues = 0;}
101   if (stash->rvalues)     {PetscFree(stash->rvalues); stash->rvalues = 0;}
102   if (stash->nprocs)      {PetscFree(stash->nprocs); stash->nprocs = 0;}
103 
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNC__
108 #define __FUNC__ "StashInfo_Private"
109 int StashInfo_Private(Stash *stash)
110 {
111   PetscFunctionBegin;
112   PLogInfo(0,"StashInfo_Private:Stash size %d, mallocs incured %d\n",stash->n,stash->reallocs);
113   PetscFunctionReturn(0);
114 }
115 #undef __FUNC__
116 #define __FUNC__ "StashSetInitialSize_Private"
117 int StashSetInitialSize_Private(Stash *stash,int max)
118 {
119   PetscFunctionBegin;
120   stash->oldnmax = max;
121   stash->nmax    = 0;
122   PetscFunctionReturn(0);
123 }
124 
125 #undef __FUNC__
126 #define __FUNC__ "StashExpand_Private"
127 static int StashExpand_Private(Stash *stash)
128 {
129   int    *n_idx,*n_idy,newnmax,bs2;
130   Scalar *n_array;
131 
132   PetscFunctionBegin;
133   /* allocate a larger stash */
134   if (stash->nmax == 0) newnmax = stash->oldnmax;
135   else                  newnmax = stash->nmax*2;
136 
137   bs2     = stash->bs_stash*stash->bs_stash;
138   n_array = (Scalar *)PetscMalloc((newnmax)*(2*sizeof(int)+bs2*sizeof(Scalar)));CHKPTRQ(n_array);
139   n_idx   = (int *) (n_array + bs2*newnmax);
140   n_idy   = (int *) (n_idx + newnmax);
141   PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(Scalar));
142   PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int));
143   PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(int));
144   if (stash->array) PetscFree(stash->array);
145   stash->array   = n_array;
146   stash->idx     = n_idx;
147   stash->idy     = n_idy;
148   stash->nmax    = newnmax;
149   stash->oldnmax = newnmax;
150   stash->reallocs++;
151   PetscFunctionReturn(0);
152 }
153 
154 /*
155     Should do this properly. With a sorted array.
156 */
157 #undef __FUNC__
158 #define __FUNC__ "StashValues_Private"
159 int StashValues_Private(Stash *stash,int row,int n, int *idxn,Scalar *values)
160 {
161   int    ierr,i;
162 
163   PetscFunctionBegin;
164   for ( i=0; i<n; i++ ) {
165     if ( stash->n == stash->nmax ) {
166       ierr = StashExpand_Private(stash); CHKERRQ(ierr);
167     }
168     stash->idx[stash->n]   = row;
169     stash->idy[stash->n]   = idxn[i];
170     stash->array[stash->n] = values[i];
171     stash->n++;
172   }
173   PetscFunctionReturn(0);
174 }
175 
#undef __FUNC__
#define __FUNC__ "StashValuesBlocked_Private"
/*
   StashValuesBlocked_Private - Appends n bs*bs blocks for block-row 'row'
   to the stash. Blocks are always STORED column oriented, regardless of
   the orientation of the caller's values array.

   Input Parameters:
.  stash       - the stash (bs_stash gives the block size bs)
.  row         - block-row index shared by all n blocks
.  n           - number of blocks to stash
.  idxn        - block-column index of each block
.  values      - the caller's values array holding an rmax-by-cmax grid of blocks
.  rmax,cmax   - block dimensions of the values array
.  roworiented - nonzero if values is row oriented, zero if column oriented
.  idx         - position of this block row (roworiented) / block column
                 (column oriented) within values
*/
int StashValuesBlocked_Private(Stash *stash,int row,int n,int *idxn,Scalar *values,
                               int rmax,int cmax,int roworiented,int idx)
{
  int    ierr,i,j,k,bs2,bs=stash->bs_stash;
  Scalar *vals,*array;

  /* stepval gives the offset that one should use to access the next line of
     a given block of values */

  PetscFunctionBegin;
  bs2 = bs*bs;
  for ( i=0; i<n; i++ ) {
    if ( stash->n == stash->nmax ) {
      ierr = StashExpand_Private(stash); CHKERRQ(ierr);
    }
    stash->idx[stash->n]   = row;
    stash->idy[stash->n] = idxn[i];
    /* Now copy over the block of values. Store the values column oriented.
     This enables inserting multiple blocks belonging to a row with a single
     function call */
    if (roworiented) {
      /* transpose while copying: walk the source row by row (stride cmax*bs
         between rows) and scatter each row into a column of the stash block */
      array = stash->array + bs2*stash->n;
      vals  = values + idx*bs2*n + bs*i;
      for ( j=0; j<bs; j++ ) {
        for ( k=0; k<bs; k++ ) {array[k*bs] = vals[k];}
        array += 1;
        vals  += cmax*bs;
      }
    } else {
      /* already column oriented: copy bs columns, each bs long; source
         columns are rmax*bs apart in the caller's array */
      array = stash->array + bs2*stash->n;
      vals  = values + idx*bs + bs2*rmax*i;
      for ( j=0; j<bs; j++ ) {
        for ( k=0; k<bs; k++ ) {array[k] = vals[k];}
        array += bs;
        vals  += rmax*bs;
      }
    }
    stash->n++;
  }
  PetscFunctionReturn(0);
}
219 
#undef __FUNC__
#define __FUNC__ "StashScatterBegin_Private"
/*
   StashScatterBegin_Private - Figures out which processor owns each stashed
   entry (from the ownership ranges in owners[]), then posts nonblocking
   sends/receives moving the entries to their owners. Two messages go to
   each destination: one with the values (tag1) and one with the packed
   row,col indices (tag2). Completed by StashScatterEnd_Private(); received
   data is retrieved with StashScatterGetMesg_Private().
*/
int StashScatterBegin_Private(Stash *stash,int *owners)
{
  int         *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
  int         rank=stash->rank,size=stash->size,*nprocs,*procs,nsends,nreceives;
  int         nmax,*work,count,ierr,*sindices,*rindices,i,j,idx,mult;
  Scalar      *rvalues,*svalues;
  MPI_Comm    comm = stash->comm;
  MPI_Request *send_waits,*recv_waits;

  PetscFunctionBegin;

  bs2 = stash->bs_stash*stash->bs_stash;
  /*  first count number of contributors to each processor.
      nprocs[j] = entries going to proc j; procs[j] = 1 if any go to j */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (stash->n+1)*sizeof(int) ); CHKPTRQ(owner);

  /* if blockstash, then the owners and row,col indices
   correspond to reduced indices; otherwise owners[] holds block ranges
   and the point indices must be compared against mult*owners[] */
  if (stash->bs_stash == 1) mult = stash->bs_mat;
  else                      mult = 1;

  for ( i=0; i<stash->n; i++ ) {
    idx = stash->idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= mult*owners[j] && idx < mult*owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *)PetscMalloc(size*sizeof(int)); CHKPTRQ(work);
  ierr = MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nreceives = work[rank];
  ierr = MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);
  /* post receives:
     since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.
     Layout of the single allocation: bs2*nreceives*nmax Scalars for the
     values, then 2*nreceives*nmax ints for the row,col indices.
  */
  rvalues    = (Scalar *)PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(rvalues);
  rindices   = (int *) (rvalues + bs2*nreceives*nmax);
  recv_waits = (MPI_Request *)PetscMalloc((nreceives+1)*2*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0,count=0; i<nreceives; i++ ) {
    ierr = MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm,
                     recv_waits+count++); CHKERRQ(ierr);
    ierr = MPI_Irecv(rindices+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm,
                     recv_waits+count++); CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues    = (Scalar *)PetscMalloc((stash->n+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(svalues);
  sindices   = (int *) (svalues + bs2*stash->n);
  send_waits = (MPI_Request *) PetscMalloc(2*(nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  startv     = (int *) PetscMalloc(2*size*sizeof(int) ); CHKPTRQ(startv);
  starti     = startv + size;
  /* use 2 sends the first with all_a, the next with all_i and all_j */
  startv[0]  = 0; starti[0] = 0;
  for ( i=1; i<size; i++ ) {
    startv[i] = startv[i-1] + nprocs[i-1];
    starti[i] = starti[i-1] + nprocs[i-1]*2;
  }
  /* pack: each destination gets all its row indices followed by all its
     col indices (hence the +nprocs[j] offset for idy) */
  for ( i=0; i<stash->n; i++ ) {
    j = owner[i];
    if (bs2 == 1) {
      svalues[startv[j]]              = stash->array[i];
    } else {
      PetscMemcpy(svalues+bs2*startv[j],stash->array+bs2*i,bs2*sizeof(Scalar));
    }
    sindices[starti[j]]             = stash->idx[i];
    sindices[starti[j]+nprocs[j]]   = stash->idy[i];
    startv[j]++;
    starti[j]++;
  }
  /* the packing loop advanced startv[]; rebuild the start offsets */
  startv[0] = 0;
  for ( i=1; i<size; i++ ) { startv[i] = startv[i-1] + nprocs[i-1];}
  for ( i=0,count=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[i],MPIU_SCALAR,i,tag1,comm,
                       send_waits+count++);CHKERRQ(ierr);
      ierr = MPI_Isend(sindices+2*startv[i],2*nprocs[i],MPI_INT,i,tag2,comm,
                       send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(owner);
  PetscFree(startv);
  /* This memory is reused in scatter end  for a different purpose:
     StashScatterGetMesg_Private() uses it (initialized to -1) to pair up
     the value and index messages arriving from each source */
  for (i=0; i<2*size; i++ ) nprocs[i] = -1;
  stash->nprocs      = nprocs;

  stash->svalues    = svalues;    stash->rvalues    = rvalues;
  stash->nsends     = nsends;     stash->nrecvs     = nreceives;
  stash->send_waits = send_waits; stash->recv_waits = recv_waits;
  stash->rmax       = nmax;
  PetscFunctionReturn(0);
}
326 
/*
   StashScatterGetMesg_Private - Waits on the receives posted in
   StashScatterBegin_Private() and returns one complete message at a time
   to the calling function. If no messages are left, it indicates this by
   setting flg = 0, else it sets flg = 1.

   A "complete message" is the matched pair of receives (values + indices)
   from one source processor; the pairing state lives in stash->nprocs,
   pre-initialized to -1 by StashScatterBegin_Private().

   Output Parameters:
.  nvals - number of entries in this message
.  rows  - row index of each entry
.  cols  - column index of each entry
.  vals  - the bs2 values of each entry
.  flg   - 1 if a message was returned, 0 if none remain
*/
#undef __FUNC__
#define __FUNC__ "StashScatterGetMesg_Private"
int StashScatterGetMesg_Private(Stash *stash,int *nvals,int **rows,int** cols,Scalar **vals,int *flg)
{
  int         i,ierr,size=stash->size,*flg_v,*flg_i;
  int         i1,i2,*rindices,match_found=0,bs2;
  MPI_Status  recv_status;

  PetscFunctionBegin;

  *flg = 0; /* When a message is discovered this is reset to 1 */
  /* Return if no more messages to process */
  if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }

  /* flg_v[src]/flg_i[src] record which receive slot holds the value/index
     message from processor src; -1 means not yet arrived */
  flg_v = stash->nprocs;
  flg_i = flg_v + size;
  bs2   = stash->bs_stash*stash->bs_stash;
  /* If a matching pair of receives is found, process them, and return the data to
     the calling function. Until then keep receiving messages */
  while (!match_found) {
    ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
    /* Now pack the received message into a structure which is usable by others.
       Odd request indices are the index receives, even ones the value receives
       (see the posting order in StashScatterBegin_Private()) */
    if (i % 2) {
      ierr = MPI_Get_count(&recv_status,MPI_INT,nvals);CHKERRQ(ierr);
      flg_i[recv_status.MPI_SOURCE] = i/2;
      *nvals = *nvals/2; /* This message has both row indices and col indices */
    } else {
      ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);
      flg_v[recv_status.MPI_SOURCE] = i/2;
      *nvals = *nvals/bs2;
    }

    /* Check if we have both the messages from this proc */
    i1 = flg_v[recv_status.MPI_SOURCE];
    i2 = flg_i[recv_status.MPI_SOURCE];
    if (i1 != -1 && i2 != -1) {
      /* index data starts right after the bs2*rmax*nrecvs values in the
         single rvalues allocation */
      rindices    = (int *) (stash->rvalues + bs2*stash->rmax*stash->nrecvs);
      *rows       = rindices + 2*i2*stash->rmax;
      *cols       = *rows + *nvals;
      *vals       = stash->rvalues + i1*bs2*stash->rmax;
      *flg        = 1;
      stash->nprocessed ++;
      match_found = 1;
    }
  }
  PetscFunctionReturn(0);
}
380