xref: /petsc/src/mat/utils/matstash.c (revision 91e9ee9fc0200d5446579c7185d9e655300e8525)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: stash.c,v 1.20 1999/02/25 22:48:34 balay Exp balay $";
3 #endif
4 
5 #include "src/vec/vecimpl.h"
6 #include "src/mat/matimpl.h"
7 
8 #define DEFAULT_STASH_SIZE   10000
9 /*
10    This stash is currently used for all the parallel matrix implementations.
11    The stash is where elements of a matrix destined to be stored on other
12    processors are kept until matrix assembly is done.
13 
14    This is a simple minded stash. Simply add entry to end of stash.
15 */
16 
17 #undef __FUNC__
18 #define __FUNC__ "StashCreate_Private"
19 int StashCreate_Private(MPI_Comm comm,int bs, Stash *stash)
20 {
21   int ierr,flg,max=DEFAULT_STASH_SIZE;
22 
23   PetscFunctionBegin;
24   /* Require 2 tags, get the second using PetscCommGetNewTag() */
25   ierr = PetscCommDuplicate_Private(comm,&stash->comm,&stash->tag1);CHKERRQ(ierr);
26   ierr = PetscCommGetNewTag(comm,&stash->tag2); CHKERRQ(ierr);
27   ierr = OptionsGetInt(PETSC_NULL,"-stash_initial_size",&max,&flg);CHKERRQ(ierr);
28   ierr = StashSetInitialSize_Private(stash,max); CHKERRQ(ierr);
29 
30   if (bs <= 0) bs = 1;
31   stash->bs       = bs;
32   stash->nmax     = 0;
33   stash->n        = 0;
34   stash->reallocs = 0;
35   stash->idx      = 0;
36   stash->idy      = 0;
37   stash->array    = 0;
38 
39   stash->send_waits = 0;
40   stash->recv_waits = 0;
41   stash->nsends     = 0;
42   stash->nrecvs     = 0;
43   stash->svalues    = 0;
44   stash->rvalues    = 0;
45   stash->rmax       = 0;
46   stash->rdata      = 0;
47   PetscFunctionReturn(0);
48 }
49 
50 #undef __FUNC__
51 #define __FUNC__ "StashDestroy_Private"
52 int StashDestroy_Private(Stash *stash)
53 {
54   int ierr;
55   PetscFunctionBegin;
56   ierr = PetscCommDestroy_Private(&stash->comm); CHKERRQ(ierr);
57   if (stash->array) {PetscFree(stash->array); stash->array = 0;}
58   PetscFunctionReturn(0);
59 }
60 
61 #undef __FUNC__
62 #define __FUNC__ "StashReset_Private"
63 int StashReset_Private(Stash *stash)
64 {
65   PetscFunctionBegin;
66   /* Now update nmaxold to be app 10% more than nmax, this way the
67      wastage of space is reduced the next time this stash is used */
68   stash->oldnmax  = (int)(stash->nmax * 1.1) + 5;
69   stash->nmax     = 0;
70   stash->n        = 0;
71   stash->reallocs = 0;
72   stash->rmax     = 0;
73 
74   if (stash->array) {
75     PetscFree(stash->array);
76     stash->array = 0;
77     stash->idx   = 0;
78     stash->idy   = 0;
79   }
80   if (stash->send_waits) {PetscFree(stash->send_waits);stash->send_waits = 0;}
81   if (stash->recv_waits) {PetscFree(stash->recv_waits);stash->recv_waits =0;}
82   if (stash->svalues)    {PetscFree(stash->svalues);stash->svalues = 0;}
83   if (stash->rvalues)    {PetscFree(stash->rvalues); stash->rvalues = 0;}
84   if (stash->rdata)      {PetscFree(stash->rdata); stash->rdata = 0;}
85 
86   PetscFunctionReturn(0);
87 }
88 
89 #undef __FUNC__
90 #define __FUNC__ "StashInfo_Private"
91 int StashInfo_Private(Stash *stash)
92 {
93   PetscFunctionBegin;
94   PLogInfo(0,"StashInfo_Private:Stash size %d, mallocs incured %d\n",stash->n,stash->reallocs);
95   PetscFunctionReturn(0);
96 }
97 #undef __FUNC__
98 #define __FUNC__ "StashSetInitialSize_Private"
99 int StashSetInitialSize_Private(Stash *stash,int max)
100 {
101   PetscFunctionBegin;
102   stash->oldnmax = max;
103   stash->nmax    = 0;
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNC__
108 #define __FUNC__ "StashExpand_Private"
109 static int StashExpand_Private(Stash *stash)
110 {
111   int    *n_idx,*n_idy,newnmax;
112   Scalar *n_array;
113 
114   PetscFunctionBegin;
115   /* allocate a larger stash */
116   if (stash->nmax == 0) newnmax = stash->oldnmax;
117   else                  newnmax = stash->nmax *2;
118 
119   n_array = (Scalar *)PetscMalloc((newnmax)*(2*sizeof(int)+sizeof(Scalar)));CHKPTRQ(n_array);
120   n_idx = (int *) (n_array + newnmax);
121   n_idy = (int *) (n_idx + newnmax);
122   PetscMemcpy(n_array,stash->array,stash->nmax*sizeof(Scalar));
123   PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int));
124   PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(int));
125   if (stash->array) PetscFree(stash->array);
126   stash->array   = n_array;
127   stash->idx     = n_idx;
128   stash->idy     = n_idy;
129   stash->nmax    = newnmax;
130   stash->oldnmax = newnmax;
131   stash->reallocs++;
132   PetscFunctionReturn(0);
133 }
134 
135 /*
136     Should do this properly. With a sorted array.
137 */
138 #undef __FUNC__
139 #define __FUNC__ "StashValues_Private"
140 int StashValues_Private(Stash *stash,int row,int n, int *idxn,Scalar *values,InsertMode addv)
141 {
142   int    ierr,i,found;
143   Scalar val;
144 
145   PetscFunctionBegin;
146   for ( i=0; i<n; i++ ) {
147     found = 0;
148     val = *values++;
149     if (!found) { /* not found so add to end */
150       if ( stash->n == stash->nmax ) {
151         ierr = StashExpand_Private(stash); CHKERRQ(ierr);
152       }
153       stash->array[stash->n] = val;
154       stash->idx[stash->n]   = row;
155       stash->idy[stash->n++] = idxn[i];
156     }
157   }
158   PetscFunctionReturn(0);
159 }
160 
161 #undef __FUNC__
162 #define __FUNC__ "StashScatterBegin_Private"
/*
   StashScatterBegin_Private - Posts the nonblocking sends/receives that
   move stashed entries to their owning processors.

   owners[] gives the ownership ranges (in blocks of size stash->bs):
   processor j owns global rows [bs*owners[j], bs*owners[j+1]).

   Each destination gets two messages: one of Scalars (tag1) and one of
   ints (tag2) carrying the row indices followed by the column indices.
   Completion is handled by StashScatterEnd_Private().
*/
int StashScatterBegin_Private(Stash *stash,int *owners)
{
  MPI_Comm    comm = stash->comm;
  MPI_Request *send_waits,*recv_waits;
  Scalar      *rvalues,*svalues;
  int         *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2;
  int         rank,size,*nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  int         count,ierr,*sindices,*rindices,bs=stash->bs;

  PetscFunctionBegin;

  ierr = MPI_Comm_size(comm,&size); CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank); CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  /* nprocs[j] = number of entries destined for proc j; procs[j] = 0/1 flag
     that proc j gets a message; owner[i] = destination proc of entry i */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (stash->n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<stash->n; i++ ) {
    idx = stash->idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= bs*owners[j] && idx < bs*owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  /* summing the 0/1 flags gives, at slot `rank`, how many messages this
     proc will receive; maxing the counts gives the largest message size */
  work = (int *)PetscMalloc(size*sizeof(int)); CHKPTRQ(work);
  ierr = MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nreceives = work[rank];
  ierr = MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);
  /* post receives:
     since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.
  */
  /* One allocation: nreceives slots of nmax Scalars, then (via rindices)
     2*nmax ints per slot.  Each receive pair uses tag1 for values and
     tag2 for indices; sources are matched up in StashScatterEnd_Private() */
  rvalues    = (Scalar *)PetscMalloc((nreceives+1)*(nmax+1)*(sizeof(Scalar)+2*sizeof(int)));
  CHKPTRQ(rvalues);
  rindices   = (int *) (rvalues + nreceives*nmax);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*2*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0,count=0; i<nreceives; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm,
                     recv_waits+count++); CHKERRQ(ierr);
    ierr = MPI_Irecv(rindices+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm,
                     recv_waits+count++); CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  /* svalues holds all outgoing Scalars; sindices (in the same allocation)
     holds, per destination j, nprocs[j] row indices followed by
     nprocs[j] column indices */
  svalues    = (Scalar *) PetscMalloc((stash->n+1)*(sizeof(Scalar)+2*sizeof(int)));
  CHKPTRQ(svalues);
  sindices   = (int *) (svalues + stash->n);
  send_waits = (MPI_Request *) PetscMalloc(2*(nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  startv     = (int *) PetscMalloc(2*size*sizeof(int) ); CHKPTRQ(startv);
  starti     = startv + size;
  /* use 2 sends the first with all_a, the next with all_i and then all_j */
  startv[0]  = 0; starti[0] = 0;
  for ( i=1; i<size; i++ ) {
    startv[i] = startv[i-1] + nprocs[i-1];
    starti[i] = starti[i-1] + nprocs[i-1]*2;
  }
  /* scatter the entries into the send buffers; startv/starti are used as
     running cursors here (startv is rebuilt below before the sends) */
  for ( i=0; i<stash->n; i++ ) {
    j = owner[i];
    svalues[startv[j]]              = stash->array[i];
    sindices[starti[j]]             = stash->idx[i];
    sindices[starti[j]+nprocs[j]]   = stash->idy[i];
    startv[j]++;
    starti[j]++;
  }
  startv[0] = 0;
  for ( i=1; i<size; i++ ) { startv[i] = startv[i-1] + nprocs[i-1];}
  /* note starti[i] == 2*startv[i] by construction, so sindices+2*startv[i]
     is the start of proc i's index block */
  for ( i=0,count=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+startv[i],nprocs[i],MPIU_SCALAR,i,tag1,comm,
                       send_waits+count++);CHKERRQ(ierr);
      ierr = MPI_Isend(sindices+2*startv[i],2*nprocs[i],MPI_INT,i,tag2,comm,
                       send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(owner);
  PetscFree(startv);
  PetscFree(nprocs);
  /* buffers must stay alive until the requests complete; they are freed
     in StashReset_Private() */
  stash->svalues    = svalues;    stash->rvalues    = rvalues;
  stash->nsends     = nsends;     stash->nrecvs     = nreceives;
  stash->send_waits = send_waits; stash->recv_waits = recv_waits;
  stash->rmax       = nmax;

  PetscFunctionReturn(0);
}
260 
261 
262 #undef __FUNC__
263 #define __FUNC__ "StashScatterEnd_Private"
/*
   StashScatterEnd_Private - Completes the communication started by
   StashScatterBegin_Private() and packs the received messages into
   stash->rdata (one Stash_rdata record per sending processor, holding
   pointers to its values, row indices, column indices, and count).
*/
int StashScatterEnd_Private(Stash *stash)
{
  int         i,ierr,size,*nprocs;
  MPI_Status  *send_status,*recv_status;
  int         i1,i2,s1,s2,count,*rindices;

  PetscFunctionBegin;

  /* wait on receives */
  ierr = MPI_Comm_size(stash->comm,&size); CHKERRQ(ierr);
  if (stash->nrecvs) {
    /* two receives (values + indices) were posted per expected message */
    recv_status = (MPI_Status *) PetscMalloc(2*(stash->nrecvs+1)*sizeof(MPI_Status));
    CHKPTRQ(recv_status);
    ierr = MPI_Waitall(2*stash->nrecvs,stash->recv_waits,recv_status);CHKERRQ(ierr);
    /* Now pack the received messages into a structure which is useable by others */
    stash->rdata = (Stash_rdata *) PetscMalloc(stash->nrecvs*sizeof(Stash_rdata));
    CHKPTRQ(stash->rdata);
    /* Receives used MPI_ANY_SOURCE, so the value message and index message
       from the same source may have landed in different buffer slots.
       nprocs[2*src]   = slot that got src's value (tag1) message,
       nprocs[2*src+1] = slot that got src's index (tag2) message;
       -1 means no message from that source. */
    nprocs = (int *) PetscMalloc((2*size+1)*sizeof(int)); CHKPTRQ(nprocs);
    rindices = (int *) (stash->rvalues + stash->rmax*stash->nrecvs);
    for (i=0; i<2*size+1; i++ ) nprocs[i] = -1;
    for ( i=0; i<stash->nrecvs; i++ ){
      nprocs[2*recv_status[2*i].MPI_SOURCE] = i;
      nprocs[2*recv_status[2*i+1].MPI_SOURCE+1] = i;
    }
    for (i=0,count=0; i<size; i++) {
      i1 = nprocs[2*i];
      i2 = nprocs[2*i+1];
      if (i1 != -1) {
        /* a value message without a matching index message is impossible */
        if (i2 == -1) SETERRQ(1,0,"Internal Error");
        /* actual lengths: s1 Scalars received in slot i1, s2 ints in slot i2 */
        ierr = MPI_Get_count(recv_status+2*i1,MPIU_SCALAR,&s1);CHKERRQ(ierr);
        ierr = MPI_Get_count(recv_status+2*i2+1,MPI_INT,&s2);CHKERRQ(ierr);
        /* the index message carries a row and a column per value */
        if (s1*2 != s2) SETERRQ(1,0,"Internal Error");
        stash->rdata[count].a = stash->rvalues + i1*stash->rmax;
        stash->rdata[count].i = rindices + 2*i2*stash->rmax;
        stash->rdata[count].j = stash->rdata[count].i + s1;
        stash->rdata[count].n = s1;
        count ++;
      }
    }
    PetscFree(recv_status);
    PetscFree(nprocs);
  }
  /* wait on sends */
  if (stash->nsends) {
    send_status = (MPI_Status *)PetscMalloc(2*stash->nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    ierr        = MPI_Waitall(2*stash->nsends,stash->send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFunctionReturn(0);
}
315