xref: /petsc/src/mat/impls/aij/mpi/mpiov.c (revision 8a9dbd8b220753ff5f67ca44320cbf27bbee3b16)
1 #ifndef lint
2 static char vcid[] = "$Id: mpiov.c,v 1.7 1996/01/30 22:11:16 balay Exp balay $";
3 #endif
4 
5 #include "mpiaij.h"
6 int MatIncreaseOverlap_MPIAIJ_private(Mat, int, IS *);
7 
8 int MatIncreaseOverlap_MPIAIJ(Mat C, int is_max, IS *is, int ov)
9 {
10   int i, ierr;
11   if (ov < 0){ SETERRQ(1," MatIncreaseOverlap_MPIAIJ: negative overlap specified\n");}
12   for (i =0; i<ov; ++i) {
13     ierr = MatIncreaseOverlap_MPIAIJ_private(C, is_max, is); CHKERRQ(ierr);
14   }
15   return 0;
16 }
17 
18 
19 
/*
   MatIncreaseOverlap_MPIAIJ_private - Increases the overlap of the index
   sets is[0..is_max-1] by exactly ONE level, for the MPIAIJ matrix C.

   Algorithm (all-to-some personalized communication, two rounds):
     1. Hash each global row to its owning process (rtable).
     2. Count, per destination process, how many rows of which index sets
        must be shipped there (w1..w4), then pack one message per
        destination:  [no-of-IS | (is-id, count)... | row indices...].
     3. Post nonblocking receives/sends for round 1; while they are in
        flight, process the locally-owned rows (walk the A and B parts of
        the matrix and add their column indices to the sets).
     4. For every received message, do the same row->columns expansion on
        behalf of the sender (xdata/xtable), and send the result back
        (round 2).
     5. Merge the round-2 replies into the local table/data arrays and
        rebuild each IS from the accumulated indices.

   The input index sets are destroyed and replaced by the enlarged ones.
*/
int MatIncreaseOverlap_MPIAIJ_private(Mat C, int is_max, IS *is)
{
  Mat_MPIAIJ  *c = (Mat_MPIAIJ *) C->data;
  Mat          A = c->A, B = c->B;
  Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)B->data;
  int         **idx, *n, *w1, *w2, *w3, *w4, *rtable,**table,**data;
  int         size, rank, m,i,j,k, ierr, **rbuf, row, proc, mct, msz, **outdat, **ptr;
  int         *ctr, sum, *pa, tag, *tmp,bsz, nmsg , *isz, *isz1, **xdata, *xtable,rstart;
  int         cstart, *ai, *aj, *bi, *bj, *garray, bsz1, **rbuf2;
  MPI_Comm    comm;
  MPI_Request *send_waits,*recv_waits,*send_waits2,*recv_waits2 ;
  MPI_Status  *send_status ,*recv_status;
                                /* assume overlap = 1 */
  comm   = C->comm;
  tag    = C->tag;  /* NOTE(review): the SAME tag is used for both communication
                       rounds below; with MPI_ANY_SOURCE receives this relies on
                       no rank entering round 2 while a peer is still matching
                       round-1 messages — verify, or use a second tag */
  size   = c->size;
  rank   = c->rank;
  m      = c->M;                /* global number of rows */
  rstart = c->rstart;           /* first row owned by this process */
  cstart = c->cstart;           /* first column of the diagonal (A) block */
  ai     = a->i;                /* CSR row pointers / column indices of the */
  aj     = a->j;                /* diagonal block A (local column numbering) */
  bi     = b->i;                /* CSR row pointers / column indices of the */
  bj     = b->j;                /* off-diagonal block B (compressed columns) */
  garray = c->garray;           /* maps B's compressed columns -> global columns */

  idx    = (int **)PetscMalloc((is_max+1)*sizeof(int *)); CHKPTRQ(idx);
  n      = (int *)PetscMalloc((is_max+1)*sizeof(int )); CHKPTRQ(n);
  rtable = (int *)PetscMalloc((m+1)*sizeof(int )); CHKPTRQ(rtable);
                                /* Hash table for mapping row -> proc */

  for ( i=0 ; i<is_max ; i++) {
    ierr = ISGetIndices(is[i],&idx[i]);  CHKERRQ(ierr);
    ierr = ISGetLocalSize(is[i],&n[i]);  CHKERRQ(ierr);
  }

  /* Create hash table for the mapping: row -> proc.  rowners[] holds the
     row-ownership ranges, so rtable[row] is the rank that owns 'row'. */
  for( i=0, j=0; i< size; i++) {
    for (; j <c->rowners[i+1]; j++) {
      rtable[j] = i;
    }
  }

  /* evaluate communication - mesg to who, length of mesg, and buffer space
     required. Based on this, buffers are allocated, and data copied into them*/
  w1     = (int *)PetscMalloc((size)*4*sizeof(int )); CHKPTRQ(w1); /*  mesg size */
  w2     = w1 + size;         /* if w2[i] marked, then a message to proc i*/
  w3     = w2 + size;         /* no of IS that needs to be sent to proc i */
  w4     = w3 + size;         /* temp work space used in determining w1, w2, w3 */
  PetscMemzero(w1,(size)*3*sizeof(int)); /* initialize w1..w3; w4 is zeroed per IS below */
  for ( i=0;  i<is_max ; i++) {
    PetscMemzero(w4,(size)*sizeof(int)); /* initialize work vector */
    for ( j =0 ; j < n[i] ; j++) {
      row  = idx[i][j];
      proc = rtable[row];
      w4[proc]++;               /* rows of IS i destined for 'proc' */
    }
    for( j = 0; j < size; j++){
      /* accumulate total row count (w1) and number of contributing ISs (w3) */
      if( w4[j] ) { w1[j] += w4[j];  w3[j] += 1;}
    }
  }

  mct      = 0;              /* no of outgoing messages */
  msz      = 0;              /* total mesg length (for all procs) */
  w1[rank] = 0;              /* no mesg sent to itself */
  w3[rank] = 0;
  for (i =0; i < size ; i++) {
    if (w1[i])  { w2[i] = 1; mct++;} /* there exists a message to proc i */
  }
  pa = (int *)PetscMalloc((mct +1)*sizeof(int)); CHKPTRQ(pa); /* (proc-array): ranks we send to */
  for (i =0, j=0; i < size ; i++) {
    if (w1[i]) { pa[j] = i; j++; }
  }

  /* Each message would have a header = 1 + 2*(no of IS) + data */
  for (i = 0; i<mct ; i++) {
    j = pa[i];
    w1[j] += w2[j] + 2* w3[j];  /* w1[j] becomes the full message length */
    msz   += w1[j];
  }

  /* Allocate Memory for outgoing messages */
  outdat    = (int **)PetscMalloc( 2*size*sizeof(int*)); CHKPTRQ(outdat);
  PetscMemzero(outdat,  2*size*sizeof(int*));
  tmp       = (int *)PetscMalloc((msz+1) *sizeof (int)); CHKPTRQ(tmp); /* one arena for all msgs */
  ptr       = outdat +size;     /* Pointers to the data in outgoing buffers */
  ctr       = (int *)PetscMalloc( size*sizeof(int));   CHKPTRQ(ctr);

  {
    /* carve the arena 'tmp' into one contiguous slice per destination */
    int *iptr = tmp;
    int ict  = 0;
    for (i = 0; i < mct ; i++) {
      j         = pa[i];
      iptr     +=  ict;         /* advance past the previous message */
      outdat[j] = iptr;
      ict       = w1[j];
    }
  }

  /* Form the outgoing messages */
  /* plug in the headers: [count | (is-id, nrows) pairs] */
  for ( i=0 ; i<mct ; i++) {
    j = pa[i];
    outdat[j][0] = 0;                       /* number of ISs packed so far */
    PetscMemzero(outdat[j]+1, 2 * w3[j]*sizeof(int));
    ptr[j] = outdat[j] + 2*w3[j] +1;        /* data area starts after the header */
  }

  /* Memory for doing local proc's work:
     table[i][row] = nonzero once 'row' is in set i (dedup flag),
     data[i][]     = the accumulated indices of set i, isz[i] = its size */
  table = (int **)PetscMalloc((is_max+1)*sizeof(int *));  CHKPTRQ(table);
  data  = (int **)PetscMalloc((is_max+1)*sizeof(int *)); CHKPTRQ(data);
  table[0] = (int *)PetscMalloc((m+1)*(is_max)*2*sizeof(int)); CHKPTRQ(table[0]);
  data [0] = table[0] + (m+1)*(is_max);

  for(i = 1; i<is_max ; i++) {
    table[i] = table[0] + (m+1)*i;
    data[i]  = data[0] + (m+1)*i;
  }

  PetscMemzero((void*)*table,(m+1)*(is_max)*sizeof(int));
  isz = (int *)PetscMalloc((is_max+1) *sizeof(int)); CHKPTRQ(isz);
  PetscMemzero((void *)isz,(is_max+1) *sizeof(int));

  /* Parse the IS and update local tables and the outgoing buf with the data*/
  for ( i=0 ; i<is_max ; i++) {
    PetscMemzero(ctr,size*sizeof(int));  /* per-destination row count for THIS IS */
    for( j=0;  j<n[i]; j++) {  /* parse the indices of each IS */
      row  = idx[i][j];
      proc = rtable[row];
      if (proc != rank) { /* copy to the outgoing buf*/
        ctr[proc]++;
        *ptr[proc] = row;
        ptr[proc]++;
      }
      else { /* Update the table: first time we see 'row' in set i, record it */
        if(!table[i][row]++) { data[i][isz[i]++] = row;}
      }
    }
    /* Update the headers for the current IS */
    for( j = 0; j<size; j++) { /* Can Optimize this loop too */
      if (ctr[j]) {
        k= ++outdat[j][0];              /* one more IS in the message to j */
        outdat[j][2*k]   = ctr[j];      /* ... with this many rows */
        outdat[j][2*k-1] = i;           /* ... belonging to IS number i */
      }
    }
  }

  /* Check Validity of the outgoing messages: header IS-count matches w3 */
  for ( i=0 ; i<mct ; i++) {
    j = pa[i];
    if (w3[j] != outdat[j][0]) {SETERRQ(1,"MatIncreaseOverlap_MPIAIJ: Blew it! Header[1] mismatch!\n"); }
  }

  /* ... and the per-IS counts sum to the computed message length w1 */
  for ( i=0 ; i<mct ; i++) {
    j = pa[i];
    sum = 1;
    for (k = 1; k <= w3[j]; k++) sum += outdat[j][2*k]+2;
    if (sum != w1[j]) { SETERRQ(1,"MatIncreaseOverlap_MPIAIJ: Blew it! Header[2-n] mismatch!  \n"); }
  }


  /* Do a global reduction to determine how many messages to expect */
  {
    int *rw1, *rw2;
    rw1 = (int *)PetscMalloc(2*size*sizeof(int)); CHKPTRQ(rw1);
    rw2 = rw1+size;
    /* MAX over all senders' w1[rank] = largest message anyone sends to me */
    MPI_Allreduce((void *)w1, rw1, size, MPI_INT, MPI_MAX, comm);
    bsz   = rw1[rank];
    /* SUM over all senders' w2[rank] = number of messages I will receive */
    MPI_Allreduce((void *)w2, rw2, size, MPI_INT, MPI_SUM, comm);
    nmsg  = rw2[rank];
    PetscFree(rw1);
  }

  /* Allocate memory for recv buffers . Prob none if nmsg = 0 ???? */
  rbuf    = (int**) PetscMalloc((nmsg+1) *sizeof(int*));  CHKPTRQ(rbuf);
  rbuf[0] = (int *) PetscMalloc((nmsg *bsz+1) * sizeof(int));  CHKPTRQ(rbuf[0]);
  for (i=1; i<nmsg ; ++i) rbuf[i] = rbuf[i-1] + bsz;

  /* Now post the receives */
  recv_waits = (MPI_Request *) PetscMalloc((nmsg+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nmsg; ++i){
    MPI_Irecv((void *)rbuf[i], bsz, MPI_INT, MPI_ANY_SOURCE, tag, comm, recv_waits+i);
  }

  /*  Now  post the sends */
  send_waits = (MPI_Request *) PetscMalloc((mct+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  for( i =0; i< mct; ++i){
    j = pa[i];
    MPI_Isend( (void *)outdat[j], w1[j], MPI_INT, j, tag, comm, send_waits+i);
  }

  /* Do Local work (overlapped with the communication above) */
  /* Extract the matrices: for every row already in a set, add all its
     column indices (from both the A and B parts) to the same set */
  {
    int  start, end, val, max;

    for( i=0; i<is_max; i++) {
      /* iterate only over the ORIGINAL members (isz[i] grows inside) */
      for ( j=0, max =isz[i] ; j< max; j++) {
        row   = data[i][j] - rstart;    /* global row -> local row */
        start = ai[row];
        end   = ai[row+1];
        for ( k=start; k < end; k++) { /* Amat: local col -> global col */
          val = aj[k] +cstart;
          if(!table[i][val]++) { data[i][isz[i]++] = val;}
        }
        start = bi[row];
        end   = bi[row+1];
        for ( k=start; k < end; k++) { /* Bmat: compressed col -> global col */
          val = garray[bj[k]] ;
          if(!table[i][val]++) { data[i][isz[i]++] = val;}
        }
      }
    }
  }
  /* Receive messages */
  {
    int        index;

    recv_status = (MPI_Status *) PetscMalloc( (nmsg+1)*sizeof(MPI_Status) );
    CHKPTRQ(recv_status);
    for ( i=0; i< nmsg; ++i) {
      /* statuses land in arrival order; recv_status[i] is NOT tied to rbuf[i] */
      MPI_Waitany(nmsg, recv_waits, &index, recv_status+i);
    }

    send_status = (MPI_Status *) PetscMalloc( (mct+1)*sizeof(MPI_Status) );
    CHKPTRQ(send_status);
    MPI_Waitall(mct,send_waits,send_status);
  }


  /* Do work on received messages: expand the rows the senders asked about
     and build one reply message (xdata[i]) per incoming message */
  {
    int ct, ct1, ct2;
    int oct2, l, start, end, val, max1, max2;


    for (i =0, ct =0; i< nmsg; ++i) ct+= rbuf[i][0];   /* total ISs received */

    xdata   = (int **)PetscMalloc((nmsg+1)*sizeof(int *)); CHKPTRQ(xdata);
    /* xtable = one (m+1)-sized dedup table, followed by the reply arena */
    xtable  = (int *)PetscMalloc((ct+nmsg+(m+1)*(ct+1))*sizeof(int)); CHKPTRQ(xtable);
    xdata [0] = xtable + (m+1);
    isz1 = (int *)PetscMalloc((nmsg+1) *sizeof(int)); CHKPTRQ(isz1);
    PetscMemzero((void *)isz1,(nmsg+1) *sizeof(int));


    for (i =0; i< nmsg; i++) { /* for each mesg from proc i */
      ct1 = 2*rbuf[i][0]+1;    /* read cursor: past the incoming header */
      ct2 = 2*rbuf[i][0]+1;    /* write cursor: past the reply header */
      for (j = 1, max1= rbuf[i][0]; j<=max1; j++) { /* for each IS from proc i*/
        PetscMemzero((void *)xtable,(m+1)*sizeof(int));
        oct2 = ct2;            /* start of this IS's reply data */
        for (k =0; k < rbuf[i][2*j]; k++, ct1++) {
          row = rbuf[i][ct1];
          if(!xtable[row]++) { xdata[i][ct2++] = row;}
        }
        /* expand each requested row exactly as in the local-work loop */
        for ( k=oct2, max2 =ct2 ; k< max2; k++) {
          row   = xdata[i][k] - rstart;
          start = ai[row];
          end   = ai[row+1];
          for ( l=start; l < end; l++) {
            val = aj[l] +cstart;
            if(!xtable[val]++) { xdata[i][ct2++] = val;}
          }
          start = bi[row];
          end   = bi[row+1];
          for ( l=start; l < end; l++) {
            val = garray[bj[l]] ;
            if(!xtable[val]++) { xdata[i][ct2++] = val;}
          }
        }
        /* Update the header*/
        xdata[i][2*j]   = ct2-oct2; /* Undo the vector isz1 and use only a var*/
        xdata[i][2*j-1] = rbuf[i][2*j-1];
      }
      xdata[i][0] = rbuf[i][0];
      xdata[i+1]  = xdata[i] +ct2;   /* next reply starts right after this one */
      isz1[i]     = ct2; /* size of each message */
    }
  }
  /* need isz, xdata;*/

  /* Send the data back*/
  /* Do a global reduction to know the buffer space req for incoming messages*/
  {
    int *rw1, *rw2;

    rw1 = (int *)PetscMalloc(2*size*sizeof(int)); CHKPTRQ(rw1);
    PetscMemzero((void*)rw1,2*size*sizeof(int));
    rw2 = rw1+size;
    for (i =0; i < nmsg ; ++i) {
      proc      = recv_status[i].MPI_SOURCE;  /* reply goes back to the sender */
      rw1[proc] = isz1[i];
    }

    MPI_Allreduce((void *)rw1, (void *)rw2, size, MPI_INT, MPI_MAX, comm);
    bsz1   = rw2[rank];   /* largest reply anyone sends to me */
    PetscFree(rw1);
  }

  /* Allocate buffers*/

  /* Allocate memory for recv buffers . Prob none if nmsg = 0 ???? */
  rbuf2    = (int**) PetscMalloc((mct+1) *sizeof(int*));  CHKPTRQ(rbuf2);
  rbuf2[0] = (int *) PetscMalloc((mct*bsz1+1) * sizeof(int));  CHKPTRQ(rbuf2[0]);
  for (i=1; i<mct ; ++i) rbuf2[i] = rbuf2[i-1] + bsz1;

  /* Now post the receives (one reply per message we originally sent) */
  /* NOTE(review): CHKPTRQ appears twice on the next two lines — the first
     (semicolon-less) occurrence looks like an editing slip; harmless but
     should be removed */
  recv_waits2 = (MPI_Request *)PetscMalloc((mct+1)*sizeof(MPI_Request)); CHKPTRQ(recv_waits2)
  CHKPTRQ(recv_waits2);
  for ( i=0; i<mct; ++i){
    MPI_Irecv((void *)rbuf2[i], bsz1, MPI_INT, MPI_ANY_SOURCE, tag, comm, recv_waits2+i);
  }

  /*  Now  post the sends */
  send_waits2 = (MPI_Request *) PetscMalloc((nmsg+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits2);
  for( i =0; i< nmsg; ++i){
    j = recv_status[i].MPI_SOURCE;
    MPI_Isend( (void *)xdata[i], isz1[i], MPI_INT, j, tag, comm, send_waits2+i);
  }

  /* receive work done on other processors */
  {
    int         index, is_no, ct1, max;
    MPI_Status  *send_status2 ,*recv_status2;

    recv_status2 = (MPI_Status *) PetscMalloc( (mct+1)*sizeof(MPI_Status) );
    CHKPTRQ(recv_status2);


    for ( i=0; i< mct; ++i) {
      MPI_Waitany(mct, recv_waits2, &index, recv_status2+i);
      /* Process the message: merge the remote expansions into our sets */
      ct1 = 2*rbuf2[index][0]+1;       /* skip the reply header */
      for (j=1; j<=rbuf2[index][0]; j++) {
        max   = rbuf2[index][2*j];     /* number of indices for this IS */
        is_no = rbuf2[index][2*j-1];   /* which IS they belong to */
        for (k=0; k < max ; k++, ct1++) {
          row = rbuf2[index][ct1];
          if(!table[is_no][row]++) { data[is_no][isz[is_no]++] = row;}
        }
      }
    }


    send_status2 = (MPI_Status *) PetscMalloc( (nmsg+1)*sizeof(MPI_Status) );
    CHKPTRQ(send_status2);
    MPI_Waitall(nmsg,send_waits2,send_status2);

    PetscFree(send_status2); PetscFree(recv_status2);
  }
  /* Replace the input index sets with the enlarged ones */
  for( i=0; i< is_max; ++i) {
    ierr = ISRestoreIndices(is[i], idx+i); CHKERRQ(ierr);
  }
  for( i=0; i< is_max; ++i) {
    ierr = ISDestroy(is[i]); CHKERRQ(ierr);
  }
  for ( i=0; i<is_max; ++i) {
    ierr = ISCreateSeq(MPI_COMM_SELF, isz[i], data[i], is+i); CHKERRQ(ierr);
  }
  /* pack up*/

  /* done !!!! */

  /* whew! already done? check again :) */
  PetscFree(idx);
  PetscFree(n);
  PetscFree(rtable);
  PetscFree(w1);
  PetscFree(tmp);
  PetscFree(outdat);
  PetscFree(ctr);
  PetscFree(pa);
  PetscFree(rbuf[0]);
  PetscFree(rbuf);
  PetscFree(rbuf2[0]);
  PetscFree(rbuf2);
  PetscFree(send_waits);
  PetscFree(recv_waits);
  PetscFree(send_waits2);
  PetscFree(recv_waits2);
  PetscFree(table[0]);
  PetscFree(table);
  PetscFree(data);
  PetscFree(send_status);
  PetscFree(recv_status);
  PetscFree(isz);
  PetscFree(isz1);
  PetscFree(xtable);
  PetscFree(xdata);

  /* Dont forget to ISRestoreIndices */
  return 0;
}
417