1 #ifndef lint 2 static char vcid[] = "$Id: mpiov.c,v 1.7 1996/01/30 22:11:16 balay Exp balay $"; 3 #endif 4 5 #include "mpiaij.h" 6 int MatIncreaseOverlap_MPIAIJ_private(Mat, int, IS *); 7 8 int MatIncreaseOverlap_MPIAIJ(Mat C, int is_max, IS *is, int ov) 9 { 10 int i, ierr; 11 if (ov < 0){ SETERRQ(1," MatIncreaseOverlap_MPIAIJ: negative overlap specified\n");} 12 for (i =0; i<ov; ++i) { 13 ierr = MatIncreaseOverlap_MPIAIJ_private(C, is_max, is); CHKERRQ(ierr); 14 } 15 return 0; 16 } 17 18 19 20 int MatIncreaseOverlap_MPIAIJ_private(Mat C, int is_max, IS *is) 21 { 22 Mat_MPIAIJ *c = (Mat_MPIAIJ *) C->data; 23 Mat A = c->A, B = c->B; 24 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)B->data; 25 int **idx, *n, *w1, *w2, *w3, *w4, *rtable,**table,**data; 26 int size, rank, m,i,j,k, ierr, **rbuf, row, proc, mct, msz, **outdat, **ptr; 27 int *ctr, sum, *pa, tag, *tmp,bsz, nmsg , *isz, *isz1, **xdata, *xtable,rstart; 28 int cstart, *ai, *aj, *bi, *bj, *garray, bsz1, **rbuf2; 29 MPI_Comm comm; 30 MPI_Request *send_waits,*recv_waits,*send_waits2,*recv_waits2 ; 31 MPI_Status *send_status ,*recv_status; 32 /* assume overlap = 1 */ 33 comm = C->comm; 34 tag = C->tag; 35 size = c->size; 36 rank = c->rank; 37 m = c->M; 38 rstart = c->rstart; 39 cstart = c->cstart; 40 ai = a->i; 41 aj = a->j; 42 bi = b->i; 43 bj = b->j; 44 garray = c->garray; 45 46 idx = (int **)PetscMalloc((is_max+1)*sizeof(int *)); CHKPTRQ(idx); 47 n = (int *)PetscMalloc((is_max+1)*sizeof(int )); CHKPTRQ(n); 48 rtable = (int *)PetscMalloc((m+1)*sizeof(int )); CHKPTRQ(rtable); 49 /* Hash table for maping row ->proc */ 50 51 for ( i=0 ; i<is_max ; i++) { 52 ierr = ISGetIndices(is[i],&idx[i]); CHKERRQ(ierr); 53 ierr = ISGetLocalSize(is[i],&n[i]); CHKERRQ(ierr); 54 } 55 56 /* Create hash table for the mapping :row -> proc*/ 57 for( i=0, j=0; i< size; i++) { 58 for (; j <c->rowners[i+1]; j++) { 59 rtable[j] = i; 60 } 61 } 62 63 /* evaluate communication - mesg to who, length of mesg, and buffer space 64 required. Based on this, buffers are allocated, and data copied into them*/ 65 w1 = (int *)PetscMalloc((size)*4*sizeof(int )); CHKPTRQ(w1); /* mesg size */ 66 w2 = w1 + size; /* if w2[i] marked, then a message to proc i*/ 67 w3 = w2 + size; /* no of IS that needs to be sent to proc i */ 68 w4 = w3 + size; /* temp work space used in determining w1, w2, w3 */ 69 PetscMemzero(w1,(size)*3*sizeof(int)); /* initialise work vector*/ 70 for ( i=0; i<is_max ; i++) { 71 PetscMemzero(w4,(size)*sizeof(int)); /* initialise work vector*/ 72 for ( j =0 ; j < n[i] ; j++) { 73 row = idx[i][j]; 74 proc = rtable[row]; 75 w4[proc]++; 76 } 77 for( j = 0; j < size; j++){ 78 if( w4[j] ) { w1[j] += w4[j]; w3[j] += 1;} 79 } 80 } 81 82 mct = 0; /* no of outgoing messages */ 83 msz = 0; /* total mesg length (for all proc */ 84 w1[rank] = 0; /* no mesg sent to intself */ 85 w3[rank] = 0; 86 for (i =0; i < size ; i++) { 87 if (w1[i]) { w2[i] = 1; mct++;} /* there exists a message to proc i */ 88 } 89 pa = (int *)PetscMalloc((mct +1)*sizeof(int)); CHKPTRQ(pa); /* (proc -array) */ 90 for (i =0, j=0; i < size ; i++) { 91 if (w1[i]) { pa[j] = i; j++; } 92 } 93 94 /* Each message would have a header = 1 + 2*(no of IS) + data */ 95 for (i = 0; i<mct ; i++) { 96 j = pa[i]; 97 w1[j] += w2[j] + 2* w3[j]; 98 msz += w1[j]; 99 } 100 101 /* Allocate Memory for outgoing messages */ 102 outdat = (int **)PetscMalloc( 2*size*sizeof(int*)); CHKPTRQ(outdat); 103 PetscMemzero(outdat, 2*size*sizeof(int*)); 104 tmp = (int *)PetscMalloc((msz+1) *sizeof (int)); CHKPTRQ(tmp); /*mrsg arr */ 105 ptr = outdat +size; /* Pointers to the data in outgoing buffers */ 106 ctr = (int *)PetscMalloc( size*sizeof(int)); CHKPTRQ(ctr); 107 108 { 109 int *iptr = tmp; 110 int ict = 0; 111 for (i = 0; i < mct ; i++) { 112 j = pa[i]; 113 iptr += ict; 114 outdat[j] = iptr; 115 ict = w1[j]; 116 } 117 } 118 119 /* Form the outgoing messages */ 120 /*plug in the headers*/ 121 for ( i=0 ; i<mct ; i++) { 122 j = pa[i]; 123 outdat[j][0] = 0; 124 PetscMemzero(outdat[j]+1, 2 * w3[j]*sizeof(int)); 125 ptr[j] = outdat[j] + 2*w3[j] +1; 126 } 127 128 /* Memory for doing local proc's work*/ 129 table = (int **)PetscMalloc((is_max+1)*sizeof(int *)); CHKPTRQ(table); 130 data = (int **)PetscMalloc((is_max+1)*sizeof(int *)); CHKPTRQ(data); 131 table[0] = (int *)PetscMalloc((m+1)*(is_max)*2*sizeof(int)); CHKPTRQ(table[0]); 132 data [0] = table[0] + (m+1)*(is_max); 133 134 for(i = 1; i<is_max ; i++) { 135 table[i] = table[0] + (m+1)*i; 136 data[i] = data[0] + (m+1)*i; 137 } 138 139 PetscMemzero((void*)*table,(m+1)*(is_max)*sizeof(int)); 140 isz = (int *)PetscMalloc((is_max+1) *sizeof(int)); CHKPTRQ(isz); 141 PetscMemzero((void *)isz,(is_max+1) *sizeof(int)); 142 143 /* Parse the IS and update local tables and the outgoing buf with the data*/ 144 for ( i=0 ; i<is_max ; i++) { 145 PetscMemzero(ctr,size*sizeof(int)); 146 for( j=0; j<n[i]; j++) { /* parse the indices of each IS */ 147 row = idx[i][j]; 148 proc = rtable[row]; 149 if (proc != rank) { /* copy to the outgoing buf*/ 150 ctr[proc]++; 151 *ptr[proc] = row; 152 ptr[proc]++; 153 } 154 else { /* Update the table */ 155 if(!table[i][row]++) { data[i][isz[i]++] = row;} 156 } 157 } 158 /* Update the headers for the current IS */ 159 for( j = 0; j<size; j++) { /* Can Optimise this loop too */ 160 if (ctr[j]) { 161 k= ++outdat[j][0]; 162 outdat[j][2*k] = ctr[j]; 163 outdat[j][2*k-1] = i; 164 } 165 } 166 } 167 168 /* Check Validity of the outgoing messages */ 169 for ( i=0 ; i<mct ; i++) { 170 j = pa[i]; 171 if (w3[j] != outdat[j][0]) {SETERRQ(1,"MatIncreaseOverlap_MPIAIJ: Blew it! Header[1] mismatch!\n"); } 172 } 173 174 for ( i=0 ; i<mct ; i++) { 175 j = pa[i]; 176 sum = 1; 177 for (k = 1; k <= w3[j]; k++) sum += outdat[j][2*k]+2; 178 if (sum != w1[j]) { SETERRQ(1,"MatIncreaseOverlap_MPIAIJ: Blew it! Header[2-n] mismatch! \n"); } 179 } 180 181 182 /* Do a global reduction to determine how many messages to expect*/ 183 { 184 int *rw1, *rw2; 185 rw1 = (int *)PetscMalloc(2*size*sizeof(int)); CHKPTRQ(rw1); 186 rw2 = rw1+size; 187 MPI_Allreduce((void *)w1, rw1, size, MPI_INT, MPI_MAX, comm); 188 bsz = rw1[rank]; 189 MPI_Allreduce((void *)w2, rw2, size, MPI_INT, MPI_SUM, comm); 190 nmsg = rw2[rank]; 191 PetscFree(rw1); 192 } 193 194 /* Allocate memory for recv buffers . Prob none if nmsg = 0 ???? */ 195 rbuf = (int**) PetscMalloc((nmsg+1) *sizeof(int*)); CHKPTRQ(rbuf); 196 rbuf[0] = (int *) PetscMalloc((nmsg *bsz+1) * sizeof(int)); CHKPTRQ(rbuf[0]); 197 for (i=1; i<nmsg ; ++i) rbuf[i] = rbuf[i-1] + bsz; 198 199 /* Now post the receives */ 200 recv_waits = (MPI_Request *) PetscMalloc((nmsg+1)*sizeof(MPI_Request)); 201 CHKPTRQ(recv_waits); 202 for ( i=0; i<nmsg; ++i){ 203 MPI_Irecv((void *)rbuf[i], bsz, MPI_INT, MPI_ANY_SOURCE, tag, comm, recv_waits+i); 204 } 205 206 /* Now post the sends */ 207 send_waits = (MPI_Request *) PetscMalloc((mct+1)*sizeof(MPI_Request)); 208 CHKPTRQ(send_waits); 209 for( i =0; i< mct; ++i){ 210 j = pa[i]; 211 MPI_Isend( (void *)outdat[j], w1[j], MPI_INT, j, tag, comm, send_waits+i); 212 } 213 214 /* Do Local work*/ 215 /* Extract the matrices */ 216 { 217 int start, end, val, max; 218 219 for( i=0; i<is_max; i++) { 220 for ( j=0, max =isz[i] ; j< max; j++) { 221 row = data[i][j] - rstart; 222 start = ai[row]; 223 end = ai[row+1]; 224 for ( k=start; k < end; k++) { /* Amat */ 225 val = aj[k] +cstart; 226 if(!table[i][val]++) { data[i][isz[i]++] = val;} 227 } 228 start = bi[row]; 229 end = bi[row+1]; 230 for ( k=start; k < end; k++) { /* Bmat */ 231 val = garray[bj[k]] ; 232 if(!table[i][val]++) { data[i][isz[i]++] = val;} 233 } 234 } 235 } 236 } 237 /* Receive messages*/ 238 { 239 int index; 240 241 recv_status = (MPI_Status *) PetscMalloc( (nmsg+1)*sizeof(MPI_Status) ); 242 CHKPTRQ(recv_status); 243 for ( i=0; i< nmsg; ++i) { 244 MPI_Waitany(nmsg, recv_waits, &index, recv_status+i); 245 } 246 247 send_status = (MPI_Status *) PetscMalloc( (mct+1)*sizeof(MPI_Status) ); 248 CHKPTRQ(send_status); 249 MPI_Waitall(mct,send_waits,send_status); 250 } 251 252 253 /* Do work on recieved messages*/ 254 { 255 int ct, ct1, ct2; 256 int oct2, l, start, end, val, max1, max2; 257 258 259 for (i =0, ct =0; i< nmsg; ++i) ct+= rbuf[i][0]; 260 261 xdata = (int **)PetscMalloc((nmsg+1)*sizeof(int *)); CHKPTRQ(xdata); 262 xtable = (int *)PetscMalloc((ct+nmsg+(m+1)*(ct+1))*sizeof(int)); CHKPTRQ(xtable); 263 xdata [0] = xtable + (m+1); 264 isz1 = (int *)PetscMalloc((nmsg+1) *sizeof(int)); CHKPTRQ(isz1); 265 PetscMemzero((void *)isz1,(nmsg+1) *sizeof(int)); 266 267 268 for (i =0; i< nmsg; i++) { /* for easch mesg from proc i */ 269 ct1 = 2*rbuf[i][0]+1; 270 ct2 = 2*rbuf[i][0]+1; 271 for (j = 1, max1= rbuf[i][0]; j<=max1; j++) { /* for each IS from proc i*/ 272 PetscMemzero((void *)xtable,(m+1)*sizeof(int)); 273 oct2 = ct2; 274 for (k =0; k < rbuf[i][2*j]; k++, ct1++) { 275 row = rbuf[i][ct1]; 276 if(!xtable[row]++) { xdata[i][ct2++] = row;} 277 } 278 for ( k=oct2, max2 =ct2 ; k< max2; k++) { 279 row = xdata[i][k] - rstart; 280 start = ai[row]; 281 end = ai[row+1]; 282 for ( l=start; l < end; l++) { 283 val = aj[l] +cstart; 284 if(!xtable[val]++) { xdata[i][ct2++] = val;} 285 } 286 start = bi[row]; 287 end = bi[row+1]; 288 for ( l=start; l < end; l++) { 289 val = garray[bj[l]] ; 290 if(!xtable[val]++) { xdata[i][ct2++] = val;} 291 } 292 } 293 /* Update the header*/ 294 xdata[i][2*j] = ct2-oct2; /* Undo the vector isz1 and use only a var*/ 295 xdata[i][2*j-1] = rbuf[i][2*j-1]; 296 } 297 xdata[i][0] = rbuf[i][0]; 298 xdata[i+1] = xdata[i] +ct2; 299 isz1[i] = ct2; /* size of each message */ 300 } 301 } 302 /* need isz, xdata;*/ 303 304 /* Send the data back*/ 305 /* Do a global reduction to know the buffer space req for incoming messages*/ 306 { 307 int *rw1, *rw2; 308 309 rw1 = (int *)PetscMalloc(2*size*sizeof(int)); CHKPTRQ(rw1); 310 PetscMemzero((void*)rw1,2*size*sizeof(int)); 311 rw2 = rw1+size; 312 for (i =0; i < nmsg ; ++i) { 313 proc = recv_status[i].MPI_SOURCE; 314 rw1[proc] = isz1[i]; 315 } 316 317 MPI_Allreduce((void *)rw1, (void *)rw2, size, MPI_INT, MPI_MAX, comm); 318 bsz1 = rw2[rank]; 319 PetscFree(rw1); 320 } 321 322 /* Allocate buffers*/ 323 324 /* Allocate memory for recv buffers . Prob none if nmsg = 0 ???? */ 325 rbuf2 = (int**) PetscMalloc((mct+1) *sizeof(int*)); CHKPTRQ(rbuf2); 326 rbuf2[0] = (int *) PetscMalloc((mct*bsz1+1) * sizeof(int)); CHKPTRQ(rbuf2[0]); 327 for (i=1; i<mct ; ++i) rbuf2[i] = rbuf2[i-1] + bsz1; 328 329 /* Now post the receives */ 330 recv_waits2 = (MPI_Request *)PetscMalloc((mct+1)*sizeof(MPI_Request)); CHKPTRQ(recv_waits2) 331 CHKPTRQ(recv_waits2); 332 for ( i=0; i<mct; ++i){ 333 MPI_Irecv((void *)rbuf2[i], bsz1, MPI_INT, MPI_ANY_SOURCE, tag, comm, recv_waits2+i); 334 } 335 336 /* Now post the sends */ 337 send_waits2 = (MPI_Request *) PetscMalloc((nmsg+1)*sizeof(MPI_Request)); 338 CHKPTRQ(send_waits2); 339 for( i =0; i< nmsg; ++i){ 340 j = recv_status[i].MPI_SOURCE; 341 MPI_Isend( (void *)xdata[i], isz1[i], MPI_INT, j, tag, comm, send_waits2+i); 342 } 343 344 /* recieve work done on other processors*/ 345 { 346 int index, is_no, ct1, max; 347 MPI_Status *send_status2 ,*recv_status2; 348 349 recv_status2 = (MPI_Status *) PetscMalloc( (mct+1)*sizeof(MPI_Status) ); 350 CHKPTRQ(recv_status2); 351 352 353 for ( i=0; i< mct; ++i) { 354 MPI_Waitany(mct, recv_waits2, &index, recv_status2+i); 355 /* Process the message*/ 356 ct1 = 2*rbuf2[index][0]+1; 357 for (j=1; j<=rbuf2[index][0]; j++) { 358 max = rbuf2[index][2*j]; 359 is_no = rbuf2[index][2*j-1]; 360 for (k=0; k < max ; k++, ct1++) { 361 row = rbuf2[index][ct1]; 362 if(!table[is_no][row]++) { data[is_no][isz[is_no]++] = row;} 363 } 364 } 365 } 366 367 368 send_status2 = (MPI_Status *) PetscMalloc( (nmsg+1)*sizeof(MPI_Status) ); 369 CHKPTRQ(send_status2); 370 MPI_Waitall(nmsg,send_waits2,send_status2); 371 372 PetscFree(send_status2); PetscFree(recv_status2); 373 } 374 for( i=0; i< is_max; ++i) { 375 ierr = ISRestoreIndices(is[i], idx+i); CHKERRQ(ierr); 376 } 377 for( i=0; i< is_max; ++i) { 378 ierr = ISDestroy(is[i]); CHKERRQ(ierr); 379 } 380 for ( i=0; i<is_max; ++i) { 381 ierr = ISCreateSeq(MPI_COMM_SELF, isz[i], data[i], is+i); CHKERRQ(ierr); 382 } 383 /* pack up*/ 384 385 /* done !!!! */ 386 387 /* whew! already done? check again :) */ 388 PetscFree(idx); 389 PetscFree(n); 390 PetscFree(rtable); 391 PetscFree(w1); 392 PetscFree(tmp); 393 PetscFree(outdat); 394 PetscFree(ctr); 395 PetscFree(pa); 396 PetscFree(rbuf[0]); 397 PetscFree(rbuf); 398 PetscFree(rbuf2[0]); 399 PetscFree(rbuf2); 400 PetscFree(send_waits); 401 PetscFree(recv_waits); 402 PetscFree(send_waits2); 403 PetscFree(recv_waits2); 404 PetscFree(table[0]); 405 PetscFree(table); 406 PetscFree(data); 407 PetscFree(send_status); 408 PetscFree(recv_status); 409 PetscFree(isz); 410 PetscFree(isz1); 411 PetscFree(xtable); 412 PetscFree(xdata); 413 414 /* Dont forget to ISRestoreIndices */ 415 return 0; 416 } 417