1 #ifndef lint 2 static char vcid[] = "$Id: mpiov.c,v 1.19 1996/02/10 01:37:08 balay Exp balay $"; 3 #endif 4 5 #include "mpiaij.h" 6 #include "inline/bitarray.h" 7 8 static int MatIncreaseOverlap_MPIAIJ_private(Mat, int, IS *); 9 static int FindOverlapLocal(Mat , int , char **,int*, int**); 10 static int FindOverlapRecievedMesg(Mat , int, int **, int**, int* ); 11 12 int MatIncreaseOverlap_MPIAIJ(Mat C, int imax, IS *is, int ov) 13 { 14 int i, ierr; 15 if (ov < 0){ SETERRQ(1," MatIncreaseOverlap_MPIAIJ: negative overlap specified\n");} 16 for (i =0; i<ov; ++i) { 17 ierr = MatIncreaseOverlap_MPIAIJ_private(C, imax, is); CHKERRQ(ierr); 18 } 19 return 0; 20 } 21 22 /* 23 Sample message format: 24 If a processor A wants processor B to process some elements corresponding 25 to index sets 1s[1], is[5] 26 mesg [0] = 2 ( no of index sets in the mesg) 27 ----------- 28 mesg [1] = 1 => is[1] 29 mesg [2] = sizeof(is[1]); 30 ----------- 31 mesg [5] = 5 => is[5] 32 mesg [6] = sizeof(is[5]); 33 ----------- 34 mesg [7] 35 mesg [n] datas[1] 36 ----------- 37 mesg[n+1] 38 mesg[m] data(is[5]) 39 ----------- 40 41 Notes: 42 nrqs - no of requests sent (or to be sent out) 43 nrqr - no of requests recieved (which have to be or which have been processed 44 */ 45 static int MatIncreaseOverlap_MPIAIJ_private(Mat C, int imax, IS *is) 46 { 47 Mat_MPIAIJ *c = (Mat_MPIAIJ *) C->data; 48 int **idx, *n, *w1, *w2, *w3, *w4, *rtable,**data; 49 int size, rank, m,i,j,k, ierr, **rbuf, row, proc, nrqs, msz, **outdat, **ptr; 50 int *ctr, *pa, tag, *tmp,bsz, nrqr , *isz, *isz1, **xdata; 51 int bsz1, **rbuf2; 52 char **table; 53 MPI_Comm comm; 54 MPI_Request *send_waits,*recv_waits,*send_waits2,*recv_waits2 ; 55 MPI_Status *send_status ,*recv_status; 56 double space, fr, maxs; 57 58 comm = C->comm; 59 tag = C->tag; 60 size = c->size; 61 rank = c->rank; 62 m = c->M; 63 64 65 TrSpace( &space, &fr, &maxs ); 66 /* MPIU_printf(MPI_COMM_SELF,"[%d] allocated space = %f fragments = %f max ever allocated = %f\n", rank, space, fr, maxs); */ 67 68 idx = (int **)PetscMalloc((imax+1)*sizeof(int *)); CHKPTRQ(idx); 69 n = (int *)PetscMalloc((imax+1)*sizeof(int )); CHKPTRQ(n); 70 rtable = (int *)PetscMalloc((m+1)*sizeof(int )); CHKPTRQ(rtable); 71 /* Hash table for maping row ->proc */ 72 73 for ( i=0 ; i<imax ; i++) { 74 ierr = ISGetIndices(is[i],&idx[i]); CHKERRQ(ierr); 75 ierr = ISGetLocalSize(is[i],&n[i]); CHKERRQ(ierr); 76 } 77 78 /* Create hash table for the mapping :row -> proc*/ 79 for( i=0, j=0; i< size; i++) { 80 for (; j <c->rowners[i+1]; j++) { 81 rtable[j] = i; 82 } 83 } 84 85 /* evaluate communication - mesg to who, length of mesg, and buffer space 86 required. Based on this, buffers are allocated, and data copied into them*/ 87 w1 = (int *)PetscMalloc((size)*4*sizeof(int )); CHKPTRQ(w1); /* mesg size */ 88 w2 = w1 + size; /* if w2[i] marked, then a message to proc i*/ 89 w3 = w2 + size; /* no of IS that needs to be sent to proc i */ 90 w4 = w3 + size; /* temp work space used in determining w1, w2, w3 */ 91 PetscMemzero(w1,(size)*3*sizeof(int)); /* initialise work vector*/ 92 for ( i=0; i<imax ; i++) { 93 PetscMemzero(w4,(size)*sizeof(int)); /* initialise work vector*/ 94 for ( j =0 ; j < n[i] ; j++) { 95 row = idx[i][j]; 96 proc = rtable[row]; 97 w4[proc]++; 98 } 99 for( j = 0; j < size; j++){ 100 if( w4[j] ) { w1[j] += w4[j]; w3[j] += 1;} 101 } 102 } 103 104 nrqs = 0; /* no of outgoing messages */ 105 msz = 0; /* total mesg length (for all proc */ 106 w1[rank] = 0; /* no mesg sent to intself */ 107 w3[rank] = 0; 108 for (i =0; i < size ; i++) { 109 if (w1[i]) { w2[i] = 1; nrqs++;} /* there exists a message to proc i */ 110 } 111 pa = (int *)PetscMalloc((nrqs +1)*sizeof(int)); CHKPTRQ(pa); /* (proc -array) */ 112 for (i =0, j=0; i < size ; i++) { 113 if (w1[i]) { pa[j] = i; j++; } 114 } 115 116 /* Each message would have a header = 1 + 2*(no of IS) + data */ 117 for (i = 0; i<nrqs ; i++) { 118 j = pa[i]; 119 w1[j] += w2[j] + 2* w3[j]; 120 msz += w1[j]; 121 } 122 123 124 /* Do a global reduction to determine how many messages to expect*/ 125 { 126 int *rw1, *rw2; 127 rw1 = (int *)PetscMalloc(2*size*sizeof(int)); CHKPTRQ(rw1); 128 rw2 = rw1+size; 129 MPI_Allreduce((void *)w1, rw1, size, MPI_INT, MPI_MAX, comm); 130 bsz = rw1[rank]; 131 MPI_Allreduce((void *)w2, rw2, size, MPI_INT, MPI_SUM, comm); 132 nrqr = rw2[rank]; 133 PetscFree(rw1); 134 } 135 136 /* Allocate memory for recv buffers . Prob none if nrqr = 0 ???? */ 137 rbuf = (int**) PetscMalloc((nrqr+1) *sizeof(int*)); CHKPTRQ(rbuf); 138 rbuf[0] = (int *) PetscMalloc((nrqr *bsz+1) * sizeof(int)); CHKPTRQ(rbuf[0]); 139 for (i=1; i<nrqr ; ++i) rbuf[i] = rbuf[i-1] + bsz; 140 141 /* Now post the receives */ 142 recv_waits = (MPI_Request *) PetscMalloc((nrqr+1)*sizeof(MPI_Request)); 143 CHKPTRQ(recv_waits); 144 for ( i=0; i<nrqr; ++i){ 145 MPI_Irecv((void *)(rbuf[i]), bsz, MPI_INT, MPI_ANY_SOURCE, tag, comm, recv_waits+i); 146 } 147 148 /* Allocate Memory for outgoing messages */ 149 outdat = (int **)PetscMalloc( 2*size*sizeof(int*)); CHKPTRQ(outdat); 150 PetscMemzero(outdat, 2*size*sizeof(int*)); 151 tmp = (int *)PetscMalloc((msz+1) *sizeof (int)); CHKPTRQ(tmp); /*mrsg arr */ 152 ptr = outdat +size; /* Pointers to the data in outgoing buffers */ 153 ctr = (int *)PetscMalloc( size*sizeof(int)); CHKPTRQ(ctr); 154 155 { 156 int *iptr = tmp; 157 int ict = 0; 158 for (i = 0; i < nrqs ; i++) { 159 j = pa[i]; 160 iptr += ict; 161 outdat[j] = iptr; 162 ict = w1[j]; 163 } 164 } 165 166 /* Form the outgoing messages */ 167 /*plug in the headers*/ 168 for ( i=0 ; i<nrqs ; i++) { 169 j = pa[i]; 170 outdat[j][0] = 0; 171 PetscMemzero(outdat[j]+1, 2 * w3[j]*sizeof(int)); 172 ptr[j] = outdat[j] + 2*w3[j] +1; 173 } 174 175 /* Memory for doing local proc's work*/ 176 table = (char **)PetscMalloc((imax+1)*sizeof(int *)); CHKPTRQ(table); 177 data = (int **)PetscMalloc((imax+1)*sizeof(int *)); CHKPTRQ(data); 178 table[0] = (char *)PetscMalloc((m/BITSPERBYTE +1)*(imax)); CHKPTRQ(table[0]); 179 data [0] = (int *)PetscMalloc((m+1)*(imax)*sizeof(int)); CHKPTRQ(data[0]); 180 181 for(i = 1; i<imax ; i++) { 182 table[i] = table[0] + (m/BITSPERBYTE+1)*i; 183 data[i] = data[0] + (m+1)*i; 184 } 185 186 PetscMemzero((void*)*table,(m/BITSPERBYTE+1)*(imax)); 187 isz = (int *)PetscMalloc((imax+1) *sizeof(int)); CHKPTRQ(isz); 188 PetscMemzero((void *)isz,(imax+1) *sizeof(int)); 189 190 /* Parse the IS and update local tables and the outgoing buf with the data*/ 191 for ( i=0 ; i<imax ; i++) { 192 PetscMemzero(ctr,size*sizeof(int)); 193 for( j=0; j<n[i]; j++) { /* parse the indices of each IS */ 194 row = idx[i][j]; 195 proc = rtable[row]; 196 if (proc != rank) { /* copy to the outgoing buf*/ 197 ctr[proc]++; 198 *ptr[proc] = row; 199 ptr[proc]++; 200 } 201 else { /* Update the table */ 202 if ( !BT_LOOKUP(table[i],row)) { data[i][isz[i]++] = row;} 203 } 204 } 205 /* Update the headers for the current IS */ 206 for( j = 0; j<size; j++) { /* Can Optimise this loop too */ 207 if (ctr[j]) { 208 k= ++outdat[j][0]; 209 outdat[j][2*k] = ctr[j]; 210 outdat[j][2*k-1] = i; 211 } 212 } 213 } 214 215 216 217 /* Now post the sends */ 218 send_waits = (MPI_Request *) PetscMalloc((nrqs+1)*sizeof(MPI_Request)); 219 CHKPTRQ(send_waits); 220 for( i =0; i< nrqs; ++i){ 221 j = pa[i]; 222 MPI_Isend( (void *)(outdat[j]), w1[j], MPI_INT, j, tag, comm, send_waits+i); 223 } 224 225 /* I nolonger need the original indices*/ 226 for( i=0; i< imax; ++i) { 227 ierr = ISRestoreIndices(is[i], idx+i); CHKERRQ(ierr); 228 } 229 PetscFree(idx); 230 PetscFree(n); 231 PetscFree(rtable); 232 for( i=0; i< imax; ++i) { 233 ierr = ISDestroy(is[i]); CHKERRQ(ierr); 234 } 235 236 /* Do Local work*/ 237 ierr = FindOverlapLocal(C, imax, table,isz, data); CHKERRQ(ierr); 238 /* Extract the matrices */ 239 240 /* Receive messages*/ 241 { 242 int index; 243 244 recv_status = (MPI_Status *) PetscMalloc( (nrqr+1)*sizeof(MPI_Status) ); 245 CHKPTRQ(recv_status); 246 for ( i=0; i< nrqr; ++i) { 247 MPI_Waitany(nrqr, recv_waits, &index, recv_status+i); 248 } 249 250 send_status = (MPI_Status *) PetscMalloc( (nrqs+1)*sizeof(MPI_Status) ); 251 CHKPTRQ(send_status); 252 MPI_Waitall(nrqs,send_waits,send_status); 253 } 254 /* Pahse 1 sends are complete - deallocate buffers */ 255 PetscFree(outdat); 256 PetscFree(w1); 257 PetscFree(tmp); 258 259 /* int FindOverlapRecievedMesg(Mat C, int imax, int *isz, char **table, int **data)*/ 260 xdata = (int **)PetscMalloc((nrqr+1)*sizeof(int *)); CHKPTRQ(xdata); 261 isz1 = (int *)PetscMalloc((nrqr+1) *sizeof(int)); CHKPTRQ(isz1); 262 ierr = FindOverlapRecievedMesg(C, nrqr, rbuf,xdata,isz1); CHKERRQ(ierr); 263 264 /* Nolonger need rbuf. */ 265 PetscFree(rbuf[0]); 266 PetscFree(rbuf); 267 268 269 /* Send the data back*/ 270 /* Do a global reduction to know the buffer space req for incoming messages*/ 271 { 272 int *rw1, *rw2; 273 274 rw1 = (int *)PetscMalloc(2*size*sizeof(int)); CHKPTRQ(rw1); 275 PetscMemzero((void*)rw1,2*size*sizeof(int)); 276 rw2 = rw1+size; 277 for (i =0; i < nrqr ; ++i) { 278 proc = recv_status[i].MPI_SOURCE; 279 rw1[proc] = isz1[i]; 280 } 281 282 MPI_Allreduce((void *)rw1, (void *)rw2, size, MPI_INT, MPI_MAX, comm); 283 bsz1 = rw2[rank]; 284 PetscFree(rw1); 285 } 286 287 /* Allocate buffers*/ 288 289 /* Allocate memory for recv buffers . Prob none if nrqr = 0 ???? */ 290 rbuf2 = (int**) PetscMalloc((nrqs+1) *sizeof(int*)); CHKPTRQ(rbuf2); 291 rbuf2[0] = (int *) PetscMalloc((nrqs*bsz1+1) * sizeof(int)); CHKPTRQ(rbuf2[0]); 292 for (i=1; i<nrqs ; ++i) rbuf2[i] = rbuf2[i-1] + bsz1; 293 294 /* Now post the receives */ 295 recv_waits2 = (MPI_Request *)PetscMalloc((nrqs+1)*sizeof(MPI_Request)); CHKPTRQ(recv_waits2) 296 CHKPTRQ(recv_waits2); 297 for ( i=0; i<nrqs; ++i){ 298 MPI_Irecv((void *)(rbuf2[i]), bsz1, MPI_INT, MPI_ANY_SOURCE, tag, comm, recv_waits2+i); 299 } 300 301 /* Now post the sends */ 302 send_waits2 = (MPI_Request *) PetscMalloc((nrqr+1)*sizeof(MPI_Request)); 303 CHKPTRQ(send_waits2); 304 for( i =0; i< nrqr; ++i){ 305 j = recv_status[i].MPI_SOURCE; 306 MPI_Isend( (void *)(xdata[i]), isz1[i], MPI_INT, j, tag, comm, send_waits2+i); 307 } 308 309 /* recieve work done on other processors*/ 310 { 311 int index, is_no, ct1, max; 312 MPI_Status *send_status2 ,*recv_status2; 313 314 recv_status2 = (MPI_Status *) PetscMalloc( (nrqs+1)*sizeof(MPI_Status) ); 315 CHKPTRQ(recv_status2); 316 317 318 for ( i=0; i< nrqs; ++i) { 319 MPI_Waitany(nrqs, recv_waits2, &index, recv_status2+i); 320 /* Process the message*/ 321 ct1 = 2*rbuf2[index][0]+1; 322 for (j=1; j<=rbuf2[index][0]; j++) { 323 max = rbuf2[index][2*j]; 324 is_no = rbuf2[index][2*j-1]; 325 for (k=0; k < max ; k++, ct1++) { 326 row = rbuf2[index][ct1]; 327 if(!BT_LOOKUP(table[is_no],row)) { data[is_no][isz[is_no]++] = row;} 328 } 329 } 330 } 331 332 333 send_status2 = (MPI_Status *) PetscMalloc( (nrqr+1)*sizeof(MPI_Status) ); 334 CHKPTRQ(send_status2); 335 MPI_Waitall(nrqr,send_waits2,send_status2); 336 337 PetscFree(send_status2); PetscFree(recv_status2); 338 } 339 340 TrSpace( &space, &fr, &maxs ); 341 /* MPIU_printf(MPI_COMM_SELF,"[%d] allocated space = %f fragments = %f max ever allocated = %f\n", rank, space, fr, maxs);*/ 342 343 PetscFree(ctr); 344 PetscFree(pa); 345 PetscFree(rbuf2[0]); 346 PetscFree(rbuf2); 347 PetscFree(send_waits); 348 PetscFree(recv_waits); 349 PetscFree(send_waits2); 350 PetscFree(recv_waits2); 351 PetscFree(table[0]); 352 PetscFree(table); 353 PetscFree(send_status); 354 PetscFree(recv_status); 355 PetscFree(isz1); 356 PetscFree(xdata[0]); 357 PetscFree(xdata); 358 359 for ( i=0; i<imax; ++i) { 360 ierr = ISCreateSeq(MPI_COMM_SELF, isz[i], data[i], is+i); CHKERRQ(ierr); 361 } 362 PetscFree(isz); 363 PetscFree(data[0]); 364 PetscFree(data); 365 366 return 0; 367 } 368 369 /* FindOverlapLocal() - Called by MatincreaseOverlap, to do the work on 370 the local processor. 371 372 Inputs: 373 C - MAT_MPIAIJ; 374 imax - total no of index sets processed at a time; 375 table - an array of char - size = m bits. 376 377 Output: 378 isz - array containing the count of the solution elements correspondign 379 to each index set; 380 data - pointer to the solutions 381 */ 382 static int FindOverlapLocal(Mat C, int imax, char **table, int *isz,int **data) 383 { 384 Mat_MPIAIJ *c = (Mat_MPIAIJ *) C->data; 385 Mat A = c->A, B = c->B; 386 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)B->data; 387 int start, end, val, max, rstart,cstart, ashift, bshift,*ai, *aj; 388 int *bi, *bj, *garray, i, j, k, row; 389 390 rstart = c->rstart; 391 cstart = c->cstart; 392 ashift = a->indexshift; 393 ai = a->i; 394 aj = a->j +ashift; 395 bshift = b->indexshift; 396 bi = b->i; 397 bj = b->j +bshift; 398 garray = c->garray; 399 400 401 for( i=0; i<imax; i++) { 402 for ( j=0, max =isz[i] ; j< max; j++) { 403 row = data[i][j] - rstart; 404 start = ai[row]; 405 end = ai[row+1]; 406 for ( k=start; k < end; k++) { /* Amat */ 407 val = aj[k] + ashift + cstart; 408 if(!BT_LOOKUP(table[i],val)) { data[i][isz[i]++] = val;} 409 } 410 start = bi[row]; 411 end = bi[row+1]; 412 for ( k=start; k < end; k++) { /* Bmat */ 413 val = garray[bj[k]+bshift] ; 414 if(! BT_LOOKUP(table[i],val)) { data[i][isz[i]++] = val;} 415 } 416 } 417 } 418 419 return 0; 420 } 421 /* FindOverlapRecievedMesg - Process the recieved messages, 422 and return the output 423 424 Input: 425 C - the matrix 426 nrqr - no of messages being processed. 427 rbuf - an array of pointers to the recieved requests 428 429 Output: 430 xdata - array of messages to be sent back 431 isz1 - size of each message 432 */ 433 static int FindOverlapRecievedMesg(Mat C, int nrqr, int ** rbuf, int ** xdata, int * isz1 ) 434 { 435 Mat_MPIAIJ *c = (Mat_MPIAIJ *) C->data; 436 Mat A = c->A, B = c->B; 437 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)B->data; 438 int rstart,cstart, ashift, bshift,*ai, *aj, *bi, *bj, *garray, i, j, k; 439 int row,total_sz,ct, ct1, ct2, ct3,mem_estimate, oct2, l, start, end; 440 int val, max1, max2, rank, m, no_malloc =0, *tmp, new_estimate, ctr; 441 char *xtable; 442 443 rank = c->rank; 444 m = c->M; 445 rstart = c->rstart; 446 cstart = c->cstart; 447 ashift = a->indexshift; 448 ai = a->i; 449 aj = a->j +ashift; 450 bshift = b->indexshift; 451 bi = b->i; 452 bj = b->j +bshift; 453 garray = c->garray; 454 455 456 for (i =0, ct =0, total_sz =0; i< nrqr; ++i){ 457 ct+= rbuf[i][0]; 458 for ( j = 1; j <= rbuf[i][0] ; j++ ) { total_sz += rbuf[i][2*j]; } 459 } 460 461 max1 = ct*(a->nz +b->nz)/c->m; 462 mem_estimate = 3*((total_sz > max1?total_sz:max1)+1); 463 xdata[0] = (int *)PetscMalloc(mem_estimate *sizeof(int)); CHKPTRQ(xdata[0]); 464 ++no_malloc; 465 xtable = (char *)PetscMalloc((m/BITSPERBYTE+1)); CHKPTRQ(xtable); 466 PetscMemzero((void *)isz1,(nrqr+1) *sizeof(int)); 467 468 ct3 = 0; 469 for (i =0; i< nrqr; i++) { /* for easch mesg from proc i */ 470 ct1 = 2*rbuf[i][0]+1; 471 ct2 = ct1; 472 ct3+= ct1; 473 for (j = 1, max1= rbuf[i][0]; j<=max1; j++) { /* for each IS from proc i*/ 474 PetscMemzero((void *)xtable,(m/BITSPERBYTE+1)); 475 oct2 = ct2; 476 for (k =0; k < rbuf[i][2*j]; k++, ct1++) { 477 row = rbuf[i][ct1]; 478 if(!BT_LOOKUP(xtable,row)) { 479 if (!(ct3 < mem_estimate)) { 480 new_estimate = (int)1.5*mem_estimate+1; 481 tmp = (int*) PetscMalloc(new_estimate * sizeof(int)); CHKPTRQ(tmp); 482 PetscMemcpy((char *)tmp,(char *)xdata[0],mem_estimate*sizeof(int)); 483 PetscFree(xdata[0]); 484 xdata[0] = tmp; 485 mem_estimate = new_estimate; ++no_malloc; 486 for (ctr =1; ctr <=i; ctr++) { xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];} 487 } 488 xdata[i][ct2++] = row;ct3++; 489 } 490 } 491 for ( k=oct2, max2 =ct2 ; k< max2; k++) { 492 row = xdata[i][k] - rstart; 493 start = ai[row]; 494 end = ai[row+1]; 495 for ( l=start; l < end; l++) { 496 val = aj[l] +ashift + cstart; 497 if(!BT_LOOKUP(xtable,val)) { 498 if (!(ct3 < mem_estimate)) { 499 new_estimate = (int)1.5*mem_estimate+1; 500 tmp = (int*) PetscMalloc(new_estimate * sizeof(int)); CHKPTRQ(tmp); 501 PetscMemcpy((char *)tmp,(char *)xdata[0],mem_estimate*sizeof(int)); 502 PetscFree(xdata[0]); 503 xdata[0] = tmp; 504 mem_estimate = new_estimate; ++no_malloc; 505 for (ctr =1; ctr <=i; ctr++) { xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];} 506 } 507 xdata[i][ct2++] = val;ct3++; 508 } 509 } 510 start = bi[row]; 511 end = bi[row+1]; 512 for ( l=start; l < end; l++) { 513 val = garray[bj[l]+bshift] ; 514 if(!BT_LOOKUP(xtable,val)) { 515 if (!(ct3 < mem_estimate)) { 516 new_estimate = (int)1.5*mem_estimate+1; 517 tmp = (int*) PetscMalloc(new_estimate * sizeof(int)); CHKPTRQ(tmp); 518 PetscMemcpy((char *)tmp,(char *)xdata[0],mem_estimate*sizeof(int)); 519 PetscFree(xdata[0]); 520 xdata[0] = tmp; 521 mem_estimate = new_estimate; ++no_malloc; 522 for (ctr =1; ctr <=i; ctr++) { xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];} 523 } 524 xdata[i][ct2++] = val;ct3++; 525 } 526 } 527 } 528 /* Update the header*/ 529 xdata[i][2*j] = ct2-oct2; /* Undo the vector isz1 and use only a var*/ 530 xdata[i][2*j-1] = rbuf[i][2*j-1]; 531 } 532 xdata[i][0] = rbuf[i][0]; 533 xdata[i+1] = xdata[i] +ct2; 534 isz1[i] = ct2; /* size of each message */ 535 } 536 PetscFree(xtable); 537 PLogInfo(0,"MatIncreaseOverlap_MPIAIJ:[%d] Allocated %d bytes, required %d bytes, no of mallocs = %d\n",rank,mem_estimate, ct3,no_malloc); 538 return 0; 539 } 540 541 542 int MatGetSubMatrices_MPIAIJ (Mat C,int ismax, IS *isrow,IS *iscol,MatGetSubMatrixCall 543 scall, Mat **submat) 544 { 545 Mat_MPIAIJ *c = (Mat_MPIAIJ *) C->data; 546 Mat A = c->A, B = c->B; 547 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)B->data; 548 int **irow,**icol,*nrow,*ncol,*w1,*w2,*w3,*w4,*rtable, start, end, size; 549 int **sbuf1,**sbuf2, rank, m,i,j,k,l,ct1,ct2, ierr, **rbuf1, row, proc; 550 int nrqs, msz, **ptr, index, *req_size, *ctr, *pa, tag, *tmp,bsz, nrqr; 551 int **rbuf3,*req_source,**sbuf_aj, rstart,cstart, ashift, bshift,*ai, *aj; 552 int *bi, *bj,*garray,**rbuf2, max1,max2, **rmap, **cmap, **srmap, **scmap; 553 int **lens, is_no; 554 MPI_Request *send_waits, *recv_waits, *send_waits2, *recv_waits2, *recv_waits3 ; 555 MPI_Request *recv_waits4,*send_waits3,*send_waits4; 556 MPI_Status *recv_status ,*recv_status2,*send_status,*send_status3 ,*send_status2; 557 MPI_Status *recv_status3,*recv_status4,*send_status4; 558 MPI_Comm comm; 559 Scalar **rbuf4, *aa, *ba, **sbuf_aa; 560 double space, fr, maxs; 561 562 563 comm = C->comm; 564 tag = C->tag; 565 size = c->size; 566 rank = c->rank; 567 m = c->M; 568 rstart = c->rstart; 569 cstart = c->cstart; 570 ashift = a->indexshift; 571 ai = a->i; 572 aj = a->j; 573 aa = a->a; 574 bshift = b->indexshift; 575 bi = b->i; 576 bj = b->j; 577 ba = b->a; 578 garray = c->garray; 579 580 TrSpace( &space, &fr, &maxs ); 581 /* MPIU_printf(MPI_COMM_SELF,"[%d] allocated space = %f fragments = %f max ever allocated = %f\n", rank, space, fr, maxs); */ 582 583 irow = (int **)PetscMalloc((ismax+1)*sizeof(int *)); CHKPTRQ(irow); 584 icol = (int **)PetscMalloc((ismax+1)*sizeof(int *)); CHKPTRQ(icol); 585 nrow = (int *) PetscMalloc((ismax+1)*sizeof(int )); CHKPTRQ(nrow); 586 ncol = (int *) PetscMalloc((ismax+1)*sizeof(int )); CHKPTRQ(ncol); 587 rtable = (int *) PetscMalloc((m+1)*sizeof(int )); CHKPTRQ(rtable); 588 /* Hash table for maping row ->proc */ 589 590 for ( i=0 ; i<ismax ; i++) { /* Extract the indicies and sort them */ 591 ierr = ISGetIndices(isrow[i],&irow[i]); CHKERRQ(ierr); 592 ierr = ISGetIndices(iscol[i],&icol[i]); CHKERRQ(ierr); 593 ierr = ISGetLocalSize(isrow[i],&nrow[i]); CHKERRQ(ierr); 594 ierr = ISGetLocalSize(iscol[i],&ncol[i]); CHKERRQ(ierr); 595 ierr = SYIsort(nrow[i], irow[i]); CHKERRQ(ierr); 596 ierr = SYIsort(ncol[i], icol[i]); CHKERRQ(ierr); 597 } 598 599 /* Create hash table for the mapping :row -> proc*/ 600 for( i=0, j=0; i< size; i++) { 601 for (; j <c->rowners[i+1]; j++) { 602 rtable[j] = i; 603 } 604 } 605 606 /* evaluate communication - mesg to who, length of mesg, and buffer space 607 required. Based on this, buffers are allocated, and data copied into them*/ 608 w1 = (int *)PetscMalloc((size)*4*sizeof(int )); CHKPTRQ(w1); /* mesg size */ 609 w2 = w1 + size; /* if w2[i] marked, then a message to proc i*/ 610 w3 = w2 + size; /* no of IS that needs to be sent to proc i */ 611 w4 = w3 + size; /* temp work space used in determining w1, w2, w3 */ 612 PetscMemzero(w1,(size)*3*sizeof(int)); /* initialise work vector*/ 613 for ( i=0; i<ismax ; i++) { 614 PetscMemzero(w4,(size)*sizeof(int)); /* initialise work vector*/ 615 for ( j =0 ; j < nrow[i] ; j++) { 616 row = irow[i][j]; 617 proc = rtable[row]; 618 w4[proc]++; 619 } 620 for( j = 0; j < size; j++){ 621 if( w4[j] ) { w1[j] += w4[j]; w3[j] += 1;} 622 } 623 } 624 625 nrqs = 0; /* no of outgoing messages */ 626 msz = 0; /* total mesg length (for all proc */ 627 w1[rank] = 0; /* no mesg sent to intself */ 628 w3[rank] = 0; 629 for (i =0; i < size ; i++) { 630 if (w1[i]) { w2[i] = 1; nrqs++;} /* there exists a message to proc i */ 631 } 632 pa = (int *)PetscMalloc((nrqs +1)*sizeof(int)); CHKPTRQ(pa); /* (proc -array) */ 633 for (i =0, j=0; i < size ; i++) { 634 if (w1[i]) { pa[j] = i; j++; } 635 } 636 637 /* Each message would have a header = 1 + 2*(no of IS) + data */ 638 for (i = 0; i<nrqs ; i++) { 639 j = pa[i]; 640 w1[j] += w2[j] + 2* w3[j]; 641 msz += w1[j]; 642 } 643 644 645 /* Do a global reduction to determine how many messages to expect*/ 646 { 647 int *rw1, *rw2; 648 rw1 = (int *)PetscMalloc(2*size*sizeof(int)); CHKPTRQ(rw1); 649 rw2 = rw1+size; 650 MPI_Allreduce((void *)w1, rw1, size, MPI_INT, MPI_MAX, comm); 651 bsz = rw1[rank]; 652 MPI_Allreduce((void *)w2, rw2, size, MPI_INT, MPI_SUM, comm); 653 nrqr = rw2[rank]; 654 PetscFree(rw1); 655 } 656 657 /* Allocate memory for recv buffers . Prob none if nrqr = 0 ???? */ 658 rbuf1 = (int**) PetscMalloc((nrqr+1) *sizeof(int*)); CHKPTRQ(rbuf1); 659 rbuf1[0] = (int *) PetscMalloc((nrqr *bsz+1) * sizeof(int)); CHKPTRQ(rbuf1[0]); 660 for (i=1; i<nrqr ; ++i) rbuf1[i] = rbuf1[i-1] + bsz; 661 662 /* Now post the receives */ 663 recv_waits = (MPI_Request *) PetscMalloc((nrqr+1)*sizeof(MPI_Request)); 664 CHKPTRQ(recv_waits); 665 for ( i=0; i<nrqr; ++i){ 666 MPI_Irecv((void *)(rbuf1[i]), bsz, MPI_INT, MPI_ANY_SOURCE, tag, comm, recv_waits+i); 667 } 668 669 /* Allocate Memory for outgoing messages */ 670 sbuf1 = (int **)PetscMalloc( 2*size*sizeof(int*)); CHKPTRQ(sbuf1); 671 PetscMemzero(sbuf1, 2*size*sizeof(int*)); 672 /* allocate memory for outgoing data + buf to recive the first reply */ 673 tmp = (int *)PetscMalloc((2*msz+1) *sizeof (int)); CHKPTRQ(tmp); /*mrsg arr */ 674 ptr = sbuf1 +size; /* Pointers to the data in outgoing buffers */ 675 ctr = (int *)PetscMalloc( size*sizeof(int)); CHKPTRQ(ctr); 676 677 { 678 int *iptr = tmp; 679 int ict = 0; 680 for (i = 0; i < nrqs ; i++) { 681 j = pa[i]; 682 iptr += ict; 683 sbuf1[j] = iptr; 684 ict = w1[j]; 685 } 686 } 687 688 /* Form the outgoing messages */ 689 /* Initialise the header space */ 690 for ( i=0 ; i<nrqs ; i++) { 691 j = pa[i]; 692 sbuf1[j][0] = 0; 693 PetscMemzero(sbuf1[j]+1, 2 * w3[j]*sizeof(int)); 694 ptr[j] = sbuf1[j] + 2*w3[j] +1; 695 } 696 697 698 /* Parse the isrow and copy data into outbuf */ 699 for ( i=0 ; i<ismax ; i++) { 700 PetscMemzero(ctr,size*sizeof(int)); 701 for( j=0; j<nrow[i]; j++) { /* parse the indices of each IS */ 702 row = irow[i][j]; 703 proc = rtable[row]; 704 if (proc != rank) { /* copy to the outgoing buf*/ 705 ctr[proc]++; 706 *ptr[proc] = row; 707 ptr[proc]++; 708 } 709 } 710 /* Update the headers for the current IS */ 711 for( j = 0; j<size; j++) { /* Can Optimise this loop too */ 712 if (ctr[j]) { 713 k= ++sbuf1[j][0]; 714 sbuf1[j][2*k] = ctr[j]; 715 sbuf1[j][2*k-1] = i; 716 } 717 } 718 } 719 720 /* Now post the sends */ 721 send_waits = (MPI_Request *) PetscMalloc((nrqs+1)*sizeof(MPI_Request)); 722 CHKPTRQ(send_waits); 723 for( i =0; i< nrqs; ++i){ 724 j = pa[i]; 725 printf("[%d] Send Req to %d: size %d \n", rank,j, w1[j]); 726 MPI_Isend( (void *)(sbuf1[j]), w1[j], MPI_INT, j, tag, comm, send_waits+i); 727 } 728 729 /* Post Recieves to capture the buffer size */ 730 recv_waits2 = (MPI_Request *) PetscMalloc((nrqs+1)*sizeof(MPI_Request)); 731 CHKPTRQ(recv_waits2); 732 rbuf2 = (int**)PetscMalloc((nrqs+1) *sizeof(int *)); CHKPTRQ(rbuf2); 733 rbuf2[0] = tmp + msz; 734 for( i =1; i< nrqs; ++i){ 735 j = pa[i]; 736 rbuf2[i] = rbuf2[i-1]+w1[pa[i-1]]; 737 } 738 for( i =0; i< nrqs; ++i){ 739 j = pa[i]; 740 MPI_Irecv( (void *)(rbuf2[i]), w1[j], MPI_INT, j, tag+1, comm, recv_waits2+i); 741 } 742 743 /* Send to other procs the buf size they should allocate */ 744 745 746 /* Receive messages*/ 747 send_waits2 = (MPI_Request *) PetscMalloc((nrqr+1)*sizeof(MPI_Request)); 748 CHKPTRQ(send_waits2); 749 recv_status = (MPI_Status *) PetscMalloc( (nrqr+1)*sizeof(MPI_Status) ); 750 CHKPTRQ(recv_status); 751 req_size = (int *) PetscMalloc( (nrqr +1) * sizeof(int)) ; CHKPTRQ(req_size); 752 req_source = (int *) PetscMalloc( (nrqr +1) * sizeof(int)) ; CHKPTRQ(req_source); 753 sbuf2 = (int**) PetscMalloc( (nrqr +1) * sizeof(int*)) ; CHKPTRQ(sbuf2); 754 755 for ( i=0; i< nrqr; ++i) { 756 MPI_Waitany(nrqr, recv_waits, &index, recv_status+i); 757 /* req_size[index] = 2*rbuf1[index][0];*/ 758 req_size[index] = 0; 759 start = 2*rbuf1[index][0] + 1 ; 760 MPI_Get_count(recv_status+i,MPI_INT, &end); 761 sbuf2 [index] = (int *)PetscMalloc(end*sizeof(int)); CHKPTRQ(sbuf2[index]); 762 763 for (j=start; j< end; j++) { 764 row = rbuf1[index][j] - rstart; 765 sbuf2[index][j] = (ai[row+1] - ai[row]); /*overite it with nz count of that row */ 766 sbuf2[index][j] += (bi[row+1] - bi[row]); 767 req_size[index] += sbuf2[index][j]; 768 } 769 req_source[index] = recv_status[i].MPI_SOURCE; 770 /* form the header */ 771 sbuf2[index][0] = req_size[index]; 772 for (j=1; j<start; j++){ sbuf2[index][j] = rbuf1[index][j]; } 773 printf("[%d] Send to %d: size %d, index = %d start = %d end = %d \n", rank, 774 req_source[index] , *(req_size+index), index, start, end); 775 MPI_Isend((void *)(sbuf2[index]),end,MPI_INT,req_source[index],tag+1, comm, send_waits2+i); 776 777 } 778 779 780 /* recv buffer sizes */ 781 /* Receive messages*/ 782 783 rbuf3 = (int**)PetscMalloc((nrqs+1) *sizeof(int*)); CHKPTRQ(rbuf3); 784 rbuf4 = (Scalar**)PetscMalloc((nrqs+1) *sizeof(Scalar*)); CHKPTRQ(rbuf4); 785 recv_waits3 = (MPI_Request *) PetscMalloc((nrqs+1)*sizeof(MPI_Request)); 786 CHKPTRQ(recv_waits3); 787 recv_waits4 = (MPI_Request *) PetscMalloc((nrqs+1)*sizeof(MPI_Request)); 788 CHKPTRQ(recv_waits4); 789 recv_status2 = (MPI_Status *) PetscMalloc( (nrqs+1)*sizeof(MPI_Status) ); 790 CHKPTRQ(recv_status2); 791 for ( i=0; i< nrqs; ++i) { 792 MPI_Waitany(nrqs, recv_waits2, &index, recv_status2+i); 793 794 rbuf3[index] = (int *)PetscMalloc(rbuf2[index][0]*sizeof(int)); 795 CHKPTRQ(rbuf3[index]); 796 rbuf4[index] = (Scalar *)PetscMalloc(rbuf2[index][0]*sizeof(Scalar)); 797 CHKPTRQ(rbuf4[index]); 798 printf("[%d] Posting Irecv for aj, aa from %d, size of buf = %d \n",rank, 799 recv_status2[i].MPI_SOURCE,rbuf2[index][0]); 800 MPI_Irecv((void *)(rbuf3[index]),rbuf2[index][0], MPI_INT, 801 recv_status2[i].MPI_SOURCE, tag+2, comm, recv_waits3+index); 802 MPI_Irecv((void *)(rbuf4[index]),rbuf2[index][0], MPIU_SCALAR, 803 recv_status2[i].MPI_SOURCE, tag+3, comm, recv_waits4+index); 804 805 } 806 807 /* Wait on sends1 and sends2 */ 808 send_status = (MPI_Status *) PetscMalloc( (nrqs+1)*sizeof(MPI_Status) ); 809 CHKPTRQ(send_status); 810 send_status2 = (MPI_Status *) PetscMalloc( (nrqr+1)*sizeof(MPI_Status) ); 811 CHKPTRQ(send_status2); 812 813 MPI_Waitall(nrqs,send_waits,send_status); 814 MPI_Waitall(nrqr,send_waits2,send_status2); 815 816 817 /* Now allocate buffers for a->j, and send them off */ 818 sbuf_aj = (int **)PetscMalloc((nrqr+1)*sizeof(int *)); CHKPTRQ(sbuf_aj); 819 for ( i=0, j =0; i< nrqr; i++) j += req_size[i]; 820 sbuf_aj[0] = (int*) PetscMalloc((j+1)*sizeof(int)); CHKPTRQ(sbuf_aj[0]); 821 for (i =1; i< nrqr; i++) sbuf_aj[i] = sbuf_aj[i-1] + req_size[i-1]; 822 823 send_waits3 = (MPI_Request *) PetscMalloc((nrqr+1)*sizeof(MPI_Request)); 824 CHKPTRQ(send_waits3); 825 for (i=0; i<nrqr; i++) { 826 ct1 = 2*rbuf1[i][0]+1; 827 ct2 = 0; 828 for (j=1, max1 = rbuf1[i][0]; j<= max1; j++){ 829 for( k=0; k< rbuf1[i][2*j]; k++, ct1++) { 830 row = rbuf1[i][ct1] - rstart; 831 start = ai[row]; 832 end = ai[row+1]; 833 for (l = start; l< end; l++) { 834 sbuf_aj[i][ct2++] = aj[l] + cstart; 835 } 836 start = bi[row]; 837 end = bi[row+1]; 838 for (l = start; l< end; l++) { 839 sbuf_aj[i][ct2++] = garray[bj[l]]; 840 } 841 } 842 } 843 /* no header for this message */ 844 printf("[%d] Send AJ to %d: size %d, ct2 = %d \n", rank, req_source[i] , req_size[i], ct2); 845 MPI_Isend((void *)(sbuf_aj[i]),req_size[i],MPI_INT,req_source[i],tag+2, comm, send_waits3+i); 846 } 847 recv_status3 = (MPI_Status *) PetscMalloc( (nrqs+1)*sizeof(MPI_Status) ); 848 CHKPTRQ(recv_status3); 849 send_status3 = (MPI_Status *) PetscMalloc( (nrqr+1)*sizeof(MPI_Status) ); 850 CHKPTRQ(send_status3); 851 /* 852 for (i=0; i< nrqs; ++i){ 853 MPI_Waitany(nrqs, recv_waits3, &index, recv_status3+i); 854 } 855 856 for (i=0; i< nrqr; ++i){ 857 MPI_Waitany(nrqr, send_waits3, &index, send_status3+i); 858 } 859 */ 860 MPI_Waitall(nrqs,recv_waits3,recv_status3); 861 MPI_Waitall(nrqr,send_waits3,send_status3); 862 863 864 /* Now allocate buffers for a->a, and send them off */ 865 sbuf_aa = (Scalar **)PetscMalloc((nrqr+1)*sizeof(Scalar *)); CHKPTRQ(sbuf_aa); 866 for ( i=0, j =0; i< nrqr; i++) j += req_size[i]; 867 sbuf_aa[0] = (Scalar*) PetscMalloc((j+1)*sizeof(Scalar)); CHKPTRQ(sbuf_aa[0]); 868 for (i =1; i< nrqr; i++) sbuf_aa[i] = sbuf_aa[i-1] + req_size[i-1]; 869 870 send_waits4 = (MPI_Request *) PetscMalloc((nrqr+1)*sizeof(MPI_Request)); 871 CHKPTRQ(send_waits4); 872 for (i=0; i<nrqr; i++) { 873 ct1 = 2*rbuf1[i][0]+1; 874 ct2 = 0; 875 for (j=1, max1 = rbuf1[i][0]; j<= max1; j++){ 876 for( k=0; k< rbuf1[i][2*j]; k++, ct1++) { 877 row = rbuf1[i][ct1] - rstart; 878 start = ai[row]; 879 end = ai[row+1]; 880 for (l = start; l< end; l++) { 881 sbuf_aa[i][ct2++] = aa[l]; 882 } 883 start = bi[row]; 884 end = bi[row+1]; 885 for (l = start; l< end; l++) { 886 sbuf_aa[i][ct2++] = ba[l]; 887 } 888 } 889 } 890 /* no header for this message */ 891 printf("[%d] Send AA to %d: size %d, ct2 = %d \n", rank, req_source[i] , req_size[i], ct2); 892 MPI_Isend((void *)(sbuf_aa[i]),req_size[i],MPIU_SCALAR,req_source[i],tag+3, comm, send_waits4+i); 893 } 894 recv_status4 = (MPI_Status *) PetscMalloc( (nrqs+1)*sizeof(MPI_Status) ); 895 CHKPTRQ(recv_status4); 896 send_status4 = (MPI_Status *) PetscMalloc( (nrqr+1)*sizeof(MPI_Status) ); 897 CHKPTRQ(send_status4); 898 899 /* for (i=0; i< nrqs; ++i){ 900 MPI_Waitany(nrqs, recv_waits4, &index, recv_status4+i); 901 } 902 903 for (i=0; i< nrqr; ++i){ 904 MPI_Waitany(nrqr, send_waits4, &index, send_status4+i); 905 }*/ 906 907 MPI_Waitall(nrqs,recv_waits4,recv_status4); 908 MPI_Waitall(nrqr,send_waits4,send_status4); 909 910 911 /* Now u have all the data required, so form the matrix */ 912 /* rbuf3->aj, rbuf4 -> aa */ 913 914 /* create col map */ 915 cmap = (int **) PetscMalloc((1+ ismax)*sizeof(int *)); CHKPTRQ(cmap); 916 scmap = (int **)PetscMalloc((1+ ismax)*sizeof(int *)); CHKPTRQ(scmap); 917 cmap[0] = (int *)PetscMalloc((1+ ismax*c->N)*sizeof(int)); CHKPTRQ(cmap[0]); 918 PetscMemzero((char *)cmap[0],(1+ ismax*c->N)*sizeof(int)); 919 for (i =1; i<ismax; i++) { cmap[i] = cmap[i-1] + c->n; } 920 for (i =0; i<ismax; i++) { scmap[i] = cmap[i] +ashift;} 921 for (i=0; i< ismax; i++) { 922 for ( j=0; j< ncol[i]; j++) { 923 scmap[i][icol[i][j]] = j+1; 924 } 925 } 926 927 /* Create lens which is required for MatCreate... */ 928 lens = (int **)PetscMalloc((1+ ismax)*sizeof(int *)); CHKPTRQ(lens); 929 for (i =0, j=0; i<ismax; i++) { j +=nrow[i]; } 930 lens[0] = (int *)PetscMalloc((1+ j)*sizeof(int)); CHKPTRQ(lens[0]); 931 PetscMemzero((char *)lens[0], (1+ j)*sizeof(int)); 932 for (i =1; i<ismax; i++) { lens[i] = lens[i-1] +nrow[i-1]; } 933 934 /* Update lens from local data */ 935 for (i=0; i< ismax; i++) { 936 for (j =0; j< nrow[i]; j++) { 937 row = irow[i][j] ; 938 proc = rtable[row]; 939 if (proc == rank) { 940 row -= rstart; 941 start = ai[row]; 942 end = ai[row +1]; 943 for (k =start; k < end; k++) { 944 if ( scmap[aj[k]+cstart]) { lens[i][j]++ ;} 945 } 946 start = bi[row]; 947 end = bi[row +1]; 948 for (k =start; k < end; k++) { 949 if ( scmap[garray[bj[k]]]) { lens[i][j]++ ;} 950 } 951 } 952 } 953 } 954 955 /* Create row map*/ 956 rmap = (int **)PetscMalloc((1+ ismax)*sizeof(int *)); CHKPTRQ(rmap); 957 srmap = (int **)PetscMalloc((1+ ismax)*sizeof(int *)); CHKPTRQ(srmap); 958 rmap[0] = (int *)PetscMalloc((1+ ismax*c->M)*sizeof(int)); CHKPTRQ(rmap[0]); 959 PetscMemzero((char *)rmap[0],(1+ ismax*c->M)*sizeof(int)); 960 for (i =1; i<ismax; i++) { rmap[i] = rmap[i-1] + c->m ;} 961 for (i =0; i<ismax; i++) { srmap[i] = rmap[i] +ashift;} 962 for (i=0; i< ismax; i++) { 963 for ( j=0; j< nrow[i]; j++) { 964 srmap[i][irow[i][j]] = j; 965 } 966 } 967 968 /* Update lens from offproc data */ 969 for ( i =0; i<nrqs; i++) { 970 index = pa[i]; 971 ct1 = 2*sbuf1[index][0]+1; /* sbuf1, rbuf2*/ 972 ct2 = 0; /* rbuf3, rbuf4 */ 973 for (j =1; j<= sbuf1[pa[i]][0]; j++) { 974 is_no = sbuf1[index][2*j-1]; 975 max1 = sbuf1[index][2*j]; 976 for (k =0; k< max1; k++, ct1++) { 977 row = sbuf1[index][ct1]; 978 row =rmap[is_no][row]; /* the val in the new matrix to be */ 979 max2 = rbuf2[i][ct1]; 980 for (l=0; l<max2; l++, ct2++) { 981 if (scmap[rbuf3[i][ct2]]) { 982 lens[is_no][row]++; 983 } 984 } 985 } 986 } 987 } 988 989 /* Create the submatrices */ 990 if( scall == MAT_REUSE_MATRIX) { 991 int n_cols, n_rows; 992 for (i=0; i<ismax; i++){ 993 ierr = MatGetSize((*submat)[i],&n_rows, &n_cols); CHKERRQ(ierr); 994 if ((n_rows !=nrow[i]) || (n_cols !=ncol[i])) { 995 SETERRQ(1,"MatGetSubmatrices_MPIAIJ:"); 996 } 997 } 998 } 999 else { 1000 *submat = (Mat *)PetscMalloc(ismax*sizeof(Mat)); CHKPTRQ(*submat); 1001 for ( i=0; i<ismax; i++) { 1002 ierr = MatCreateSeqAIJ(comm, nrow[i],ncol[i],0,lens[i],(*submat)+i); CHKERRQ(ierr); 1003 } 1004 } 1005 1006 /* Assemble the matrices */ 1007 1008 1009 1010 /* Restore the indices */ 1011 for (i=0; i<ismax; i++) { 1012 ierr = ISRestoreIndices(isrow[i], irow+i); CHKERRQ(ierr); 1013 ierr = ISRestoreIndices(iscol[i], icol+i); CHKERRQ(ierr); 1014 } 1015 /* Destroy allocated memory */ 1016 PetscFree(nrow); 1017 PetscFree(ncol); 1018 PetscFree(rtable); 1019 PetscFree(w1); 1020 PetscFree(pa); 1021 PetscFree(rbuf1[0]); 1022 PetscFree(rbuf1); 1023 PetscFree(recv_waits ); 1024 PetscFree(sbuf1 ); 1025 PetscFree(tmp); 1026 PetscFree(ctr); 1027 PetscFree(send_waits); 1028 PetscFree(recv_waits2); 1029 PetscFree(rbuf2); 1030 PetscFree(send_waits2); 1031 PetscFree(recv_status); 1032 PetscFree(req_size); 1033 PetscFree(req_source); 1034 for ( i=0; i< nrqr; ++i) { 1035 PetscFree(sbuf2[i]); 1036 } 1037 for ( i=0; i< nrqs; ++i) { 1038 PetscFree(rbuf3[i]); 1039 PetscFree(rbuf4[i]); 1040 } 1041 1042 PetscFree( sbuf2 ); 1043 PetscFree(rbuf3); 1044 PetscFree(rbuf4 ); 1045 PetscFree(recv_waits3); 1046 PetscFree(recv_waits4); 1047 PetscFree(recv_status2); 1048 PetscFree( send_status); 1049 PetscFree(send_status2); 1050 PetscFree(sbuf_aj[0]); 1051 PetscFree(sbuf_aj); 1052 PetscFree(send_waits3); 1053 PetscFree(recv_status3); 1054 PetscFree(send_status3); 1055 PetscFree(sbuf_aa[0]); 1056 PetscFree(sbuf_aa); 1057 PetscFree(send_waits4); 1058 PetscFree(recv_status4); 1059 1060 return 0; 1061 } 1062 1063 1064 1065 1066