1 #define PETSCMAT_DLL 2 3 /* 4 Routines to compute overlapping regions of a parallel MPI matrix 5 and to find submatrices that were shared across processors. 6 */ 7 #include "../src/mat/impls/baij/mpi/mpibaij.h" 8 #include "petscbt.h" 9 10 static PetscErrorCode MatIncreaseOverlap_MPIBAIJ_Once(Mat,PetscInt,IS *); 11 static PetscErrorCode MatIncreaseOverlap_MPIBAIJ_Local(Mat,PetscInt,char **,PetscInt*,PetscInt**); 12 static PetscErrorCode MatIncreaseOverlap_MPIBAIJ_Receive(Mat,PetscInt,PetscInt **,PetscInt**,PetscInt*); 13 EXTERN PetscErrorCode MatGetRow_MPIBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 14 EXTERN PetscErrorCode MatRestoreRow_MPIBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 15 16 #undef __FUNCT__ 17 #define __FUNCT__ "MatIncreaseOverlap_MPIBAIJ" 18 PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat C,PetscInt imax,IS is[],PetscInt ov) 19 { 20 PetscErrorCode ierr; 21 PetscInt i,N=C->cmap->N, bs=C->rmap->bs; 22 IS *is_new; 23 24 PetscFunctionBegin; 25 ierr = PetscMalloc(imax*sizeof(IS),&is_new);CHKERRQ(ierr); 26 /* Convert the indices into block format */ 27 ierr = ISCompressIndicesGeneral(N,bs,imax,is,is_new);CHKERRQ(ierr); 28 if (ov < 0){ SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified\n");} 29 for (i=0; i<ov; ++i) { 30 ierr = MatIncreaseOverlap_MPIBAIJ_Once(C,imax,is_new);CHKERRQ(ierr); 31 } 32 for (i=0; i<imax; i++) {ierr = ISDestroy(is[i]);CHKERRQ(ierr);} 33 ierr = ISExpandIndicesGeneral(N,bs,imax,is_new,is);CHKERRQ(ierr); 34 for (i=0; i<imax; i++) {ierr = ISDestroy(is_new[i]);CHKERRQ(ierr);} 35 ierr = PetscFree(is_new);CHKERRQ(ierr); 36 PetscFunctionReturn(0); 37 } 38 39 /* 40 Sample message format: 41 If a processor A wants processor B to process some elements corresponding 42 to index sets is[1], is[5] 43 mesg [0] = 2 (no of index sets in the mesg) 44 ----------- 45 mesg [1] = 1 => is[1] 46 mesg [2] = sizeof(is[1]); 47 ----------- 48 mesg [5] = 5 => is[5] 49 mesg [6] = sizeof(is[5]); 50 ----------- 51 mesg [7] 52 mesg 
 [n]  data(is[1])
       -----------
       mesg[n+1]
  mesg[m]  data(is[5])
       -----------

  Notes:
  nrqs - no of requests sent (or to be sent out)
  nrqr - no of requests received (which have to be or which have been processed)
*/
#undef __FUNCT__
#define __FUNCT__ "MatIncreaseOverlap_MPIBAIJ_Once"
/*
   Grows the overlap of the (block-compressed) index sets is[] by exactly
   one level, exchanging the rows owned by other processors via two rounds
   of MPI messages (requests out with tag1, answers back with tag2).
   Each is[i] is destroyed and re-created with the enlarged contents.
*/
static PetscErrorCode MatIncreaseOverlap_MPIBAIJ_Once(Mat C,PetscInt imax,IS is[])
{
  Mat_MPIBAIJ    *c = (Mat_MPIBAIJ*)C->data;
  const PetscInt **idx,*idx_i;
  PetscInt       *n,*w3,*w4,*rtable,**data,len;
  PetscErrorCode ierr;
  PetscMPIInt    size,rank,tag1,tag2,*w2,*w1,nrqr;
  PetscInt       Mbs,i,j,k,**rbuf,row,proc,nrqs,msz,**outdat,**ptr;
  PetscInt       *ctr,*pa,*tmp,*isz,*isz1,**xdata,**rbuf2;
  PetscMPIInt    *onodes1,*olengths1,*onodes2,*olengths2;
  PetscBT        *table;
  MPI_Comm       comm;
  MPI_Request    *s_waits1,*r_waits1,*s_waits2,*r_waits2;
  MPI_Status     *s_status,*recv_status;

  PetscFunctionBegin;
  comm = ((PetscObject)C)->comm;
  size = c->size;
  rank = c->rank;
  Mbs  = c->Mbs;                          /* number of block rows of the global matrix */

  ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag2);CHKERRQ(ierr);

  /* Single allocation carved into: idx (per-IS index arrays), n (per-IS sizes),
     rtable (block row -> owning processor lookup of length Mbs) */
  len  = (imax+1)*sizeof(PetscInt*)+ (imax + Mbs)*sizeof(PetscInt);
  ierr = PetscMalloc(len,&idx);CHKERRQ(ierr);
  n      = (PetscInt*)(idx + imax);
  rtable = n + imax;

  for (i=0; i<imax; i++) {
    ierr = ISGetIndices(is[i],&idx[i]);CHKERRQ(ierr);
    ierr = ISGetLocalSize(is[i],&n[i]);CHKERRQ(ierr);
  }

  /* Create hash table for the mapping :row -> proc (via the block-row ownership ranges) */
  for (i=0,j=0; i<size; i++) {
    len = c->rangebs[i+1];
    for (; j<len; j++) {
      rtable[j] = i;
    }
  }

  /* evaluate communication - mesg to who, length of mesg, and buffer space
     required.  Based on this, buffers are allocated, and data copied into them */
  ierr = PetscMalloc(size*2*sizeof(PetscInt)+size*2*sizeof(PetscMPIInt),&w1);CHKERRQ(ierr); /* mesg size */
  w2 = w1 + size;               /* if w2[i] marked, then a message to proc i */
  w3 = (PetscInt*) (w2 + size); /* no of IS that needs to be sent to proc i */
  w4 = w3 + size;               /* temp work space used in determining w1, w2, w3 */
  ierr = PetscMemzero(w1,size*sizeof(PetscInt)+2*size*sizeof(PetscMPIInt));CHKERRQ(ierr); /* initialise work vector */
  for (i=0; i<imax; i++) {
    ierr = PetscMemzero(w4,size*sizeof(PetscInt));CHKERRQ(ierr); /* initialise work vector */
    idx_i = idx[i];
    len   = n[i];
    for (j=0; j<len; j++) {
      row = idx_i[j];
      if (row < 0) {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index set cannot have negative entries");
      }
      proc = rtable[row];
      w4[proc]++;
    }
    for (j=0; j<size; j++){
      if (w4[j]) { w1[j] += w4[j]; w3[j]++;}
    }
  }

  nrqs     = 0; /* no of outgoing messages */
  msz      = 0; /* total mesg length (for all procs) */
  w1[rank] = 0; /* no mesg sent to itself */
  w3[rank] = 0;
  for (i=0; i<size; i++) {
    if (w1[i]) {w2[i] = 1; nrqs++;} /* there exists a message to proc i */
  }
  /* pa - is list of processors to communicate with */
  ierr = PetscMalloc((nrqs+1)*sizeof(PetscInt),&pa);CHKERRQ(ierr);
  for (i=0,j=0; i<size; i++) {
    if (w1[i]) {pa[j] = i; j++;}
  }

  /* Each message would have a header = 1 + 2*(no of IS) + data */
  for (i=0; i<nrqs; i++) {
    j      = pa[i];
    w1[j] += w2[j] + 2*w3[j];
    msz   += w1[j];
  }

  /* Determine the number of messages to expect, their lengths, from from-ids */
  ierr = PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);CHKERRQ(ierr);

  /* Now post the Irecvs corresponding to these messages */
  ierr = PetscPostIrecvInt(comm,tag1,nrqr,onodes1,olengths1,&rbuf,&r_waits1);CHKERRQ(ierr);

  /* Allocate Memory for outgoing messages: outdat/ptr pointer tables plus
     one contiguous slab (tmp) holding all message payloads, plus ctr */
  len  = 2*size*sizeof(PetscInt*) + (size+msz)*sizeof(PetscInt);
  ierr = PetscMalloc(len,&outdat);CHKERRQ(ierr);
  ptr  = outdat + size; /* Pointers to the data in outgoing buffers */
  ierr = PetscMemzero(outdat,2*size*sizeof(PetscInt*));CHKERRQ(ierr);
  tmp  = (PetscInt*)(outdat + 2*size);
  ctr  = tmp + msz;

  {
    /* carve tmp into per-destination message buffers of length w1[j] */
    PetscInt *iptr = tmp,ict = 0;
    for (i=0; i<nrqs; i++) {
      j         = pa[i];
      iptr     += ict;
      outdat[j] = iptr;
      ict       = w1[j];
    }
  }

  /* Form the outgoing messages */
  /* plug in the headers: count slot [0] plus 2 slots per IS, zeroed */
  for (i=0; i<nrqs; i++) {
    j            = pa[i];
    outdat[j][0] = 0;
    ierr = PetscMemzero(outdat[j]+1,2*w3[j]*sizeof(PetscInt));CHKERRQ(ierr);
    ptr[j] = outdat[j] + 2*w3[j] + 1; /* data region starts after the header */
  }

  /* Memory for doing local proc's work: per-IS bit tables (table) and
     per-IS result arrays (data) of capacity Mbs, all in one allocation */
  {
    PetscInt *d_p;
    char     *t_p;

    len = (imax)*(sizeof(PetscBT) + sizeof(PetscInt*)+ sizeof(PetscInt)) +
      (Mbs)*imax*sizeof(PetscInt) + (Mbs/PETSC_BITS_PER_BYTE+1)*imax*sizeof(char) + 1;
    ierr = PetscMalloc(len,&table);CHKERRQ(ierr);
    ierr = PetscMemzero(table,len);CHKERRQ(ierr);
    data = (PetscInt **)(table + imax);
    isz  = (PetscInt *)(data + imax);
    d_p  = (PetscInt *)(isz + imax);
    t_p  = (char *)(d_p + Mbs*imax);
    for (i=0; i<imax; i++) {
      table[i] = t_p + (Mbs/PETSC_BITS_PER_BYTE+1)*i;
      data[i]  = d_p + (Mbs)*i;
    }
  }

  /* Parse the IS and update local tables and the outgoing buf with the data */
  {
    PetscInt n_i,*data_i,isz_i,*outdat_j,ctr_j;
    PetscBT  table_i;

    for (i=0; i<imax; i++) {
      ierr    = PetscMemzero(ctr,size*sizeof(PetscInt));CHKERRQ(ierr);
      n_i     = n[i];
      table_i = table[i];
      idx_i   = idx[i];
      data_i  = data[i];
      isz_i   = isz[i];
      for (j=0; j<n_i; j++) {  /* parse the indices of each IS */
        row  = idx_i[j];
        proc = rtable[row];
        if (proc != rank) { /* copy to the outgoing buffer */
          ctr[proc]++;
          *ptr[proc] = row;
          ptr[proc]++;
        }
        else { /* Update the local table */
          if (!PetscBTLookupSet(table_i,row)) { data_i[isz_i++] = row;}
        }
      }
      /* Update the headers for the current IS */
      for (j=0; j<size; j++) { /* Can Optimise this loop by using pa[] */
        if ((ctr_j = ctr[j])) {
          outdat_j        = outdat[j];
          k               = ++outdat_j[0];
          outdat_j[2*k]   = ctr_j;  /* count of rows for IS i */
          outdat_j[2*k-1] = i;      /* which IS they belong to */
        }
      }
      isz[i] = isz_i;
    }
  }

  /* Now post the sends */
  ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);CHKERRQ(ierr);
  for (i=0; i<nrqs; ++i) {
    j    = pa[i];
    ierr = MPI_Isend(outdat[j],w1[j],MPIU_INT,j,tag1,comm,s_waits1+i);CHKERRQ(ierr);
  }

  /* No longer need the original indices */
  for (i=0; i<imax; ++i) {
    ierr = ISRestoreIndices(is[i],idx+i);CHKERRQ(ierr);
  }
  ierr = PetscFree(idx);CHKERRQ(ierr);

  for (i=0; i<imax; ++i) {
    ierr = ISDestroy(is[i]);CHKERRQ(ierr);
  }

  /* Do Local work while the messages are in flight */
  ierr = MatIncreaseOverlap_MPIBAIJ_Local(C,imax,table,isz,data);CHKERRQ(ierr);

  /* Receive messages */
  ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&recv_status);CHKERRQ(ierr);
  if (nrqr) {ierr = MPI_Waitall(nrqr,r_waits1,recv_status);CHKERRQ(ierr);}

  ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status);CHKERRQ(ierr);
  if (nrqs) {ierr = MPI_Waitall(nrqs,s_waits1,s_status);CHKERRQ(ierr);}

  /* Phase 1 sends are complete - deallocate buffers */
  ierr = PetscFree(outdat);CHKERRQ(ierr);
  ierr = PetscFree(w1);CHKERRQ(ierr);

  /* Process the received requests; xdata[i]/isz1[i] hold the reply for request i */
  ierr = PetscMalloc((nrqr+1)*sizeof(PetscInt*),&xdata);CHKERRQ(ierr);
  ierr = PetscMalloc((nrqr+1)*sizeof(PetscInt),&isz1);CHKERRQ(ierr);
  ierr = MatIncreaseOverlap_MPIBAIJ_Receive(C,nrqr,rbuf,xdata,isz1);CHKERRQ(ierr);
  ierr = PetscFree(rbuf);CHKERRQ(ierr);

  /* Send the data back */
  /* Do a global reduction to know the buffer space req for incoming messages */
  {
    PetscMPIInt *rw1;

    ierr = PetscMalloc(size*sizeof(PetscInt),&rw1);CHKERRQ(ierr);
    ierr = PetscMemzero(rw1,size*sizeof(PetscInt));CHKERRQ(ierr);

    for (i=0; i<nrqr; ++i) {
      proc = recv_status[i].MPI_SOURCE;
      /* sanity check: request i was posted for onodes1[i] */
      if (proc != onodes1[i]) SETERRQ(PETSC_ERR_PLIB,"MPI_SOURCE mismatch");
      rw1[proc] = isz1[i];
    }

    ierr = PetscFree(onodes1);CHKERRQ(ierr);
    ierr = PetscFree(olengths1);CHKERRQ(ierr);

    /* Determine the number of messages to expect, their lengths, from from-ids */
    ierr = PetscGatherMessageLengths(comm,nrqr,nrqs,rw1,&onodes2,&olengths2);CHKERRQ(ierr);
    ierr = PetscFree(rw1);CHKERRQ(ierr);
  }
  /* Now post the Irecvs corresponding to these messages */
  ierr = PetscPostIrecvInt(comm,tag2,nrqs,onodes2,olengths2,&rbuf2,&r_waits2);CHKERRQ(ierr);

  /* Now post the sends */
  ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);CHKERRQ(ierr);
  for (i=0; i<nrqr; ++i) {
    j    = recv_status[i].MPI_SOURCE;
    ierr = MPI_Isend(xdata[i],isz1[i],MPIU_INT,j,tag2,comm,s_waits2+i);CHKERRQ(ierr);
  }

  /* receive work done on other processors */
  {
    PetscMPIInt idex;
    PetscInt    is_no,ct1,max,*rbuf2_i,isz_i,*data_i,jmax;
    PetscBT     table_i;
    MPI_Status  *status2;

    ierr = PetscMalloc((PetscMax(nrqr,nrqs)+1)*sizeof(MPI_Status),&status2);CHKERRQ(ierr);

    for (i=0; i<nrqs; ++i) {
      ierr = MPI_Waitany(nrqs,r_waits2,&idex,status2+i);CHKERRQ(ierr);
      /* Process the message: header mirrors the request format, then data */
      rbuf2_i = rbuf2[idex];
      ct1     = 2*rbuf2_i[0]+1;
      jmax    = rbuf2[idex][0];
      for (j=1; j<=jmax; j++) {
        max     = rbuf2_i[2*j];
        is_no   = rbuf2_i[2*j-1];
        isz_i   = isz[is_no];
        data_i  = data[is_no];
        table_i = table[is_no];
        for (k=0; k<max; k++,ct1++) {
          row = rbuf2_i[ct1];
          /* merge, skipping rows already present in the local table */
          if (!PetscBTLookupSet(table_i,row)) { data_i[isz_i++] = row;}
        }
        isz[is_no] = isz_i;
      }
    }
    if (nrqr) {ierr = MPI_Waitall(nrqr,s_waits2,status2);CHKERRQ(ierr);}
    ierr = PetscFree(status2);CHKERRQ(ierr);
  }

  /* Re-create the index sets from the merged results */
  for (i=0; i<imax; ++i) {
    ierr = ISCreateGeneral(PETSC_COMM_SELF,isz[i],data[i],is+i);CHKERRQ(ierr);
  }

  ierr = PetscFree(onodes2);CHKERRQ(ierr);
  ierr = PetscFree(olengths2);CHKERRQ(ierr);

  ierr = PetscFree(pa);CHKERRQ(ierr);
  ierr = PetscFree(rbuf2);CHKERRQ(ierr);
  ierr = PetscFree(s_waits1);CHKERRQ(ierr);
  ierr = PetscFree(r_waits1);CHKERRQ(ierr);
  ierr = PetscFree(s_waits2);CHKERRQ(ierr);
  ierr = PetscFree(r_waits2);CHKERRQ(ierr);
  ierr = PetscFree(table);CHKERRQ(ierr);
  ierr = PetscFree(s_status);CHKERRQ(ierr);
  ierr = PetscFree(recv_status);CHKERRQ(ierr);
  ierr = PetscFree(xdata[0]);CHKERRQ(ierr); /* single slab backing all xdata[i] */
  ierr = PetscFree(xdata);CHKERRQ(ierr);
  ierr = PetscFree(isz1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatIncreaseOverlap_MPIBAIJ_Local"
/*
  MatIncreaseOverlap_MPIBAIJ_Local - Called by MatIncreaseOverlap, to do
  the work on the local processor.

  Inputs:
   C     - MAT_MPIBAIJ;
   imax  - total no of index sets processed at a time;
   table - an array of char - size = Mbs bits.
372 373 Output: 374 isz - array containing the count of the solution elements corresponding 375 to each index set; 376 data - pointer to the solutions 377 */ 378 static PetscErrorCode MatIncreaseOverlap_MPIBAIJ_Local(Mat C,PetscInt imax,PetscBT *table,PetscInt *isz,PetscInt **data) 379 { 380 Mat_MPIBAIJ *c = (Mat_MPIBAIJ*)C->data; 381 Mat A = c->A,B = c->B; 382 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)B->data; 383 PetscInt start,end,val,max,rstart,cstart,*ai,*aj; 384 PetscInt *bi,*bj,*garray,i,j,k,row,*data_i,isz_i; 385 PetscBT table_i; 386 387 PetscFunctionBegin; 388 rstart = c->rstartbs; 389 cstart = c->cstartbs; 390 ai = a->i; 391 aj = a->j; 392 bi = b->i; 393 bj = b->j; 394 garray = c->garray; 395 396 397 for (i=0; i<imax; i++) { 398 data_i = data[i]; 399 table_i = table[i]; 400 isz_i = isz[i]; 401 for (j=0,max=isz[i]; j<max; j++) { 402 row = data_i[j] - rstart; 403 start = ai[row]; 404 end = ai[row+1]; 405 for (k=start; k<end; k++) { /* Amat */ 406 val = aj[k] + cstart; 407 if (!PetscBTLookupSet(table_i,val)) { data_i[isz_i++] = val;} 408 } 409 start = bi[row]; 410 end = bi[row+1]; 411 for (k=start; k<end; k++) { /* Bmat */ 412 val = garray[bj[k]]; 413 if (!PetscBTLookupSet(table_i,val)) { data_i[isz_i++] = val;} 414 } 415 } 416 isz[i] = isz_i; 417 } 418 PetscFunctionReturn(0); 419 } 420 #undef __FUNCT__ 421 #define __FUNCT__ "MatIncreaseOverlap_MPIBAIJ_Receive" 422 /* 423 MatIncreaseOverlap_MPIBAIJ_Receive - Process the recieved messages, 424 and return the output 425 426 Input: 427 C - the matrix 428 nrqr - no of messages being processed. 429 rbuf - an array of pointers to the recieved requests 430 431 Output: 432 xdata - array of messages to be sent back 433 isz1 - size of each message 434 435 For better efficiency perhaps we should malloc separately each xdata[i], 436 then if a remalloc is required we need only copy the data for that one row 437 rather than all previous rows as it is now where a single large chunck of 438 memory is used. 
*/
static PetscErrorCode MatIncreaseOverlap_MPIBAIJ_Receive(Mat C,PetscInt nrqr,PetscInt **rbuf,PetscInt **xdata,PetscInt * isz1)
{
  Mat_MPIBAIJ    *c = (Mat_MPIBAIJ*)C->data;
  Mat            A = c->A,B = c->B;
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       rstart,cstart,*ai,*aj,*bi,*bj,*garray,i,j,k;
  PetscInt       row,total_sz,ct,ct1,ct2,ct3,mem_estimate,oct2,l,start,end;
  PetscInt       val,max1,max2,rank,Mbs,no_malloc =0,*tmp,new_estimate,ctr;
  PetscInt       *rbuf_i,kmax,rbuf_0;
  PetscBT        xtable;

  PetscFunctionBegin;
  rank   = c->rank;
  Mbs    = c->Mbs;
  rstart = c->rstartbs;
  cstart = c->cstartbs;
  ai     = a->i;
  aj     = a->j;
  bi     = b->i;
  bj     = b->j;
  garray = c->garray;

  /* First pass over the requests: count total IS entries (ct) and total
     requested rows (total_sz) to size the reply buffer */
  for (i=0,ct=0,total_sz=0; i<nrqr; ++i) {
    rbuf_i = rbuf[i];
    rbuf_0 = rbuf_i[0];
    ct    += rbuf_0;
    for (j=1; j<=rbuf_0; j++) { total_sz += rbuf_i[2*j]; }
  }

  /* Estimate the reply size from the average nonzeros per block row;
     the buffer is grown below if the estimate proves too small */
  if (c->Mbs) max1 = ct*(a->nz +b->nz)/c->Mbs;
  else        max1 = 1;
  mem_estimate = 3*((total_sz > max1 ? total_sz : max1)+1);
  /* xdata[0] is one contiguous slab; xdata[i>0] point into it */
  ierr = PetscMalloc(mem_estimate*sizeof(PetscInt),&xdata[0]);CHKERRQ(ierr);
  ++no_malloc;
  ierr = PetscBTCreate(Mbs,xtable);CHKERRQ(ierr);
  ierr = PetscMemzero(isz1,nrqr*sizeof(PetscInt));CHKERRQ(ierr);

  ct3 = 0; /* total entries written into the slab so far (all messages) */
  for (i=0; i<nrqr; i++) { /* for each mesg from proc i */
    rbuf_i =  rbuf[i];
    rbuf_0 =  rbuf_i[0];
    ct1    =  2*rbuf_0+1;  /* read cursor past the request header */
    ct2    =  ct1;         /* write cursor in xdata[i] (header is filled last) */
    ct3   += ct1;          /* reserve header space in the slab */
    for (j=1; j<=rbuf_0; j++) { /* for each IS from proc i */
      /* a fresh bit table per IS: duplicates are removed within one IS only */
      ierr = PetscBTMemzero(Mbs,xtable);CHKERRQ(ierr);
      oct2 = ct2;          /* start of this IS's data in xdata[i] */
      kmax = rbuf_i[2*j];
      for (k=0; k<kmax; k++,ct1++) {
        row = rbuf_i[ct1];
        if (!PetscBTLookupSet(xtable,row)) {
          if (!(ct3 < mem_estimate)) {
            /* grow the slab by 1.5x and re-point every xdata[ctr] into it */
            new_estimate = (PetscInt)(1.5*mem_estimate)+1;
            ierr = PetscMalloc(new_estimate * sizeof(PetscInt),&tmp);CHKERRQ(ierr);
            ierr = PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));CHKERRQ(ierr);
            ierr = PetscFree(xdata[0]);CHKERRQ(ierr);
            xdata[0]     = tmp;
            mem_estimate = new_estimate; ++no_malloc;
            for (ctr=1; ctr<=i; ctr++) { xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];}
          }
          xdata[i][ct2++] = row;
          ct3++;
        }
      }
      /* one level of overlap: append the column neighbours of every row
         accepted above (range [oct2,max2) of xdata[i]) */
      for (k=oct2,max2=ct2; k<max2; k++) {
        row   = xdata[i][k] - rstart;
        start = ai[row];
        end   = ai[row+1];
        for (l=start; l<end; l++) { /* Amat (diagonal block), global numbering */
          val = aj[l] + cstart;
          if (!PetscBTLookupSet(xtable,val)) {
            if (!(ct3 < mem_estimate)) {
              new_estimate = (PetscInt)(1.5*mem_estimate)+1;
              ierr = PetscMalloc(new_estimate * sizeof(PetscInt),&tmp);CHKERRQ(ierr);
              ierr = PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));CHKERRQ(ierr);
              ierr = PetscFree(xdata[0]);CHKERRQ(ierr);
              xdata[0]     = tmp;
              mem_estimate = new_estimate; ++no_malloc;
              for (ctr=1; ctr<=i; ctr++) { xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];}
            }
            xdata[i][ct2++] = val;
            ct3++;
          }
        }
        start = bi[row];
        end   = bi[row+1];
        for (l=start; l<end; l++) { /* Bmat (off-diagonal block), via garray */
          val = garray[bj[l]];
          if (!PetscBTLookupSet(xtable,val)) {
            if (!(ct3 < mem_estimate)) {
              new_estimate = (PetscInt)(1.5*mem_estimate)+1;
              ierr = PetscMalloc(new_estimate * sizeof(PetscInt),&tmp);CHKERRQ(ierr);
              ierr = PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));CHKERRQ(ierr);
              ierr = PetscFree(xdata[0]);CHKERRQ(ierr);
              xdata[0]     = tmp;
              mem_estimate = new_estimate; ++no_malloc;
              for (ctr =1; ctr <=i; ctr++) { xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];}
            }
            xdata[i][ct2++] = val;
            ct3++;
          }
        }
      }
      /* Update the header: count for IS j and its original IS number */
      xdata[i][2*j]   = ct2 - oct2; /* Undo the vector isz1 and use only a var */
      xdata[i][2*j-1] = rbuf_i[2*j-1];
    }
    xdata[i][0] = rbuf_0;
    xdata[i+1]  = xdata[i] + ct2; /* next message starts right after this one */
    isz1[i]     = ct2;            /* size of each message */
  }
  ierr = PetscBTDestroy(xtable);CHKERRQ(ierr);
  ierr = PetscInfo3(C,"Allocated %D bytes, required %D, no of mallocs = %D\n",mem_estimate,ct3,no_malloc);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrices_MPIBAIJ"
/*
   MatGetSubMatrices_MPIBAIJ - Extracts sequential submatrices given by the
   (sorted) row/column index sets, compressing point indices to block
   indices, then delegating to MatGetSubMatrices_MPIBAIJ_local in stages.
*/
PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[])
{
  IS             *isrow_new,*iscol_new;
  Mat_MPIBAIJ    *c = (Mat_MPIBAIJ*)C->data;
  PetscErrorCode ierr;
  PetscInt       nmax,nstages_local,nstages,i,pos,max_no,N=C->cmap->N,bs=C->rmap->bs;

  PetscFunctionBegin;
  /* The compression and expansion should be avoided.
     Doesn't point out errors; it might change the indices, hence buggy */

  ierr = PetscMalloc(2*(ismax+1)*sizeof(IS),&isrow_new);CHKERRQ(ierr);
  iscol_new = isrow_new + ismax;
  /* compress the point index sets to block index sets */
  ierr = ISCompressIndicesSorted(N,bs,ismax,isrow,isrow_new);CHKERRQ(ierr);
  ierr = ISCompressIndicesSorted(N,bs,ismax,iscol,iscol_new);CHKERRQ(ierr);

  /* Allocate memory to hold all the submatrices */
  if (scall != MAT_REUSE_MATRIX) {
    ierr = PetscMalloc((ismax+1)*sizeof(Mat),submat);CHKERRQ(ierr);
  }
  /* Determine the number of stages through which submatrices are done;
     nmax caps how many index sets are processed per stage (bounds the
     ~20 MB of workspace used per stage) */
  nmax = 20*1000000 / (c->Nbs * sizeof(PetscInt));
  if (!nmax) nmax = 1;
  nstages_local = ismax/nmax + ((ismax % nmax)?1:0);

  /* Make sure every processor loops through the nstages: the stage count is
     collective, so processors with fewer sets still participate with max_no=0 */
  ierr = MPI_Allreduce(&nstages_local,&nstages,1,MPIU_INT,MPI_MAX,((PetscObject)C)->comm);CHKERRQ(ierr);
  for (i=0,pos=0; i<nstages; i++) {
    if (pos+nmax <= ismax) max_no = nmax;
    else if (pos == ismax) max_no = 0;
    else                   max_no = ismax-pos;
    ierr = MatGetSubMatrices_MPIBAIJ_local(C,max_no,isrow_new+pos,iscol_new+pos,scall,*submat+pos);CHKERRQ(ierr);
    pos += max_no;
  }

  for (i=0; i<ismax; i++) {
    ierr = ISDestroy(isrow_new[i]);CHKERRQ(ierr);
    ierr = ISDestroy(iscol_new[i]);CHKERRQ(ierr);
  }
  ierr = PetscFree(isrow_new);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined (PETSC_USE_CTABLE)
#undef __FUNCT__
#define __FUNCT__ "PetscGetProc"
/*
   PetscGetProc - Returns in rank the processor owning global block row
   'row', given the ownership ranges proc_gnode[0..size] (proc p owns
   rows [proc_gnode[p],proc_gnode[p+1])).  Starts from a proportional
   guess and walks up/down to the owning range.
*/
PetscErrorCode PetscGetProc(const PetscInt row, const PetscMPIInt size, const PetscInt proc_gnode[], PetscMPIInt *rank)
{
  PetscInt    nGlobalNd = proc_gnode[size];
  /* initial guess assuming roughly uniform distribution of rows */
  PetscMPIInt fproc = PetscMPIIntCast( (PetscInt)(((float)row * (float)size / (float)nGlobalNd + 0.5)));

  PetscFunctionBegin;
  if (fproc > size) fproc = size;
  /* linear search from the guess; assumes 0 <= row < nGlobalNd so the
     loop terminates within the table — TODO confirm caller guarantees this */
  while (row < proc_gnode[fproc] || row >= proc_gnode[fproc+1]) {
    if (row < proc_gnode[fproc]) fproc--;
    else                         fproc++;
  }
  *rank = fproc;
PetscFunctionReturn(0); 622 } 623 #endif 624 625 /* -------------------------------------------------------------------------*/ 626 /* This code is used for BAIJ and SBAIJ matrices (unfortunate dependency) */ 627 #undef __FUNCT__ 628 #define __FUNCT__ "MatGetSubMatrices_MPIBAIJ_local" 629 static PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submats) 630 { 631 Mat_MPIBAIJ *c = (Mat_MPIBAIJ*)C->data; 632 Mat A = c->A; 633 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)c->B->data,*mat; 634 const PetscInt **irow,**icol,*irow_i; 635 PetscInt *nrow,*ncol,*w3,*w4,start; 636 PetscErrorCode ierr; 637 PetscMPIInt size,tag0,tag1,tag2,tag3,*w1,*w2,nrqr,idex,end,proc; 638 PetscInt **sbuf1,**sbuf2,rank,i,j,k,l,ct1,ct2,**rbuf1,row; 639 PetscInt nrqs,msz,**ptr,*req_size,*ctr,*pa,*tmp,tcol; 640 PetscInt **rbuf3,*req_source,**sbuf_aj,**rbuf2,max1,max2; 641 PetscInt **lens,is_no,ncols,*cols,mat_i,*mat_j,tmp2,jmax; 642 PetscInt len,ctr_j,*sbuf1_j,*sbuf_aj_i,*rbuf1_i,kmax,*lens_i; 643 PetscInt bs=C->rmap->bs,bs2=c->bs2,*a_j=a->j,*b_j=b->j,*cworkA,*cworkB; 644 PetscInt cstart = c->cstartbs,nzA,nzB,*a_i=a->i,*b_i=b->i,imark; 645 PetscInt *bmap = c->garray,ctmp,rstart=c->rstartbs; 646 MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3; 647 MPI_Request *r_waits4,*s_waits3,*s_waits4; 648 MPI_Status *r_status1,*r_status2,*s_status1,*s_status3,*s_status2; 649 MPI_Status *r_status3,*r_status4,*s_status4; 650 MPI_Comm comm; 651 MatScalar **rbuf4,**sbuf_aa,*vals,*mat_a,*sbuf_aa_i,*vworkA,*vworkB; 652 MatScalar *a_a=a->a,*b_a=b->a; 653 PetscTruth flag,sorted; 654 PetscMPIInt *onodes1,*olengths1; 655 656 #if defined (PETSC_USE_CTABLE) 657 PetscInt tt; 658 PetscTable *rowmaps,*colmaps,lrow1_grow1,lcol1_gcol1; 659 #else 660 PetscInt **cmap,*cmap_i,*rtable,*rmap_i,**rmap, Mbs = c->Mbs; 661 #endif 662 663 PetscFunctionBegin; 664 comm = ((PetscObject)C)->comm; 665 tag0 = ((PetscObject)C)->tag; 666 size = 
c->size; 667 rank = c->rank; 668 669 /* Get some new tags to keep the communication clean */ 670 ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr); 671 ierr = PetscObjectGetNewTag((PetscObject)C,&tag2);CHKERRQ(ierr); 672 ierr = PetscObjectGetNewTag((PetscObject)C,&tag3);CHKERRQ(ierr); 673 674 /* Check if the col indices are sorted */ 675 for (i=0; i<ismax; i++) { 676 ierr = ISSorted(iscol[i],&sorted);CHKERRQ(ierr); 677 if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted"); 678 } 679 680 len = (2*ismax+1)*(sizeof(PetscInt*)+ sizeof(PetscInt)); 681 #if !defined (PETSC_USE_CTABLE) 682 len += (Mbs+1)*sizeof(PetscInt); 683 #endif 684 ierr = PetscMalloc(len,&irow);CHKERRQ(ierr); 685 icol = irow + ismax; 686 nrow = (PetscInt*)(icol + ismax); 687 ncol = nrow + ismax; 688 #if !defined (PETSC_USE_CTABLE) 689 rtable = ncol + ismax; 690 /* Create hash table for the mapping :row -> proc*/ 691 for (i=0,j=0; i<size; i++) { 692 jmax = c->rowners[i+1]; 693 for (; j<jmax; j++) { 694 rtable[j] = i; 695 } 696 } 697 #endif 698 699 for (i=0; i<ismax; i++) { 700 ierr = ISGetIndices(isrow[i],&irow[i]);CHKERRQ(ierr); 701 ierr = ISGetIndices(iscol[i],&icol[i]);CHKERRQ(ierr); 702 ierr = ISGetLocalSize(isrow[i],&nrow[i]);CHKERRQ(ierr); 703 ierr = ISGetLocalSize(iscol[i],&ncol[i]);CHKERRQ(ierr); 704 } 705 706 /* evaluate communication - mesg to who,length of mesg,and buffer space 707 required. 
Based on this, buffers are allocated, and data copied into them*/ 708 ierr = PetscMalloc(size*2*sizeof(PetscInt) + size*2*sizeof(PetscMPIInt),&w1);CHKERRQ(ierr); /* mesg size */ 709 w2 = w1 + size; /* if w2[i] marked, then a message to proc i*/ 710 w3 = (PetscInt*)(w2 + size); /* no of IS that needs to be sent to proc i */ 711 w4 = w3 + size; /* temp work space used in determining w1, w2, w3 */ 712 ierr = PetscMemzero(w1,size*sizeof(PetscInt)+2*size*sizeof(PetscMPIInt));CHKERRQ(ierr); /* initialise work vector*/ 713 for (i=0; i<ismax; i++) { 714 ierr = PetscMemzero(w4,size*sizeof(PetscInt));CHKERRQ(ierr); /* initialise work vector*/ 715 jmax = nrow[i]; 716 irow_i = irow[i]; 717 for (j=0; j<jmax; j++) { 718 row = irow_i[j]; 719 #if defined (PETSC_USE_CTABLE) 720 ierr = PetscGetProc(row,size,c->rangebs,&proc);CHKERRQ(ierr); 721 #else 722 proc = rtable[row]; 723 #endif 724 w4[proc]++; 725 } 726 for (j=0; j<size; j++) { 727 if (w4[j]) { w1[j] += w4[j]; w3[j]++;} 728 } 729 } 730 731 nrqs = 0; /* no of outgoing messages */ 732 msz = 0; /* total mesg length for all proc */ 733 w1[rank] = 0; /* no mesg sent to intself */ 734 w3[rank] = 0; 735 for (i=0; i<size; i++) { 736 if (w1[i]) { w2[i] = 1; nrqs++;} /* there exists a message to proc i */ 737 } 738 ierr = PetscMalloc((nrqs+1)*sizeof(PetscInt),&pa);CHKERRQ(ierr); /*(proc -array)*/ 739 for (i=0,j=0; i<size; i++) { 740 if (w1[i]) { pa[j] = i; j++; } 741 } 742 743 /* Each message would have a header = 1 + 2*(no of IS) + data */ 744 for (i=0; i<nrqs; i++) { 745 j = pa[i]; 746 w1[j] += w2[j] + 2* w3[j]; 747 msz += w1[j]; 748 } 749 750 /* Determine the number of messages to expect, their lengths, from from-ids */ 751 ierr = PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);CHKERRQ(ierr); 752 ierr = PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);CHKERRQ(ierr); 753 754 /* Now post the Irecvs corresponding to these messages */ 755 ierr = 
PetscPostIrecvInt(comm,tag0,nrqr,onodes1,olengths1,&rbuf1,&r_waits1);CHKERRQ(ierr); 756 757 ierr = PetscFree(onodes1);CHKERRQ(ierr); 758 ierr = PetscFree(olengths1);CHKERRQ(ierr); 759 760 /* Allocate Memory for outgoing messages */ 761 len = 2*size*sizeof(PetscInt*) + 2*msz*sizeof(PetscInt) + size*sizeof(PetscInt); 762 ierr = PetscMalloc(len,&sbuf1);CHKERRQ(ierr); 763 ptr = sbuf1 + size; /* Pointers to the data in outgoing buffers */ 764 ierr = PetscMemzero(sbuf1,2*size*sizeof(PetscInt*));CHKERRQ(ierr); 765 /* allocate memory for outgoing data + buf to receive the first reply */ 766 tmp = (PetscInt*)(ptr + size); 767 ctr = tmp + 2*msz; 768 769 { 770 PetscInt *iptr = tmp,ict = 0; 771 for (i=0; i<nrqs; i++) { 772 j = pa[i]; 773 iptr += ict; 774 sbuf1[j] = iptr; 775 ict = w1[j]; 776 } 777 } 778 779 /* Form the outgoing messages */ 780 /* Initialise the header space */ 781 for (i=0; i<nrqs; i++) { 782 j = pa[i]; 783 sbuf1[j][0] = 0; 784 ierr = PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(PetscInt));CHKERRQ(ierr); 785 ptr[j] = sbuf1[j] + 2*w3[j] + 1; 786 } 787 788 /* Parse the isrow and copy data into outbuf */ 789 for (i=0; i<ismax; i++) { 790 ierr = PetscMemzero(ctr,size*sizeof(PetscInt));CHKERRQ(ierr); 791 irow_i = irow[i]; 792 jmax = nrow[i]; 793 for (j=0; j<jmax; j++) { /* parse the indices of each IS */ 794 row = irow_i[j]; 795 #if defined (PETSC_USE_CTABLE) 796 ierr = PetscGetProc(row,size,c->rangebs,&proc);CHKERRQ(ierr); 797 #else 798 proc = rtable[row]; 799 #endif 800 if (proc != rank) { /* copy to the outgoing buf*/ 801 ctr[proc]++; 802 *ptr[proc] = row; 803 ptr[proc]++; 804 } 805 } 806 /* Update the headers for the current IS */ 807 for (j=0; j<size; j++) { /* Can Optimise this loop too */ 808 if ((ctr_j = ctr[j])) { 809 sbuf1_j = sbuf1[j]; 810 k = ++sbuf1_j[0]; 811 sbuf1_j[2*k] = ctr_j; 812 sbuf1_j[2*k-1] = i; 813 } 814 } 815 } 816 817 /* Now post the sends */ 818 ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);CHKERRQ(ierr); 819 for (i=0; i<nrqs; ++i) 
{ 820 j = pa[i]; 821 ierr = MPI_Isend(sbuf1[j],w1[j],MPIU_INT,j,tag0,comm,s_waits1+i);CHKERRQ(ierr); 822 } 823 824 /* Post Recieves to capture the buffer size */ 825 ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);CHKERRQ(ierr); 826 ierr = PetscMalloc((nrqs+1)*sizeof(PetscInt*),&rbuf2);CHKERRQ(ierr); 827 rbuf2[0] = tmp + msz; 828 for (i=1; i<nrqs; ++i) { 829 j = pa[i]; 830 rbuf2[i] = rbuf2[i-1]+w1[pa[i-1]]; 831 } 832 for (i=0; i<nrqs; ++i) { 833 j = pa[i]; 834 ierr = MPI_Irecv(rbuf2[i],w1[j],MPIU_INT,j,tag1,comm,r_waits2+i);CHKERRQ(ierr); 835 } 836 837 /* Send to other procs the buf size they should allocate */ 838 839 /* Receive messages*/ 840 ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);CHKERRQ(ierr); 841 ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);CHKERRQ(ierr); 842 len = 2*nrqr*sizeof(PetscInt) + (nrqr+1)*sizeof(PetscInt*); 843 ierr = PetscMalloc(len,&sbuf2);CHKERRQ(ierr); 844 req_size = (PetscInt*)(sbuf2 + nrqr); 845 req_source = req_size + nrqr; 846 847 { 848 Mat_SeqBAIJ *sA = (Mat_SeqBAIJ*)c->A->data,*sB = (Mat_SeqBAIJ*)c->B->data; 849 PetscInt *sAi = sA->i,*sBi = sB->i,id,*sbuf2_i; 850 851 for (i=0; i<nrqr; ++i) { 852 ierr = MPI_Waitany(nrqr,r_waits1,&idex,r_status1+i);CHKERRQ(ierr); 853 req_size[idex] = 0; 854 rbuf1_i = rbuf1[idex]; 855 start = 2*rbuf1_i[0] + 1; 856 ierr = MPI_Get_count(r_status1+i,MPIU_INT,&end);CHKERRQ(ierr); 857 ierr = PetscMalloc(end*sizeof(PetscInt),&sbuf2[idex]);CHKERRQ(ierr); 858 sbuf2_i = sbuf2[idex]; 859 for (j=start; j<end; j++) { 860 id = rbuf1_i[j] - rstart; 861 ncols = sAi[id+1] - sAi[id] + sBi[id+1] - sBi[id]; 862 sbuf2_i[j] = ncols; 863 req_size[idex] += ncols; 864 } 865 req_source[idex] = r_status1[i].MPI_SOURCE; 866 /* form the header */ 867 sbuf2_i[0] = req_size[idex]; 868 for (j=1; j<start; j++) { sbuf2_i[j] = rbuf1_i[j]; } 869 ierr = MPI_Isend(sbuf2_i,end,MPIU_INT,req_source[idex],tag1,comm,s_waits2+i);CHKERRQ(ierr); 870 } 871 } 872 ierr = PetscFree(r_status1);CHKERRQ(ierr); 873 
ierr = PetscFree(r_waits1);CHKERRQ(ierr); 874 875 /* recv buffer sizes */ 876 /* Receive messages*/ 877 878 ierr = PetscMalloc((nrqs+1)*sizeof(PetscInt*),&rbuf3);CHKERRQ(ierr); 879 ierr = PetscMalloc((nrqs+1)*sizeof(MatScalar*),&rbuf4);CHKERRQ(ierr); 880 ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits3);CHKERRQ(ierr); 881 ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits4);CHKERRQ(ierr); 882 ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);CHKERRQ(ierr); 883 884 for (i=0; i<nrqs; ++i) { 885 ierr = MPI_Waitany(nrqs,r_waits2,&idex,r_status2+i);CHKERRQ(ierr); 886 ierr = PetscMalloc(rbuf2[idex][0]*sizeof(PetscInt),&rbuf3[idex]);CHKERRQ(ierr); 887 ierr = PetscMalloc(rbuf2[idex][0]*bs2*sizeof(MatScalar),&rbuf4[idex]);CHKERRQ(ierr); 888 ierr = MPI_Irecv(rbuf3[idex],rbuf2[idex][0],MPIU_INT,r_status2[i].MPI_SOURCE,tag2,comm,r_waits3+idex);CHKERRQ(ierr); 889 ierr = MPI_Irecv(rbuf4[idex],rbuf2[idex][0]*bs2,MPIU_MATSCALAR,r_status2[i].MPI_SOURCE,tag3,comm,r_waits4+idex);CHKERRQ(ierr); 890 } 891 ierr = PetscFree(r_status2);CHKERRQ(ierr); 892 ierr = PetscFree(r_waits2);CHKERRQ(ierr); 893 894 /* Wait on sends1 and sends2 */ 895 ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);CHKERRQ(ierr); 896 ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);CHKERRQ(ierr); 897 898 if (nrqs) {ierr = MPI_Waitall(nrqs,s_waits1,s_status1);CHKERRQ(ierr);} 899 if (nrqr) {ierr = MPI_Waitall(nrqr,s_waits2,s_status2);CHKERRQ(ierr);} 900 ierr = PetscFree(s_status1);CHKERRQ(ierr); 901 ierr = PetscFree(s_status2);CHKERRQ(ierr); 902 ierr = PetscFree(s_waits1);CHKERRQ(ierr); 903 ierr = PetscFree(s_waits2);CHKERRQ(ierr); 904 905 /* Now allocate buffers for a->j, and send them off */ 906 ierr = PetscMalloc((nrqr+1)*sizeof(PetscInt*),&sbuf_aj);CHKERRQ(ierr); 907 for (i=0,j=0; i<nrqr; i++) j += req_size[i]; 908 ierr = PetscMalloc((j+1)*sizeof(PetscInt),&sbuf_aj[0]);CHKERRQ(ierr); 909 for (i=1; i<nrqr; i++) sbuf_aj[i] = sbuf_aj[i-1] + req_size[i-1]; 910 911 ierr = 
PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits3);CHKERRQ(ierr); 912 { 913 for (i=0; i<nrqr; i++) { 914 rbuf1_i = rbuf1[i]; 915 sbuf_aj_i = sbuf_aj[i]; 916 ct1 = 2*rbuf1_i[0] + 1; 917 ct2 = 0; 918 for (j=1,max1=rbuf1_i[0]; j<=max1; j++) { 919 kmax = rbuf1[i][2*j]; 920 for (k=0; k<kmax; k++,ct1++) { 921 row = rbuf1_i[ct1] - rstart; 922 nzA = a_i[row+1] - a_i[row]; nzB = b_i[row+1] - b_i[row]; 923 ncols = nzA + nzB; 924 cworkA = a_j + a_i[row]; cworkB = b_j + b_i[row]; 925 926 /* load the column indices for this row into cols*/ 927 cols = sbuf_aj_i + ct2; 928 for (l=0; l<nzB; l++) { 929 if ((ctmp = bmap[cworkB[l]]) < cstart) cols[l] = ctmp; 930 else break; 931 } 932 imark = l; 933 for (l=0; l<nzA; l++) cols[imark+l] = cstart + cworkA[l]; 934 for (l=imark; l<nzB; l++) cols[nzA+l] = bmap[cworkB[l]]; 935 ct2 += ncols; 936 } 937 } 938 ierr = MPI_Isend(sbuf_aj_i,req_size[i],MPIU_INT,req_source[i],tag2,comm,s_waits3+i);CHKERRQ(ierr); 939 } 940 } 941 ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status3);CHKERRQ(ierr); 942 ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status3);CHKERRQ(ierr); 943 944 /* Allocate buffers for a->a, and send them off */ 945 ierr = PetscMalloc((nrqr+1)*sizeof(MatScalar *),&sbuf_aa);CHKERRQ(ierr); 946 for (i=0,j=0; i<nrqr; i++) j += req_size[i]; 947 ierr = PetscMalloc((j+1)*bs2*sizeof(MatScalar),&sbuf_aa[0]);CHKERRQ(ierr); 948 for (i=1; i<nrqr; i++) sbuf_aa[i] = sbuf_aa[i-1] + req_size[i-1]*bs2; 949 950 ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits4);CHKERRQ(ierr); 951 { 952 for (i=0; i<nrqr; i++) { 953 rbuf1_i = rbuf1[i]; 954 sbuf_aa_i = sbuf_aa[i]; 955 ct1 = 2*rbuf1_i[0]+1; 956 ct2 = 0; 957 for (j=1,max1=rbuf1_i[0]; j<=max1; j++) { 958 kmax = rbuf1_i[2*j]; 959 for (k=0; k<kmax; k++,ct1++) { 960 row = rbuf1_i[ct1] - rstart; 961 nzA = a_i[row+1] - a_i[row]; nzB = b_i[row+1] - b_i[row]; 962 ncols = nzA + nzB; 963 cworkA = a_j + a_i[row]; cworkB = b_j + b_i[row]; 964 vworkA = a_a + a_i[row]*bs2; vworkB = b_a + b_i[row]*bs2; 965 
966 /* load the column values for this row into vals*/ 967 vals = sbuf_aa_i+ct2*bs2; 968 for (l=0; l<nzB; l++) { 969 if ((bmap[cworkB[l]]) < cstart) { 970 ierr = PetscMemcpy(vals+l*bs2,vworkB+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 971 } 972 else break; 973 } 974 imark = l; 975 for (l=0; l<nzA; l++) { 976 ierr = PetscMemcpy(vals+(imark+l)*bs2,vworkA+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 977 } 978 for (l=imark; l<nzB; l++) { 979 ierr = PetscMemcpy(vals+(nzA+l)*bs2,vworkB+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 980 } 981 ct2 += ncols; 982 } 983 } 984 ierr = MPI_Isend(sbuf_aa_i,req_size[i]*bs2,MPIU_MATSCALAR,req_source[i],tag3,comm,s_waits4+i);CHKERRQ(ierr); 985 } 986 } 987 ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status4);CHKERRQ(ierr); 988 ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status4);CHKERRQ(ierr); 989 ierr = PetscFree(rbuf1);CHKERRQ(ierr); 990 991 /* Form the matrix */ 992 /* create col map */ 993 { 994 const PetscInt *icol_i; 995 #if defined (PETSC_USE_CTABLE) 996 /* Create row map*/ 997 ierr = PetscMalloc((1+ismax)*sizeof(PetscTable),&colmaps);CHKERRQ(ierr); 998 for (i=0; i<ismax; i++) { 999 ierr = PetscTableCreate(ncol[i]+1,&colmaps[i]);CHKERRQ(ierr); 1000 } 1001 #else 1002 len = (1+ismax)*sizeof(PetscInt*)+ ismax*c->Nbs*sizeof(PetscInt); 1003 ierr = PetscMalloc(len,&cmap);CHKERRQ(ierr); 1004 cmap[0] = (PetscInt*)(cmap + ismax); 1005 ierr = PetscMemzero(cmap[0],(1+ismax*c->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1006 for (i=1; i<ismax; i++) { cmap[i] = cmap[i-1] + c->Nbs; } 1007 #endif 1008 for (i=0; i<ismax; i++) { 1009 jmax = ncol[i]; 1010 icol_i = icol[i]; 1011 #if defined (PETSC_USE_CTABLE) 1012 lcol1_gcol1 = colmaps[i]; 1013 for (j=0; j<jmax; j++) { 1014 ierr = PetscTableAdd(lcol1_gcol1,icol_i[j]+1,j+1);CHKERRQ(ierr); 1015 } 1016 #else 1017 cmap_i = cmap[i]; 1018 for (j=0; j<jmax; j++) { 1019 cmap_i[icol_i[j]] = j+1; 1020 } 1021 #endif 1022 } 1023 } 1024 1025 /* Create lens which is required for MatCreate... 
*/ 1026 for (i=0,j=0; i<ismax; i++) { j += nrow[i]; } 1027 len = (1+ismax)*sizeof(PetscInt*)+ j*sizeof(PetscInt); 1028 ierr = PetscMalloc(len,&lens);CHKERRQ(ierr); 1029 lens[0] = (PetscInt*)(lens + ismax); 1030 ierr = PetscMemzero(lens[0],j*sizeof(PetscInt));CHKERRQ(ierr); 1031 for (i=1; i<ismax; i++) { lens[i] = lens[i-1] + nrow[i-1]; } 1032 1033 /* Update lens from local data */ 1034 for (i=0; i<ismax; i++) { 1035 jmax = nrow[i]; 1036 #if defined (PETSC_USE_CTABLE) 1037 lcol1_gcol1 = colmaps[i]; 1038 #else 1039 cmap_i = cmap[i]; 1040 #endif 1041 irow_i = irow[i]; 1042 lens_i = lens[i]; 1043 for (j=0; j<jmax; j++) { 1044 row = irow_i[j]; 1045 #if defined (PETSC_USE_CTABLE) 1046 ierr = PetscGetProc(row,size,c->rangebs,&proc);CHKERRQ(ierr); 1047 #else 1048 proc = rtable[row]; 1049 #endif 1050 if (proc == rank) { 1051 /* Get indices from matA and then from matB */ 1052 row = row - rstart; 1053 nzA = a_i[row+1] - a_i[row]; nzB = b_i[row+1] - b_i[row]; 1054 cworkA = a_j + a_i[row]; cworkB = b_j + b_i[row]; 1055 #if defined (PETSC_USE_CTABLE) 1056 for (k=0; k<nzA; k++) { 1057 ierr = PetscTableFind(lcol1_gcol1,cstart+cworkA[k]+1,&tt);CHKERRQ(ierr); 1058 if (tt) { lens_i[j]++; } 1059 } 1060 for (k=0; k<nzB; k++) { 1061 ierr = PetscTableFind(lcol1_gcol1,bmap[cworkB[k]]+1,&tt);CHKERRQ(ierr); 1062 if (tt) { lens_i[j]++; } 1063 } 1064 #else 1065 for (k=0; k<nzA; k++) { 1066 if (cmap_i[cstart + cworkA[k]]) { lens_i[j]++; } 1067 } 1068 for (k=0; k<nzB; k++) { 1069 if (cmap_i[bmap[cworkB[k]]]) { lens_i[j]++; } 1070 } 1071 #endif 1072 } 1073 } 1074 } 1075 #if defined (PETSC_USE_CTABLE) 1076 /* Create row map*/ 1077 ierr = PetscMalloc((1+ismax)*sizeof(PetscTable),&rowmaps);CHKERRQ(ierr); 1078 for (i=0; i<ismax; i++){ 1079 ierr = PetscTableCreate(nrow[i]+1,&rowmaps[i]);CHKERRQ(ierr); 1080 } 1081 #else 1082 /* Create row map*/ 1083 len = (1+ismax)*sizeof(PetscInt*)+ ismax*Mbs*sizeof(PetscInt); 1084 ierr = PetscMalloc(len,&rmap);CHKERRQ(ierr); 1085 rmap[0] = (PetscInt*)(rmap + 
ismax); 1086 ierr = PetscMemzero(rmap[0],ismax*Mbs*sizeof(PetscInt));CHKERRQ(ierr); 1087 for (i=1; i<ismax; i++) { rmap[i] = rmap[i-1] + Mbs;} 1088 #endif 1089 for (i=0; i<ismax; i++) { 1090 irow_i = irow[i]; 1091 jmax = nrow[i]; 1092 #if defined (PETSC_USE_CTABLE) 1093 lrow1_grow1 = rowmaps[i]; 1094 for (j=0; j<jmax; j++) { 1095 ierr = PetscTableAdd(lrow1_grow1,irow_i[j]+1,j+1);CHKERRQ(ierr); 1096 } 1097 #else 1098 rmap_i = rmap[i]; 1099 for (j=0; j<jmax; j++) { 1100 rmap_i[irow_i[j]] = j; 1101 } 1102 #endif 1103 } 1104 1105 /* Update lens from offproc data */ 1106 { 1107 PetscInt *rbuf2_i,*rbuf3_i,*sbuf1_i; 1108 PetscMPIInt ii; 1109 1110 for (tmp2=0; tmp2<nrqs; tmp2++) { 1111 ierr = MPI_Waitany(nrqs,r_waits3,&ii,r_status3+tmp2);CHKERRQ(ierr); 1112 idex = pa[ii]; 1113 sbuf1_i = sbuf1[idex]; 1114 jmax = sbuf1_i[0]; 1115 ct1 = 2*jmax+1; 1116 ct2 = 0; 1117 rbuf2_i = rbuf2[ii]; 1118 rbuf3_i = rbuf3[ii]; 1119 for (j=1; j<=jmax; j++) { 1120 is_no = sbuf1_i[2*j-1]; 1121 max1 = sbuf1_i[2*j]; 1122 lens_i = lens[is_no]; 1123 #if defined (PETSC_USE_CTABLE) 1124 lcol1_gcol1 = colmaps[is_no]; 1125 lrow1_grow1 = rowmaps[is_no]; 1126 #else 1127 cmap_i = cmap[is_no]; 1128 rmap_i = rmap[is_no]; 1129 #endif 1130 for (k=0; k<max1; k++,ct1++) { 1131 #if defined (PETSC_USE_CTABLE) 1132 ierr = PetscTableFind(lrow1_grow1,sbuf1_i[ct1]+1,&row);CHKERRQ(ierr); 1133 row--; 1134 if (row < 0) { SETERRQ(PETSC_ERR_PLIB,"row not found in table"); } 1135 #else 1136 row = rmap_i[sbuf1_i[ct1]]; /* the val in the new matrix to be */ 1137 #endif 1138 max2 = rbuf2_i[ct1]; 1139 for (l=0; l<max2; l++,ct2++) { 1140 #if defined (PETSC_USE_CTABLE) 1141 ierr = PetscTableFind(lcol1_gcol1,rbuf3_i[ct2]+1,&tt);CHKERRQ(ierr); 1142 if (tt) { 1143 lens_i[row]++; 1144 } 1145 #else 1146 if (cmap_i[rbuf3_i[ct2]]) { 1147 lens_i[row]++; 1148 } 1149 #endif 1150 } 1151 } 1152 } 1153 } 1154 } 1155 ierr = PetscFree(r_status3);CHKERRQ(ierr); 1156 ierr = PetscFree(r_waits3);CHKERRQ(ierr); 1157 if (nrqr) {ierr = 
MPI_Waitall(nrqr,s_waits3,s_status3);CHKERRQ(ierr);} 1158 ierr = PetscFree(s_status3);CHKERRQ(ierr); 1159 ierr = PetscFree(s_waits3);CHKERRQ(ierr); 1160 1161 /* Create the submatrices */ 1162 if (scall == MAT_REUSE_MATRIX) { 1163 /* 1164 Assumes new rows are same length as the old rows, hence bug! 1165 */ 1166 for (i=0; i<ismax; i++) { 1167 mat = (Mat_SeqBAIJ *)(submats[i]->data); 1168 if ((mat->mbs != nrow[i]) || (mat->nbs != ncol[i] || C->rmap->bs != bs)) { 1169 SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size"); 1170 } 1171 ierr = PetscMemcmp(mat->ilen,lens[i],mat->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr); 1172 if (!flag) { 1173 SETERRQ(PETSC_ERR_ARG_INCOMP,"Cannot reuse matrix. wrong no of nonzeros"); 1174 } 1175 /* Initial matrix as if empty */ 1176 ierr = PetscMemzero(mat->ilen,mat->mbs*sizeof(PetscInt));CHKERRQ(ierr); 1177 submats[i]->factor = C->factor; 1178 } 1179 } else { 1180 for (i=0; i<ismax; i++) { 1181 ierr = MatCreate(PETSC_COMM_SELF,submats+i);CHKERRQ(ierr); 1182 ierr = MatSetSizes(submats[i],nrow[i]*bs,ncol[i]*bs,nrow[i]*bs,ncol[i]*bs);CHKERRQ(ierr); 1183 ierr = MatSetType(submats[i],((PetscObject)A)->type_name);CHKERRQ(ierr); 1184 ierr = MatSeqBAIJSetPreallocation(submats[i],C->rmap->bs,0,lens[i]);CHKERRQ(ierr); 1185 ierr = MatSeqSBAIJSetPreallocation(submats[i],C->rmap->bs,0,lens[i]);CHKERRQ(ierr); 1186 } 1187 } 1188 1189 /* Assemble the matrices */ 1190 /* First assemble the local rows */ 1191 { 1192 PetscInt ilen_row,*imat_ilen,*imat_j,*imat_i; 1193 MatScalar *imat_a; 1194 1195 for (i=0; i<ismax; i++) { 1196 mat = (Mat_SeqBAIJ*)submats[i]->data; 1197 imat_ilen = mat->ilen; 1198 imat_j = mat->j; 1199 imat_i = mat->i; 1200 imat_a = mat->a; 1201 1202 #if defined (PETSC_USE_CTABLE) 1203 lcol1_gcol1 = colmaps[i]; 1204 lrow1_grow1 = rowmaps[i]; 1205 #else 1206 cmap_i = cmap[i]; 1207 rmap_i = rmap[i]; 1208 #endif 1209 irow_i = irow[i]; 1210 jmax = nrow[i]; 1211 for (j=0; j<jmax; j++) { 1212 row = irow_i[j]; 1213 #if defined 
(PETSC_USE_CTABLE) 1214 ierr = PetscGetProc(row,size,c->rangebs,&proc);CHKERRQ(ierr); 1215 #else 1216 proc = rtable[row]; 1217 #endif 1218 if (proc == rank) { 1219 row = row - rstart; 1220 nzA = a_i[row+1] - a_i[row]; 1221 nzB = b_i[row+1] - b_i[row]; 1222 cworkA = a_j + a_i[row]; 1223 cworkB = b_j + b_i[row]; 1224 vworkA = a_a + a_i[row]*bs2; 1225 vworkB = b_a + b_i[row]*bs2; 1226 #if defined (PETSC_USE_CTABLE) 1227 ierr = PetscTableFind(lrow1_grow1,row+rstart+1,&row);CHKERRQ(ierr); 1228 row--; 1229 if (row < 0) { SETERRQ(PETSC_ERR_PLIB,"row not found in table"); } 1230 #else 1231 row = rmap_i[row + rstart]; 1232 #endif 1233 mat_i = imat_i[row]; 1234 mat_a = imat_a + mat_i*bs2; 1235 mat_j = imat_j + mat_i; 1236 ilen_row = imat_ilen[row]; 1237 1238 /* load the column indices for this row into cols*/ 1239 for (l=0; l<nzB; l++) { 1240 if ((ctmp = bmap[cworkB[l]]) < cstart) { 1241 #if defined (PETSC_USE_CTABLE) 1242 ierr = PetscTableFind(lcol1_gcol1,ctmp+1,&tcol);CHKERRQ(ierr); 1243 if (tcol) { 1244 #else 1245 if ((tcol = cmap_i[ctmp])) { 1246 #endif 1247 *mat_j++ = tcol - 1; 1248 ierr = PetscMemcpy(mat_a,vworkB+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1249 mat_a += bs2; 1250 ilen_row++; 1251 } 1252 } else break; 1253 } 1254 imark = l; 1255 for (l=0; l<nzA; l++) { 1256 #if defined (PETSC_USE_CTABLE) 1257 ierr = PetscTableFind(lcol1_gcol1,cstart+cworkA[l]+1,&tcol);CHKERRQ(ierr); 1258 if (tcol) { 1259 #else 1260 if ((tcol = cmap_i[cstart + cworkA[l]])) { 1261 #endif 1262 *mat_j++ = tcol - 1; 1263 ierr = PetscMemcpy(mat_a,vworkA+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1264 mat_a += bs2; 1265 ilen_row++; 1266 } 1267 } 1268 for (l=imark; l<nzB; l++) { 1269 #if defined (PETSC_USE_CTABLE) 1270 ierr = PetscTableFind(lcol1_gcol1,bmap[cworkB[l]]+1,&tcol);CHKERRQ(ierr); 1271 if (tcol) { 1272 #else 1273 if ((tcol = cmap_i[bmap[cworkB[l]]])) { 1274 #endif 1275 *mat_j++ = tcol - 1; 1276 ierr = PetscMemcpy(mat_a,vworkB+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1277 mat_a += 
bs2; 1278 ilen_row++; 1279 } 1280 } 1281 imat_ilen[row] = ilen_row; 1282 } 1283 } 1284 1285 } 1286 } 1287 1288 /* Now assemble the off proc rows*/ 1289 { 1290 PetscInt *sbuf1_i,*rbuf2_i,*rbuf3_i,*imat_ilen,ilen; 1291 PetscInt *imat_j,*imat_i; 1292 MatScalar *imat_a,*rbuf4_i; 1293 PetscMPIInt ii; 1294 1295 for (tmp2=0; tmp2<nrqs; tmp2++) { 1296 ierr = MPI_Waitany(nrqs,r_waits4,&ii,r_status4+tmp2);CHKERRQ(ierr); 1297 idex = pa[ii]; 1298 sbuf1_i = sbuf1[idex]; 1299 jmax = sbuf1_i[0]; 1300 ct1 = 2*jmax + 1; 1301 ct2 = 0; 1302 rbuf2_i = rbuf2[ii]; 1303 rbuf3_i = rbuf3[ii]; 1304 rbuf4_i = rbuf4[ii]; 1305 for (j=1; j<=jmax; j++) { 1306 is_no = sbuf1_i[2*j-1]; 1307 #if defined (PETSC_USE_CTABLE) 1308 lrow1_grow1 = rowmaps[is_no]; 1309 lcol1_gcol1 = colmaps[is_no]; 1310 #else 1311 rmap_i = rmap[is_no]; 1312 cmap_i = cmap[is_no]; 1313 #endif 1314 mat = (Mat_SeqBAIJ*)submats[is_no]->data; 1315 imat_ilen = mat->ilen; 1316 imat_j = mat->j; 1317 imat_i = mat->i; 1318 imat_a = mat->a; 1319 max1 = sbuf1_i[2*j]; 1320 for (k=0; k<max1; k++,ct1++) { 1321 row = sbuf1_i[ct1]; 1322 #if defined (PETSC_USE_CTABLE) 1323 ierr = PetscTableFind(lrow1_grow1,row+1,&row);CHKERRQ(ierr); 1324 row--; 1325 if(row < 0) { SETERRQ(PETSC_ERR_PLIB,"row not found in table"); } 1326 #else 1327 row = rmap_i[row]; 1328 #endif 1329 ilen = imat_ilen[row]; 1330 mat_i = imat_i[row]; 1331 mat_a = imat_a + mat_i*bs2; 1332 mat_j = imat_j + mat_i; 1333 max2 = rbuf2_i[ct1]; 1334 for (l=0; l<max2; l++,ct2++) { 1335 #if defined (PETSC_USE_CTABLE) 1336 ierr = PetscTableFind(lcol1_gcol1,rbuf3_i[ct2]+1,&tcol);CHKERRQ(ierr); 1337 if (tcol) { 1338 #else 1339 if ((tcol = cmap_i[rbuf3_i[ct2]])) { 1340 #endif 1341 *mat_j++ = tcol - 1; 1342 /* *mat_a++= rbuf4_i[ct2]; */ 1343 ierr = PetscMemcpy(mat_a,rbuf4_i+ct2*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1344 mat_a += bs2; 1345 ilen++; 1346 } 1347 } 1348 imat_ilen[row] = ilen; 1349 } 1350 } 1351 } 1352 } 1353 ierr = PetscFree(r_status4);CHKERRQ(ierr); 1354 ierr = 
PetscFree(r_waits4);CHKERRQ(ierr); 1355 if (nrqr) {ierr = MPI_Waitall(nrqr,s_waits4,s_status4);CHKERRQ(ierr);} 1356 ierr = PetscFree(s_waits4);CHKERRQ(ierr); 1357 ierr = PetscFree(s_status4);CHKERRQ(ierr); 1358 1359 /* Restore the indices */ 1360 for (i=0; i<ismax; i++) { 1361 ierr = ISRestoreIndices(isrow[i],irow+i);CHKERRQ(ierr); 1362 ierr = ISRestoreIndices(iscol[i],icol+i);CHKERRQ(ierr); 1363 } 1364 1365 /* Destroy allocated memory */ 1366 ierr = PetscFree(irow);CHKERRQ(ierr); 1367 ierr = PetscFree(w1);CHKERRQ(ierr); 1368 ierr = PetscFree(pa);CHKERRQ(ierr); 1369 1370 ierr = PetscFree(sbuf1);CHKERRQ(ierr); 1371 ierr = PetscFree(rbuf2);CHKERRQ(ierr); 1372 for (i=0; i<nrqr; ++i) { 1373 ierr = PetscFree(sbuf2[i]);CHKERRQ(ierr); 1374 } 1375 for (i=0; i<nrqs; ++i) { 1376 ierr = PetscFree(rbuf3[i]);CHKERRQ(ierr); 1377 ierr = PetscFree(rbuf4[i]);CHKERRQ(ierr); 1378 } 1379 1380 ierr = PetscFree(sbuf2);CHKERRQ(ierr); 1381 ierr = PetscFree(rbuf3);CHKERRQ(ierr); 1382 ierr = PetscFree(rbuf4);CHKERRQ(ierr); 1383 ierr = PetscFree(sbuf_aj[0]);CHKERRQ(ierr); 1384 ierr = PetscFree(sbuf_aj);CHKERRQ(ierr); 1385 ierr = PetscFree(sbuf_aa[0]);CHKERRQ(ierr); 1386 ierr = PetscFree(sbuf_aa);CHKERRQ(ierr); 1387 1388 #if defined (PETSC_USE_CTABLE) 1389 for (i=0; i<ismax; i++){ 1390 ierr = PetscTableDestroy(rowmaps[i]);CHKERRQ(ierr); 1391 ierr = PetscTableDestroy(colmaps[i]);CHKERRQ(ierr); 1392 } 1393 ierr = PetscFree(colmaps);CHKERRQ(ierr); 1394 ierr = PetscFree(rowmaps);CHKERRQ(ierr); 1395 #else 1396 ierr = PetscFree(rmap);CHKERRQ(ierr); 1397 ierr = PetscFree(cmap);CHKERRQ(ierr); 1398 #endif 1399 ierr = PetscFree(lens);CHKERRQ(ierr); 1400 1401 for (i=0; i<ismax; i++) { 1402 ierr = MatAssemblyBegin(submats[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1403 ierr = MatAssemblyEnd(submats[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1404 } 1405 PetscFunctionReturn(0); 1406 } 1407 1408