1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpibaij.c,v 1.122 1998/05/12 16:16:36 bsmith Exp bsmith $"; 3 #endif 4 5 #include "pinclude/pviewer.h" /*I "mat.h" I*/ 6 #include "src/mat/impls/baij/mpi/mpibaij.h" 7 #include "src/vec/vecimpl.h" 8 9 10 extern int MatSetUpMultiply_MPIBAIJ(Mat); 11 extern int DisAssemble_MPIBAIJ(Mat); 12 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 13 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **); 14 15 /* 16 Local utility routine that creates a mapping from the global column 17 number to the local number in the off-diagonal part of the local 18 storage of the matrix. This is done in a non scable way since the 19 length of colmap equals the global matrix length. 20 */ 21 #undef __FUNC__ 22 #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 23 static int CreateColmap_MPIBAIJ_Private(Mat mat) 24 { 25 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 26 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 27 int nbs = B->nbs,i,bs=B->bs;; 28 29 PetscFunctionBegin; 30 baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 31 PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 32 PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 33 for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 34 PetscFunctionReturn(0); 35 } 36 37 #define CHUNKSIZE 10 38 39 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 40 { \ 41 \ 42 brow = row/bs; \ 43 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 44 rmax = aimax[brow]; nrow = ailen[brow]; \ 45 bcol = col/bs; \ 46 ridx = row % bs; cidx = col % bs; \ 47 low = 0; high = nrow; \ 48 while (high-low > 3) { \ 49 t = (low+high)/2; \ 50 if (rp[t] > bcol) high = t; \ 51 else low = t; \ 52 } \ 53 for ( _i=low; _i<high; _i++ ) { \ 54 if (rp[_i] > bcol) break; \ 55 if (rp[_i] == bcol) { \ 56 bap = ap + bs2*_i + bs*cidx + ridx; \ 57 if (addv == ADD_VALUES) *bap += value; \ 58 else *bap = value; \ 59 goto a_noinsert; \ 60 } \ 61 } \ 62 if 
(a->nonew == 1) goto a_noinsert; \ 63 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 64 if (nrow >= rmax) { \ 65 /* there is no extra room in row, therefore enlarge */ \ 66 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 67 Scalar *new_a; \ 68 \ 69 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 70 \ 71 /* malloc new storage space */ \ 72 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 73 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 74 new_j = (int *) (new_a + bs2*new_nz); \ 75 new_i = new_j + new_nz; \ 76 \ 77 /* copy over old data into new slots */ \ 78 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 79 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 80 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \ 81 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 82 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 83 len*sizeof(int)); \ 84 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \ 85 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 86 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 87 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \ 88 /* free up old matrix storage */ \ 89 PetscFree(a->a); \ 90 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 91 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 92 a->singlemalloc = 1; \ 93 \ 94 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 95 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 96 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 97 a->maxnz += bs2*CHUNKSIZE; \ 98 a->reallocs++; \ 99 a->nz++; \ 100 } \ 101 N = nrow++ - 1; \ 102 /* shift up all the later entries in this row */ \ 103 for ( ii=N; ii>=_i; ii-- ) { \ 104 rp[ii+1] = rp[ii]; \ 105 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 106 } \ 107 if (N>=_i) 
PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 108 rp[_i] = bcol; \ 109 ap[bs2*_i + bs*cidx + ridx] = value; \ 110 a_noinsert:; \ 111 ailen[brow] = nrow; \ 112 } 113 114 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 115 { \ 116 \ 117 brow = row/bs; \ 118 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 119 rmax = bimax[brow]; nrow = bilen[brow]; \ 120 bcol = col/bs; \ 121 ridx = row % bs; cidx = col % bs; \ 122 low = 0; high = nrow; \ 123 while (high-low > 3) { \ 124 t = (low+high)/2; \ 125 if (rp[t] > bcol) high = t; \ 126 else low = t; \ 127 } \ 128 for ( _i=low; _i<high; _i++ ) { \ 129 if (rp[_i] > bcol) break; \ 130 if (rp[_i] == bcol) { \ 131 bap = ap + bs2*_i + bs*cidx + ridx; \ 132 if (addv == ADD_VALUES) *bap += value; \ 133 else *bap = value; \ 134 goto b_noinsert; \ 135 } \ 136 } \ 137 if (b->nonew == 1) goto b_noinsert; \ 138 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 139 if (nrow >= rmax) { \ 140 /* there is no extra room in row, therefore enlarge */ \ 141 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 142 Scalar *new_a; \ 143 \ 144 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 145 \ 146 /* malloc new storage space */ \ 147 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 148 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 149 new_j = (int *) (new_a + bs2*new_nz); \ 150 new_i = new_j + new_nz; \ 151 \ 152 /* copy over old data into new slots */ \ 153 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 154 for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 155 PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \ 156 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 157 PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 158 len*sizeof(int)); \ 159 PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \ 160 
PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 161 PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 162 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar)); \ 163 /* free up old matrix storage */ \ 164 PetscFree(b->a); \ 165 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 166 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 167 b->singlemalloc = 1; \ 168 \ 169 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 170 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 171 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 172 b->maxnz += bs2*CHUNKSIZE; \ 173 b->reallocs++; \ 174 b->nz++; \ 175 } \ 176 N = nrow++ - 1; \ 177 /* shift up all the later entries in this row */ \ 178 for ( ii=N; ii>=_i; ii-- ) { \ 179 rp[ii+1] = rp[ii]; \ 180 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 181 } \ 182 if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 183 rp[_i] = bcol; \ 184 ap[bs2*_i + bs*cidx + ridx] = value; \ 185 b_noinsert:; \ 186 bilen[brow] = nrow; \ 187 } 188 189 #undef __FUNC__ 190 #define __FUNC__ "MatSetValues_MPIBAIJ" 191 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 192 { 193 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 194 Scalar value; 195 int ierr,i,j,row,col; 196 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 197 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 198 int cend_orig=baij->cend_bs,bs=baij->bs; 199 200 /* Some Variables required in the macro */ 201 Mat A = baij->A; 202 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 203 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 204 Scalar *aa=a->a; 205 206 Mat B = baij->B; 207 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 208 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 209 Scalar *ba=b->a; 210 211 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 212 int low,high,t,ridx,cidx,bs2=a->bs2; 213 Scalar *ap,*bap; 214 215 PetscFunctionBegin; 216 for ( i=0; i<m; i++ ) { 217 #if 
defined(USE_PETSC_BOPT_g) 218 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 219 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 220 #endif 221 if (im[i] >= rstart_orig && im[i] < rend_orig) { 222 row = im[i] - rstart_orig; 223 for ( j=0; j<n; j++ ) { 224 if (in[j] >= cstart_orig && in[j] < cend_orig){ 225 col = in[j] - cstart_orig; 226 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 227 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 228 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 229 } 230 #if defined(USE_PETSC_BOPT_g) 231 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 232 else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 233 #endif 234 else { 235 if (mat->was_assembled) { 236 if (!baij->colmap) { 237 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 238 } 239 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 240 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 241 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 242 col = in[j]; 243 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 244 B = baij->B; 245 b = (Mat_SeqBAIJ *) (B)->data; 246 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 247 ba=b->a; 248 } 249 } else col = in[j]; 250 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 251 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 252 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 253 } 254 } 255 } else { 256 if (roworiented && !baij->donotstash) { 257 ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 258 } else { 259 if (!baij->donotstash) { 260 row = im[i]; 261 for ( j=0; j<n; j++ ) { 262 ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 263 } 264 } 265 } 266 } 267 } 268 PetscFunctionReturn(0); 269 } 270 271 extern int 
MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 272 #undef __FUNC__ 273 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 274 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 275 { 276 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 277 Scalar *value,*barray=baij->barray; 278 int ierr,i,j,ii,jj,row,col,k,l; 279 int roworiented = baij->roworiented,rstart=baij->rstart ; 280 int rend=baij->rend,cstart=baij->cstart,stepval; 281 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 282 283 if(!barray) { 284 baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray); 285 } 286 287 if (roworiented) { 288 stepval = (n-1)*bs; 289 } else { 290 stepval = (m-1)*bs; 291 } 292 for ( i=0; i<m; i++ ) { 293 #if defined(USE_PETSC_BOPT_g) 294 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 295 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 296 #endif 297 if (im[i] >= rstart && im[i] < rend) { 298 row = im[i] - rstart; 299 for ( j=0; j<n; j++ ) { 300 /* If NumCol = 1 then a copy is not required */ 301 if ((roworiented) && (n == 1)) { 302 barray = v + i*bs2; 303 } else if((!roworiented) && (m == 1)) { 304 barray = v + j*bs2; 305 } else { /* Here a copy is required */ 306 if (roworiented) { 307 value = v + i*(stepval+bs)*bs + j*bs; 308 } else { 309 value = v + j*(stepval+bs)*bs + i*bs; 310 } 311 for ( ii=0; ii<bs; ii++,value+=stepval ) { 312 for (jj=0; jj<bs; jj++ ) { 313 *barray++ = *value++; 314 } 315 } 316 barray -=bs2; 317 } 318 319 if (in[j] >= cstart && in[j] < cend){ 320 col = in[j] - cstart; 321 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 322 } 323 #if defined(USE_PETSC_BOPT_g) 324 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 325 else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 326 #endif 327 else { 328 if (mat->was_assembled) { 329 if 
(!baij->colmap) { 330 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 331 } 332 333 #if defined(USE_PETSC_BOPT_g) 334 if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 335 #endif 336 col = (baij->colmap[in[j]] - 1)/bs; 337 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 338 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 339 col = in[j]; 340 } 341 } 342 else col = in[j]; 343 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 344 } 345 } 346 } else { 347 if (!baij->donotstash) { 348 if (roworiented ) { 349 row = im[i]*bs; 350 value = v + i*(stepval+bs)*bs; 351 for ( j=0; j<bs; j++,row++ ) { 352 for ( k=0; k<n; k++ ) { 353 for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 354 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 355 } 356 } 357 } 358 } else { 359 for ( j=0; j<n; j++ ) { 360 value = v + j*(stepval+bs)*bs + i*bs; 361 col = in[j]*bs; 362 for ( k=0; k<bs; k++,col++,value+=stepval) { 363 for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 364 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 365 } 366 } 367 } 368 } 369 } 370 } 371 } 372 PetscFunctionReturn(0); 373 } 374 #include <math.h> 375 #define HASH_KEY 0.6180339887 376 /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 377 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 378 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 379 #undef __FUNC__ 380 #define __FUNC__ "MatSetValues_MPIBAIJ_HT" 381 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 382 { 383 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 384 int ierr,i,j,row,col; 385 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 386 int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 387 int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 388 double tmp; 389 Scalar ** HD = baij->hd,value; 390 #if 
defined(USE_PETSC_BOPT_g) 391 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 392 #endif 393 394 PetscFunctionBegin; 395 396 for ( i=0; i<m; i++ ) { 397 #if defined(USE_PETSC_BOPT_g) 398 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 399 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 400 #endif 401 row = im[i]; 402 if (row >= rstart_orig && row < rend_orig) { 403 for ( j=0; j<n; j++ ) { 404 col = in[j]; 405 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 406 /* Look up into the Hash Table */ 407 key = (row/bs)*Nbs+(col/bs)+1; 408 h1 = HASH(size,key,tmp); 409 410 411 idx = h1; 412 #if defined(USE_PETSC_BOPT_g) 413 insert_ct++; 414 total_ct++; 415 if (HT[idx] != key) { 416 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 417 if (idx == size) { 418 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 419 if (idx == h1) { 420 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 421 } 422 } 423 } 424 #else 425 if (HT[idx] != key) { 426 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 427 if (idx == size) { 428 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 429 if (idx == h1) { 430 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 431 } 432 } 433 } 434 #endif 435 /* A HASH table entry is found, so insert the values at the correct address */ 436 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 437 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 438 } 439 } else { 440 if (roworiented && !baij->donotstash) { 441 ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 442 } else { 443 if (!baij->donotstash) { 444 row = im[i]; 445 for ( j=0; j<n; j++ ) { 446 ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 447 } 448 } 449 } 450 } 451 } 452 #if defined(USE_PETSC_BOPT_g) 453 baij->ht_total_ct = total_ct; 454 baij->ht_insert_ct = insert_ct; 455 
#endif 456 PetscFunctionReturn(0); 457 } 458 459 #undef __FUNC__ 460 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT" 461 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 462 { 463 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 464 int ierr,i,j,ii,jj,row,col,k,l; 465 int roworiented = baij->roworiented,rstart=baij->rstart ; 466 int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 467 int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 468 double tmp; 469 Scalar ** HD = baij->hd,*value,*v_t,*baij_a; 470 #if defined(USE_PETSC_BOPT_g) 471 int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 472 #endif 473 474 PetscFunctionBegin; 475 476 if (roworiented) { 477 stepval = (n-1)*bs; 478 } else { 479 stepval = (m-1)*bs; 480 } 481 for ( i=0; i<m; i++ ) { 482 #if defined(USE_PETSC_BOPT_g) 483 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 484 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 485 #endif 486 row = im[i]; 487 v_t = v + i*bs2; 488 if (row >= rstart && row < rend) { 489 for ( j=0; j<n; j++ ) { 490 col = in[j]; 491 492 /* Look up into the Hash Table */ 493 key = row*Nbs+col+1; 494 h1 = HASH(size,key,tmp); 495 496 idx = h1; 497 #if defined(USE_PETSC_BOPT_g) 498 total_ct++; 499 insert_ct++; 500 if (HT[idx] != key) { 501 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 502 if (idx == size) { 503 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 504 if (idx == h1) { 505 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 506 } 507 } 508 } 509 #else 510 if (HT[idx] != key) { 511 for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++); 512 if (idx == size) { 513 for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++); 514 if (idx == h1) { 515 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 516 } 517 } 518 } 519 #endif 520 baij_a = HD[idx]; 521 if (roworiented) { 522 /*value = v + 
i*(stepval+bs)*bs + j*bs;*/ 523 /* value = v + (i*(stepval+bs)+j)*bs; */ 524 value = v_t; 525 v_t += bs; 526 if (addv == ADD_VALUES) { 527 for ( ii=0; ii<bs; ii++,value+=stepval) { 528 for ( jj=ii; jj<bs2; jj+=bs ) { 529 baij_a[jj] += *value++; 530 } 531 } 532 } else { 533 for ( ii=0; ii<bs; ii++,value+=stepval) { 534 for ( jj=ii; jj<bs2; jj+=bs ) { 535 baij_a[jj] = *value++; 536 } 537 } 538 } 539 } else { 540 value = v + j*(stepval+bs)*bs + i*bs; 541 if (addv == ADD_VALUES) { 542 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 543 for ( jj=0; jj<bs; jj++ ) { 544 baij_a[jj] += *value++; 545 } 546 } 547 } else { 548 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 549 for ( jj=0; jj<bs; jj++ ) { 550 baij_a[jj] = *value++; 551 } 552 } 553 } 554 } 555 } 556 } else { 557 if (!baij->donotstash) { 558 if (roworiented ) { 559 row = im[i]*bs; 560 value = v + i*(stepval+bs)*bs; 561 for ( j=0; j<bs; j++,row++ ) { 562 for ( k=0; k<n; k++ ) { 563 for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 564 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 565 } 566 } 567 } 568 } else { 569 for ( j=0; j<n; j++ ) { 570 value = v + j*(stepval+bs)*bs + i*bs; 571 col = in[j]*bs; 572 for ( k=0; k<bs; k++,col++,value+=stepval) { 573 for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 574 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 575 } 576 } 577 } 578 } 579 } 580 } 581 } 582 #if defined(USE_PETSC_BOPT_g) 583 baij->ht_total_ct = total_ct; 584 baij->ht_insert_ct = insert_ct; 585 #endif 586 PetscFunctionReturn(0); 587 } 588 589 #undef __FUNC__ 590 #define __FUNC__ "MatGetValues_MPIBAIJ" 591 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 592 { 593 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 594 int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 595 int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col; 596 597 PetscFunctionBegin; 598 for ( i=0; i<m; i++ ) { 599 if 
(idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 600 if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 601 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 602 row = idxm[i] - bsrstart; 603 for ( j=0; j<n; j++ ) { 604 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 605 if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 606 if (idxn[j] >= bscstart && idxn[j] < bscend){ 607 col = idxn[j] - bscstart; 608 ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 609 } else { 610 if (!baij->colmap) { 611 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 612 } 613 if((baij->colmap[idxn[j]/bs]-1 < 0) || 614 (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 615 else { 616 col = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs; 617 ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 618 } 619 } 620 } 621 } else { 622 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 623 } 624 } 625 PetscFunctionReturn(0); 626 } 627 628 #undef __FUNC__ 629 #define __FUNC__ "MatNorm_MPIBAIJ" 630 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 631 { 632 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 633 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 634 int ierr, i,bs2=baij->bs2; 635 double sum = 0.0; 636 Scalar *v; 637 638 PetscFunctionBegin; 639 if (baij->size == 1) { 640 ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 641 } else { 642 if (type == NORM_FROBENIUS) { 643 v = amat->a; 644 for (i=0; i<amat->nz*bs2; i++ ) { 645 #if defined(USE_PETSC_COMPLEX) 646 sum += real(conj(*v)*(*v)); v++; 647 #else 648 sum += (*v)*(*v); v++; 649 #endif 650 } 651 v = bmat->a; 652 for (i=0; i<bmat->nz*bs2; i++ ) { 653 #if defined(USE_PETSC_COMPLEX) 654 sum += real(conj(*v)*(*v)); v++; 655 #else 656 sum += (*v)*(*v); v++; 657 #endif 658 } 659 ierr = 
MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 660 *norm = sqrt(*norm); 661 } else { 662 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 663 } 664 } 665 PetscFunctionReturn(0); 666 } 667 668 #undef __FUNC__ 669 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 670 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 671 { 672 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 673 MPI_Comm comm = mat->comm; 674 int size = baij->size, *owners = baij->rowners,bs=baij->bs; 675 int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 676 MPI_Request *send_waits,*recv_waits; 677 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 678 InsertMode addv; 679 Scalar *rvalues,*svalues; 680 681 PetscFunctionBegin; 682 if (baij->donotstash) { 683 baij->svalues = 0; baij->rvalues = 0; 684 baij->nsends = 0; baij->nrecvs = 0; 685 baij->send_waits = 0; baij->recv_waits = 0; 686 baij->rmax = 0; 687 PetscFunctionReturn(0); 688 } 689 690 /* make sure all processors are either in INSERTMODE or ADDMODE */ 691 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 692 if (addv == (ADD_VALUES|INSERT_VALUES)) { 693 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 694 } 695 mat->insertmode = addv; /* in case this processor had no cache */ 696 697 /* first count number of contributors to each processor */ 698 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 699 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 700 owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 701 for ( i=0; i<baij->stash.n; i++ ) { 702 idx = baij->stash.idx[i]; 703 for ( j=0; j<size; j++ ) { 704 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 705 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 706 } 707 } 708 } 709 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 710 711 /* inform other processors of number of messages and max length*/ 712 work = (int 
*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 713 ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 714 nreceives = work[rank]; 715 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 716 nmax = work[rank]; 717 PetscFree(work); 718 719 /* post receives: 720 1) each message will consist of ordered pairs 721 (global index,value) we store the global index as a double 722 to simplify the message passing. 723 2) since we don't know how long each individual message is we 724 allocate the largest needed buffer for each receive. Potentially 725 this is a lot of wasted space. 726 727 728 This could be done better. 729 */ 730 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 731 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 732 for ( i=0; i<nreceives; i++ ) { 733 ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 734 } 735 736 /* do sends: 737 1) starts[i] gives the starting index in svalues for stuff going to 738 the ith processor 739 */ 740 svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 741 send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 742 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 743 starts[0] = 0; 744 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 745 for ( i=0; i<baij->stash.n; i++ ) { 746 svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 747 svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 748 svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 749 } 750 PetscFree(owner); 751 starts[0] = 0; 752 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 753 count = 0; 754 for ( i=0; i<size; i++ ) { 755 if (procs[i]) { 756 ierr = 
MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 757 } 758 } 759 PetscFree(starts); PetscFree(nprocs); 760 761 /* Free cache space */ 762 PLogInfo(baij->A,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n); 763 ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 764 765 baij->svalues = svalues; baij->rvalues = rvalues; 766 baij->nsends = nsends; baij->nrecvs = nreceives; 767 baij->send_waits = send_waits; baij->recv_waits = recv_waits; 768 baij->rmax = nmax; 769 770 PetscFunctionReturn(0); 771 } 772 773 /* 774 Creates the hash table, and sets the table 775 This table is created only once. 776 If new entried need to be added to the matrix 777 then the hash table has to be destroyed and 778 recreated. 779 */ 780 #undef __FUNC__ 781 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private" 782 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor) 783 { 784 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 785 Mat A = baij->A, B=baij->B; 786 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data; 787 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 788 int size,bs2=baij->bs2,rstart=baij->rstart; 789 int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 790 int *HT,key; 791 Scalar **HD; 792 double tmp; 793 #if defined(USE_PETSC_BOPT_g) 794 int ct=0,max=0; 795 #endif 796 797 PetscFunctionBegin; 798 baij->ht_size=(int)(factor*nz); 799 size = baij->ht_size; 800 801 if (baij->ht) { 802 PetscFunctionReturn(0); 803 } 804 805 /* Allocate Memory for Hash Table */ 806 baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd); 807 baij->ht = (int*)(baij->hd + size); 808 HD = baij->hd; 809 HT = baij->ht; 810 811 812 PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*))); 813 814 815 /* Loop Over A */ 816 for ( i=0; i<a->mbs; i++ ) { 817 for ( j=ai[i]; j<ai[i+1]; j++ ) { 818 row = i+rstart; 819 col = aj[j]+cstart; 820 821 key = 
row*Nbs + col + 1; 822 h1 = HASH(size,key,tmp); 823 for ( k=0; k<size; k++ ){ 824 if (HT[(h1+k)%size] == 0.0) { 825 HT[(h1+k)%size] = key; 826 HD[(h1+k)%size] = a->a + j*bs2; 827 break; 828 #if defined(USE_PETSC_BOPT_g) 829 } else { 830 ct++; 831 #endif 832 } 833 } 834 #if defined(USE_PETSC_BOPT_g) 835 if (k> max) max = k; 836 #endif 837 } 838 } 839 /* Loop Over B */ 840 for ( i=0; i<b->mbs; i++ ) { 841 for ( j=bi[i]; j<bi[i+1]; j++ ) { 842 row = i+rstart; 843 col = garray[bj[j]]; 844 key = row*Nbs + col + 1; 845 h1 = HASH(size,key,tmp); 846 for ( k=0; k<size; k++ ){ 847 if (HT[(h1+k)%size] == 0.0) { 848 HT[(h1+k)%size] = key; 849 HD[(h1+k)%size] = b->a + j*bs2; 850 break; 851 #if defined(USE_PETSC_BOPT_g) 852 } else { 853 ct++; 854 #endif 855 } 856 } 857 #if defined(USE_PETSC_BOPT_g) 858 if (k> max) max = k; 859 #endif 860 } 861 } 862 863 /* Print Summary */ 864 #if defined(USE_PETSC_BOPT_g) 865 for ( i=0,j=0; i<size; i++) 866 if (HT[i]) {j++;} 867 PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n", 868 (j== 0)? 
0.0:((double)(ct+j))/j,max); 869 #endif 870 PetscFunctionReturn(0); 871 } 872 873 #undef __FUNC__ 874 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 875 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 876 { 877 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 878 MPI_Status *send_status,recv_status; 879 int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 880 int bs=baij->bs,row,col,other_disassembled; 881 Scalar *values,val; 882 InsertMode addv = mat->insertmode; 883 884 PetscFunctionBegin; 885 /* wait on receives */ 886 while (count) { 887 ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 888 /* unpack receives into our local space */ 889 values = baij->rvalues + 3*imdex*baij->rmax; 890 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 891 n = n/3; 892 for ( i=0; i<n; i++ ) { 893 row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 894 col = (int) PetscReal(values[3*i+1]); 895 val = values[3*i+2]; 896 if (col >= baij->cstart*bs && col < baij->cend*bs) { 897 col -= baij->cstart*bs; 898 ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 899 } else { 900 if (mat->was_assembled) { 901 if (!baij->colmap) { 902 ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr); 903 } 904 col = (baij->colmap[col/bs]) - 1 + col%bs; 905 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 906 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 907 col = (int) PetscReal(values[3*i+1]); 908 } 909 } 910 ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 911 } 912 } 913 count--; 914 } 915 if (baij->recv_waits) PetscFree(baij->recv_waits); 916 if (baij->rvalues) PetscFree(baij->rvalues); 917 918 /* wait on sends */ 919 if (baij->nsends) { 920 send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 921 ierr = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr); 922 PetscFree(send_status); 923 } 924 if (baij->send_waits) 
PetscFree(baij->send_waits); 925 if (baij->svalues) PetscFree(baij->svalues); 926 927 ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 928 ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 929 930 /* determine if any processor has disassembled, if so we must 931 also disassemble ourselfs, in order that we may reassemble. */ 932 /* 933 if nonzero structure of submatrix B cannot change then we know that 934 no processor disassembled thus we can skip this stuff 935 */ 936 if (!((Mat_SeqBAIJ*) baij->B->data)->nonew) { 937 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 938 if (mat->was_assembled && !other_disassembled) { 939 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 940 } 941 } 942 943 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 944 ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr); 945 } 946 ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr); 947 ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr); 948 949 #if defined(USE_PETSC_BOPT_g) 950 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 951 PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n", 952 ((double)baij->ht_total_ct)/baij->ht_insert_ct); 953 baij->ht_total_ct = 0; 954 baij->ht_insert_ct = 0; 955 } 956 #endif 957 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 958 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr); 959 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 960 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 961 } 962 963 if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;} 964 PetscFunctionReturn(0); 965 } 966 967 #undef __FUNC__ 968 #define __FUNC__ "MatView_MPIBAIJ_Binary" 969 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer) 970 { 971 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 972 int ierr; 973 974 PetscFunctionBegin; 975 if (baij->size == 1) { 976 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 977 } 
else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 978 PetscFunctionReturn(0); 979 } 980 981 #undef __FUNC__ 982 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab" 983 static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 984 { 985 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 986 int ierr, format,rank,bs = baij->bs; 987 FILE *fd; 988 ViewerType vtype; 989 990 PetscFunctionBegin; 991 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 992 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 993 ierr = ViewerGetFormat(viewer,&format); 994 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 995 MatInfo info; 996 MPI_Comm_rank(mat->comm,&rank); 997 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 998 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 999 PetscSequentialPhaseBegin(mat->comm,1); 1000 fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 1001 rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 1002 baij->bs,(int)info.memory); 1003 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); 1004 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 1005 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); 1006 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs); 1007 fflush(fd); 1008 PetscSequentialPhaseEnd(mat->comm,1); 1009 ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr); 1010 PetscFunctionReturn(0); 1011 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 1012 PetscPrintf(mat->comm," block size is %d\n",bs); 1013 PetscFunctionReturn(0); 1014 } 1015 } 1016 1017 if (vtype == DRAW_VIEWER) { 1018 Draw draw; 1019 PetscTruth isnull; 1020 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 1021 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1022 } 1023 1024 if (vtype == ASCII_FILE_VIEWER) { 1025 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 1026 PetscSequentialPhaseBegin(mat->comm,1); 1027 fprintf(fd,"[%d] rows %d starts %d 
ends %d cols %d starts %d ends %d\n", 1028 baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n, 1029 baij->cstart*bs,baij->cend*bs); 1030 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 1031 ierr = MatView(baij->B,viewer); CHKERRQ(ierr); 1032 fflush(fd); 1033 PetscSequentialPhaseEnd(mat->comm,1); 1034 } else { 1035 int size = baij->size; 1036 rank = baij->rank; 1037 if (size == 1) { 1038 ierr = MatView(baij->A,viewer); CHKERRQ(ierr); 1039 } else { 1040 /* assemble the entire matrix onto first processor. */ 1041 Mat A; 1042 Mat_SeqBAIJ *Aloc; 1043 int M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals; 1044 int mbs=baij->mbs; 1045 Scalar *a; 1046 1047 if (!rank) { 1048 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1049 } else { 1050 ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1051 } 1052 PLogObjectParent(mat,A); 1053 1054 /* copy over the A part */ 1055 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1056 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1057 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 1058 1059 for ( i=0; i<mbs; i++ ) { 1060 rvals[0] = bs*(baij->rstart + i); 1061 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1062 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1063 col = (baij->cstart+aj[j])*bs; 1064 for (k=0; k<bs; k++ ) { 1065 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1066 col++; a += bs; 1067 } 1068 } 1069 } 1070 /* copy over the B part */ 1071 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1072 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1073 for ( i=0; i<mbs; i++ ) { 1074 rvals[0] = bs*(baij->rstart + i); 1075 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1076 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1077 col = baij->garray[aj[j]]*bs; 1078 for (k=0; k<bs; k++ ) { 1079 ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1080 col++; a += bs; 1081 } 1082 } 1083 } 1084 PetscFree(rvals); 1085 ierr = 
MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1086 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1087 /* 1088 Everyone has to call to draw the matrix since the graphics waits are 1089 synchronized across all processors that share the Draw object 1090 */ 1091 if (!rank || vtype == DRAW_VIEWER) { 1092 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 1093 } 1094 ierr = MatDestroy(A); CHKERRQ(ierr); 1095 } 1096 } 1097 PetscFunctionReturn(0); 1098 } 1099 1100 1101 1102 #undef __FUNC__ 1103 #define __FUNC__ "MatView_MPIBAIJ" 1104 int MatView_MPIBAIJ(Mat mat,Viewer viewer) 1105 { 1106 int ierr; 1107 ViewerType vtype; 1108 1109 PetscFunctionBegin; 1110 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 1111 if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 1112 vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 1113 ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 1114 } else if (vtype == BINARY_FILE_VIEWER) { 1115 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1116 } else { 1117 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 1118 } 1119 PetscFunctionReturn(0); 1120 } 1121 1122 #undef __FUNC__ 1123 #define __FUNC__ "MatDestroy_MPIBAIJ" 1124 int MatDestroy_MPIBAIJ(Mat mat) 1125 { 1126 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1127 int ierr; 1128 1129 PetscFunctionBegin; 1130 #if defined(USE_PETSC_LOG) 1131 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N); 1132 #endif 1133 1134 ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 1135 PetscFree(baij->rowners); 1136 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1137 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1138 if (baij->colmap) PetscFree(baij->colmap); 1139 if (baij->garray) PetscFree(baij->garray); 1140 if (baij->lvec) VecDestroy(baij->lvec); 1141 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1142 if (baij->rowvalues) PetscFree(baij->rowvalues); 1143 if (baij->barray) PetscFree(baij->barray); 
1144 if (baij->hd) PetscFree(baij->hd); 1145 PetscFree(baij); 1146 PLogObjectDestroy(mat); 1147 PetscHeaderDestroy(mat); 1148 PetscFunctionReturn(0); 1149 } 1150 1151 #undef __FUNC__ 1152 #define __FUNC__ "MatMult_MPIBAIJ" 1153 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1154 { 1155 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1156 int ierr, nt; 1157 1158 PetscFunctionBegin; 1159 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1160 if (nt != a->n) { 1161 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1162 } 1163 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1164 if (nt != a->m) { 1165 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1166 } 1167 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1168 ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr); 1169 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1170 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 1171 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1172 PetscFunctionReturn(0); 1173 } 1174 1175 #undef __FUNC__ 1176 #define __FUNC__ "MatMultAdd_MPIBAIJ" 1177 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1178 { 1179 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1180 int ierr; 1181 1182 PetscFunctionBegin; 1183 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1184 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1185 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1186 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 1187 PetscFunctionReturn(0); 1188 } 1189 1190 #undef __FUNC__ 1191 #define __FUNC__ "MatMultTrans_MPIBAIJ" 1192 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1193 { 1194 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1195 int ierr; 1196 1197 PetscFunctionBegin; 1198 /* do nondiagonal part */ 1199 ierr = 
(*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1200 /* send it on its way */ 1201 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1202 /* do local part */ 1203 ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr); 1204 /* receive remote parts: note this assumes the values are not actually */ 1205 /* inserted in yy until the next line, which is true for my implementation*/ 1206 /* but is not perhaps always true. */ 1207 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1208 PetscFunctionReturn(0); 1209 } 1210 1211 #undef __FUNC__ 1212 #define __FUNC__ "MatMultTransAdd_MPIBAIJ" 1213 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1214 { 1215 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1216 int ierr; 1217 1218 PetscFunctionBegin; 1219 /* do nondiagonal part */ 1220 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1221 /* send it on its way */ 1222 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1223 /* do local part */ 1224 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1225 /* receive remote parts: note this assumes the values are not actually */ 1226 /* inserted in yy until the next line, which is true for my implementation*/ 1227 /* but is not perhaps always true. 
*/ 1228 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 1229 PetscFunctionReturn(0); 1230 } 1231 1232 /* 1233 This only works correctly for square matrices where the subblock A->A is the 1234 diagonal block 1235 */ 1236 #undef __FUNC__ 1237 #define __FUNC__ "MatGetDiagonal_MPIBAIJ" 1238 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1239 { 1240 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1241 int ierr; 1242 1243 PetscFunctionBegin; 1244 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 1245 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1246 PetscFunctionReturn(0); 1247 } 1248 1249 #undef __FUNC__ 1250 #define __FUNC__ "MatScale_MPIBAIJ" 1251 int MatScale_MPIBAIJ(Scalar *aa,Mat A) 1252 { 1253 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1254 int ierr; 1255 1256 PetscFunctionBegin; 1257 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 1258 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #undef __FUNC__ 1263 #define __FUNC__ "MatGetSize_MPIBAIJ" 1264 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n) 1265 { 1266 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1267 1268 PetscFunctionBegin; 1269 if (m) *m = mat->M; 1270 if (n) *n = mat->N; 1271 PetscFunctionReturn(0); 1272 } 1273 1274 #undef __FUNC__ 1275 #define __FUNC__ "MatGetLocalSize_MPIBAIJ" 1276 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n) 1277 { 1278 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1279 1280 PetscFunctionBegin; 1281 *m = mat->m; *n = mat->n; 1282 PetscFunctionReturn(0); 1283 } 1284 1285 #undef __FUNC__ 1286 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ" 1287 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n) 1288 { 1289 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1290 1291 PetscFunctionBegin; 1292 *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs; 1293 PetscFunctionReturn(0); 1294 } 1295 1296 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1297 extern int 
MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**); 1298 1299 #undef __FUNC__ 1300 #define __FUNC__ "MatGetRow_MPIBAIJ" 1301 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1302 { 1303 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data; 1304 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1305 int bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB; 1306 int nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs; 1307 int *cmap, *idx_p,cstart = mat->cstart; 1308 1309 PetscFunctionBegin; 1310 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1311 mat->getrowactive = PETSC_TRUE; 1312 1313 if (!mat->rowvalues && (idx || v)) { 1314 /* 1315 allocate enough space to hold information from the longest row. 1316 */ 1317 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data; 1318 int max = 1,mbs = mat->mbs,tmp; 1319 for ( i=0; i<mbs; i++ ) { 1320 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1321 if (max < tmp) { max = tmp; } 1322 } 1323 mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar))); 1324 CHKPTRQ(mat->rowvalues); 1325 mat->rowindices = (int *) (mat->rowvalues + max*bs2); 1326 } 1327 1328 if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1329 lrow = row - brstart; 1330 1331 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1332 if (!v) {pvA = 0; pvB = 0;} 1333 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1334 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1335 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1336 nztot = nzA + nzB; 1337 1338 cmap = mat->garray; 1339 if (v || idx) { 1340 if (nztot) { 1341 /* Sort by increasing column numbers, assuming A and B already sorted */ 1342 int imark = -1; 1343 if (v) { 1344 *v = v_p = mat->rowvalues; 1345 for ( i=0; i<nzB; i++ ) { 1346 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1347 else break; 
1348 } 1349 imark = i; 1350 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1351 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1352 } 1353 if (idx) { 1354 *idx = idx_p = mat->rowindices; 1355 if (imark > -1) { 1356 for ( i=0; i<imark; i++ ) { 1357 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1358 } 1359 } else { 1360 for ( i=0; i<nzB; i++ ) { 1361 if (cmap[cworkB[i]/bs] < cstart) 1362 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1363 else break; 1364 } 1365 imark = i; 1366 } 1367 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart*bs + cworkA[i]; 1368 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1369 } 1370 } else { 1371 if (idx) *idx = 0; 1372 if (v) *v = 0; 1373 } 1374 } 1375 *nz = nztot; 1376 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1377 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1378 PetscFunctionReturn(0); 1379 } 1380 1381 #undef __FUNC__ 1382 #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1383 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1384 { 1385 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1386 1387 PetscFunctionBegin; 1388 if (baij->getrowactive == PETSC_FALSE) { 1389 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1390 } 1391 baij->getrowactive = PETSC_FALSE; 1392 PetscFunctionReturn(0); 1393 } 1394 1395 #undef __FUNC__ 1396 #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1397 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1398 { 1399 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1400 1401 PetscFunctionBegin; 1402 *bs = baij->bs; 1403 PetscFunctionReturn(0); 1404 } 1405 1406 #undef __FUNC__ 1407 #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1408 int MatZeroEntries_MPIBAIJ(Mat A) 1409 { 1410 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1411 int ierr; 1412 1413 PetscFunctionBegin; 1414 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 1415 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 1416 PetscFunctionReturn(0); 1417 } 1418 
1419 #undef __FUNC__ 1420 #define __FUNC__ "MatGetInfo_MPIBAIJ" 1421 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1422 { 1423 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 1424 Mat A = a->A, B = a->B; 1425 int ierr; 1426 double isend[5], irecv[5]; 1427 1428 PetscFunctionBegin; 1429 info->block_size = (double)a->bs; 1430 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1431 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory; 1432 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1433 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory; 1434 if (flag == MAT_LOCAL) { 1435 info->nz_used = isend[0]; 1436 info->nz_allocated = isend[1]; 1437 info->nz_unneeded = isend[2]; 1438 info->memory = isend[3]; 1439 info->mallocs = isend[4]; 1440 } else if (flag == MAT_GLOBAL_MAX) { 1441 ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm);CHKERRQ(ierr); 1442 info->nz_used = irecv[0]; 1443 info->nz_allocated = irecv[1]; 1444 info->nz_unneeded = irecv[2]; 1445 info->memory = irecv[3]; 1446 info->mallocs = irecv[4]; 1447 } else if (flag == MAT_GLOBAL_SUM) { 1448 ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm);CHKERRQ(ierr); 1449 info->nz_used = irecv[0]; 1450 info->nz_allocated = irecv[1]; 1451 info->nz_unneeded = irecv[2]; 1452 info->memory = irecv[3]; 1453 info->mallocs = irecv[4]; 1454 } 1455 info->rows_global = (double)a->M; 1456 info->columns_global = (double)a->N; 1457 info->rows_local = (double)a->m; 1458 info->columns_local = (double)a->N; 1459 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1460 info->fill_ratio_needed = 0; 1461 info->factor_mallocs = 0; 1462 PetscFunctionReturn(0); 1463 } 1464 1465 #undef __FUNC__ 1466 #define __FUNC__ "MatSetOption_MPIBAIJ" 1467 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1468 { 1469 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1470 1471 PetscFunctionBegin; 1472 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1473 op == 
MAT_YES_NEW_NONZERO_LOCATIONS || 1474 op == MAT_COLUMNS_UNSORTED || 1475 op == MAT_COLUMNS_SORTED || 1476 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1477 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1478 MatSetOption(a->A,op); 1479 MatSetOption(a->B,op); 1480 } else if (op == MAT_ROW_ORIENTED) { 1481 a->roworiented = 1; 1482 MatSetOption(a->A,op); 1483 MatSetOption(a->B,op); 1484 } else if (op == MAT_ROWS_SORTED || 1485 op == MAT_ROWS_UNSORTED || 1486 op == MAT_SYMMETRIC || 1487 op == MAT_STRUCTURALLY_SYMMETRIC || 1488 op == MAT_YES_NEW_DIAGONALS || 1489 op == MAT_USE_HASH_TABLE) 1490 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1491 else if (op == MAT_COLUMN_ORIENTED) { 1492 a->roworiented = 0; 1493 MatSetOption(a->A,op); 1494 MatSetOption(a->B,op); 1495 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1496 a->donotstash = 1; 1497 } else if (op == MAT_NO_NEW_DIAGONALS) { 1498 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1499 } else if (op == MAT_USE_HASH_TABLE) { 1500 a->ht_flag = 1; 1501 } else { 1502 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1503 } 1504 PetscFunctionReturn(0); 1505 } 1506 1507 #undef __FUNC__ 1508 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1509 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1510 { 1511 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 1512 Mat_SeqBAIJ *Aloc; 1513 Mat B; 1514 int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1515 int bs=baij->bs,mbs=baij->mbs; 1516 Scalar *a; 1517 1518 PetscFunctionBegin; 1519 if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1520 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 1521 CHKERRQ(ierr); 1522 1523 /* copy over the A part */ 1524 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1525 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1526 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 1527 1528 for ( i=0; i<mbs; i++ ) { 1529 rvals[0] = bs*(baij->rstart + i); 1530 for ( j=1; j<bs; j++ ) { 
rvals[j] = rvals[j-1] + 1; } 1531 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1532 col = (baij->cstart+aj[j])*bs; 1533 for (k=0; k<bs; k++ ) { 1534 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1535 col++; a += bs; 1536 } 1537 } 1538 } 1539 /* copy over the B part */ 1540 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1541 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1542 for ( i=0; i<mbs; i++ ) { 1543 rvals[0] = bs*(baij->rstart + i); 1544 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1545 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1546 col = baij->garray[aj[j]]*bs; 1547 for (k=0; k<bs; k++ ) { 1548 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1549 col++; a += bs; 1550 } 1551 } 1552 } 1553 PetscFree(rvals); 1554 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1555 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1556 1557 if (matout != PETSC_NULL) { 1558 *matout = B; 1559 } else { 1560 PetscOps *Abops; 1561 struct _MatOps *Aops; 1562 1563 /* This isn't really an in-place transpose .... but free data structures from baij */ 1564 PetscFree(baij->rowners); 1565 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1566 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1567 if (baij->colmap) PetscFree(baij->colmap); 1568 if (baij->garray) PetscFree(baij->garray); 1569 if (baij->lvec) VecDestroy(baij->lvec); 1570 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1571 PetscFree(baij); 1572 1573 /* 1574 This is horrible, horrible code. We need to keep the 1575 A pointers for the bops and ops but copy everything 1576 else from C. 
1577 */ 1578 Abops = A->bops; 1579 Aops = A->ops; 1580 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1581 A->bops = Abops; 1582 A->ops = Aops; 1583 1584 PetscHeaderDestroy(B); 1585 } 1586 PetscFunctionReturn(0); 1587 } 1588 1589 #undef __FUNC__ 1590 #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 1591 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 1592 { 1593 Mat a = ((Mat_MPIBAIJ *) A->data)->A; 1594 Mat b = ((Mat_MPIBAIJ *) A->data)->B; 1595 int ierr,s1,s2,s3; 1596 1597 PetscFunctionBegin; 1598 if (ll) { 1599 ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 1600 ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1601 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes"); 1602 ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 1603 ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 1604 } 1605 if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector"); 1606 PetscFunctionReturn(0); 1607 } 1608 1609 #undef __FUNC__ 1610 #define __FUNC__ "MatZeroRows_MPIBAIJ" 1611 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 1612 { 1613 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1614 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1615 int *procs,*nprocs,j,found,idx,nsends,*work,row; 1616 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1617 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1618 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1619 MPI_Comm comm = A->comm; 1620 MPI_Request *send_waits,*recv_waits; 1621 MPI_Status recv_status,*send_status; 1622 IS istmp; 1623 PetscTruth localdiag; 1624 1625 PetscFunctionBegin; 1626 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 1627 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 1628 1629 /* first count number of contributors to each processor */ 1630 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 1631 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 1632 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 1633 for ( i=0; i<N; i++ ) { 
1634 idx = rows[i]; 1635 found = 0; 1636 for ( j=0; j<size; j++ ) { 1637 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1638 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 1639 } 1640 } 1641 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 1642 } 1643 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 1644 1645 /* inform other processors of number of messages and max length*/ 1646 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 1647 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1648 nrecvs = work[rank]; 1649 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 1650 nmax = work[rank]; 1651 PetscFree(work); 1652 1653 /* post receives: */ 1654 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues); 1655 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 1656 for ( i=0; i<nrecvs; i++ ) { 1657 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1658 } 1659 1660 /* do sends: 1661 1) starts[i] gives the starting index in svalues for stuff going to 1662 the ith processor 1663 */ 1664 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 1665 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 1666 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 1667 starts[0] = 0; 1668 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1669 for ( i=0; i<N; i++ ) { 1670 svalues[starts[owner[i]]++] = rows[i]; 1671 } 1672 ISRestoreIndices(is,&rows); 1673 1674 starts[0] = 0; 1675 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 1676 count = 0; 1677 for ( i=0; i<size; i++ ) { 1678 if (procs[i]) { 1679 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1680 } 1681 } 1682 PetscFree(starts); 1683 1684 base = 
owners[rank]*bs; 1685 1686 /* wait on receives */ 1687 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 1688 source = lens + nrecvs; 1689 count = nrecvs; slen = 0; 1690 while (count) { 1691 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1692 /* unpack receives into our local space */ 1693 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1694 source[imdex] = recv_status.MPI_SOURCE; 1695 lens[imdex] = n; 1696 slen += n; 1697 count--; 1698 } 1699 PetscFree(recv_waits); 1700 1701 /* move the data into the send scatter */ 1702 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 1703 count = 0; 1704 for ( i=0; i<nrecvs; i++ ) { 1705 values = rvalues + i*nmax; 1706 for ( j=0; j<lens[i]; j++ ) { 1707 lrows[count++] = values[j] - base; 1708 } 1709 } 1710 PetscFree(rvalues); PetscFree(lens); 1711 PetscFree(owner); PetscFree(nprocs); 1712 1713 /* actually zap the local rows */ 1714 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1715 PLogObjectParent(A,istmp); 1716 1717 /* 1718 Zero the required rows. If the "diagonal block" of the matrix 1719 is square and the user wishes to set the diagonal we use seperate 1720 code so that MatSetValues() is not called for each diagonal allocating 1721 new memory, thus calling lots of mallocs and slowing things down. 
1722 1723 Contributed by: Mathew Knepley 1724 */ 1725 localdiag = PETSC_FALSE; 1726 if (diag && (l->A->M == l->A->N)) { 1727 localdiag = PETSC_TRUE; 1728 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 1729 } else { 1730 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 1731 } 1732 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 1733 ierr = ISDestroy(istmp); CHKERRQ(ierr); 1734 1735 if (diag && (localdiag == PETSC_FALSE)) { 1736 for ( i = 0; i < slen; i++ ) { 1737 row = lrows[i] + rstart_bs; 1738 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr); 1739 } 1740 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1741 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1742 } 1743 PetscFree(lrows); 1744 1745 /* wait on sends */ 1746 if (nsends) { 1747 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1748 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1749 PetscFree(send_status); 1750 } 1751 PetscFree(send_waits); PetscFree(svalues); 1752 1753 PetscFunctionReturn(0); 1754 } 1755 1756 extern int MatPrintHelp_SeqBAIJ(Mat); 1757 #undef __FUNC__ 1758 #define __FUNC__ "MatPrintHelp_MPIBAIJ" 1759 int MatPrintHelp_MPIBAIJ(Mat A) 1760 { 1761 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1762 MPI_Comm comm = A->comm; 1763 static int called = 0; 1764 int ierr; 1765 1766 PetscFunctionBegin; 1767 if (!a->rank) { 1768 ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr); 1769 } 1770 if (called) {PetscFunctionReturn(0);} else called = 1; 1771 (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n"); 1772 (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n"); 1773 PetscFunctionReturn(0); 1774 } 1775 1776 #undef __FUNC__ 1777 #define __FUNC__ "MatSetUnfactored_MPIBAIJ" 1778 int MatSetUnfactored_MPIBAIJ(Mat A) 1779 { 1780 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 1781 int ierr; 1782 1783 PetscFunctionBegin; 1784 ierr = 
MatSetUnfactored(a->A); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);

/* -------------------------------------------------------------------*/
/*
   Operation table for the MPIBAIJ matrix type; a 0 entry means the
   operation is not implemented for this format.
*/
static struct _MatOps MatOps = {
  MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
  0,0,0,0,
  0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
  0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0,
  0,0,0,MatGetSize_MPIBAIJ,
  MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
  0,0,MatConvertSameType_MPIBAIJ,0,0,
  0,0,0,MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ,
  0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ};

#undef __FUNC__
#define __FUNC__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           of the in diagonal portion of the local (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.  -mat_no_unroll - uses code that does not unroll the loops in the
                    block calculations (much slower)
.  -mat_block_size - size of the blocks to use

   Notes:
   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
@*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat          B;
  Mat_MPIBAIJ  *b;
  int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg;

  PetscFunctionBegin;
  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");

  MPI_Comm_size(comm,&size);
  if (size == 1) {
    /* Uniprocessor case: fall back to a sequential BAIJ matrix.  Note that
       o_nz/o_nnz are not used here since there is no off-diagonal part. */
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIBAIJ));
  PetscMemcpy(B->ops,&MatOps,sizeof(struct _MatOps));

  B->ops->destroy = MatDestroy_MPIBAIJ;
  B->ops->view    = MatView_MPIBAIJ;
  B->mapping      = 0;
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  /* Resolve the local/global size combinations; at least one of each pair
     (m,M) and (n,N) must be specified. */
  if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }
  if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
  }
  if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
  }
  /* If both local and global sizes are given, the local sizes win and the
     global size is recomputed below from the sum over all processes. */
  if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
  if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    mbs = m/bs; nbs = n/bs;
    ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
    if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
    if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
  }
  if (m == PETSC_DECIDE) {
    Mbs = M/bs;
    if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
    /* distribute the extra (Mbs % size) block rows to the lowest-rank processes */
    mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
    m   = mbs*bs;
  }
  if (n == PETSC_DECIDE) {
    Nbs = N/bs;
    if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
    nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
    n   = nbs*bs;
  }
  if (mbs*bs != m || nbs*bs != n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
  }

  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;
  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = nbs;
  b->Mbs = Mbs;
  b->Nbs = Nbs;

  /* build local table of row and column ownerships: rowners/cowners hold the
     prefix sums of the per-process block counts, so rowners[rank] is this
     process's first owned block row and rowners[rank+1] is one past the last */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart    = b->rowners[b->rank];
  b->rend      = b->rowners[b->rank+1];
  b->rstart_bs = b->rstart * bs;   /* same bounds in scalar (point) rows */
  b->rend_bs   = b->rend * bs;

  ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart    = b->cowners[b->rank];
  b->cend      = b->cowners[b->rank+1];
  b->cstart_bs = b->cstart * bs;
  b->cend_bs   = b->cend * bs;

  /* A holds the square diagonal portion (m x n); B holds the off-diagonal
     portion, allocated with the full global column width (m x N) until
     assembly compresses it. */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off processor entries formed during MatSetValues() */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices  = 0;
  b->rowvalues   = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff (optional fast stashing of values, see below) */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = 0;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  *A = B;
  ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr);
  if (flg) {
    double fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr);
    ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;   /* factor must exceed 1.0; fall back to default */
    ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr);
    PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatConvertSameType_MPIBAIJ"
/*
   Duplicates an assembled MPIBAIJ matrix (used through the ops table as
   the same-type MatConvert path).  Note: the cpvalues argument is not
   consulted in this routine; the A and B parts are always copied via
   MatConvert(...,MATSAME,...).
*/
static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat         mat;
  Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
  int         ierr, len=0, flg;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
  PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->ops->destroy = MatDestroy_MPIBAIJ;
  mat->ops->view    = MatView_MPIBAIJ;
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;

  /* copy all scalar size/layout fields from the original */
  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->bs  = oldmat->bs;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->rstart = oldmat->rstart;
  a->rend   = oldmat->rend;
  a->cstart = oldmat->cstart;
  a->cend   = oldmat->cend;
  a->size   = oldmat->size;
  a->rank   = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;

  /* hash table stuff: the flag/factor carry over but the table itself is
     not copied (counters restart at zero) */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  /* deep-copy the ownership tables (rowners and cowners share one allocation) */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
  } else a->colmap = 0;
  /* garray has one entry per nonzero block column of the off-diagonal part */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
    a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  /* assumes oldmat is assembled so lvec/Mvctx exist -- NOTE(review): no
     explicit check here; verify callers only duplicate assembled matrices */
  ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIBAIJ"
/*
   Loads an MPIBAIJ matrix from a PETSc binary viewer.  Process 0 reads the
   file (header, row lengths, column indices, values) and ships each other
   process its portion; all processes then assemble the parallel matrix.
   Only square matrices are supported, and rows are padded so the global
   size is divisible by the block size (-matload_block_size).
*/
int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j,rstart, rend, fd;
  Scalar       *vals,*buf;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
  int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
  int          flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
  int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  int          dcount,kmax,k,nzcount,tmp;

  PetscFunctionBegin;
  ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    /* only process 0 touches the file */
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize (padding rows get a single unit diagonal
     entry each, added further below).
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*(Mbs);
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows &&!rank) {
    PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
  }

  /* determine ownership of all rows: rowners in block rows, browners in
     scalar (point) rows */
  mbs     = Mbs/size + ((Mbs % size) > rank);
  m       = mbs * bs;
  rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
  browners = rowners + size + 1;
  ierr    = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
  for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
    for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;   /* padding rows hold 1 entry */
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices  */
    nz     = procsnz[0];
    ibuf   = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
    mycols = ibuf;
    /* with one process the padding entries live locally, not on disk */
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
    if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in every one's (except the last) and ship off */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if ( size != 1 ) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;   /* padding: unit diagonal */
      ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += locrowlens[i];
    }
    ibuf   = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
    mycols = ibuf;
    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries;
     mask/masked1/masked2 track which block columns have already been seen
     within the current block row so each is counted once */
  dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
  odlens = dlens + (rend-rstart);
  mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
  PetscMemzero(mask,3*Mbs*sizeof(int));
  masked1 = mask    + Mbs;
  masked2 = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for ( i=0; i<mbs; i++ ) {
    dcount  = 0;
    odcount = 0;
    for ( j=0; j<bs; j++ ) {
      kmax = locrowlens[rowcount];
      for ( k=0; k<kmax; k++ ) {
        tmp = mycols[nzcount++]/bs;   /* convert point column to block column */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          /* square matrix: the diagonal block columns are exactly the owned
             block rows [rstart,rend) */
          if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for ( j=0; j<dcount;  j++ ) mask[masked1[j]] = 0;
    for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
  }

  /* create our matrix, preallocated exactly from the counts above */
  ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
  CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);   /* file stores columns sorted per row */

  if (!rank) {
    buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
    /* read in my part of the matrix numerical values  */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
    if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj      = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in other processors (except the last one) and ship out */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc: i == size-1 here (set by the for-init above even when
       the loop body never executes), so procsnz[i] == procsnz[size-1] */
    if ( size != 1 ){
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;   /* unit diagonal padding */
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);

    /* receive message of values*/
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  PetscFree(locrowlens);
  PetscFree(buf);
  PetscFree(ibuf);
  PetscFree(rowners);
  PetscFree(dlens);
  PetscFree(mask);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}



#undef __FUNC__
#define __FUNC__ "MatMPIBAIJSetHashTableFactor"
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Input Parameters:
.  mat  - the matrix
.  fact - factor

   Collective on Mat

   Notes:
   This can also be set by the command line option: -mat_use_hash_table fact

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_COOKIE);
  if (mat->type != MATMPIBAIJ) {
    SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
  }
  /* NOTE(review): fact is stored unchecked here; the command-line path in
     MatCreateMPIBAIJ clamps values <= 1.0 -- confirm whether direct callers
     should be validated the same way */
  baij = (Mat_MPIBAIJ*) mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}