1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpibaij.c,v 1.98 1998/01/12 21:15:42 balay Exp balay $"; 3 #endif 4 5 #include "pinclude/pviewer.h" 6 #include "src/mat/impls/baij/mpi/mpibaij.h" 7 #include "src/vec/vecimpl.h" 8 9 10 extern int MatSetUpMultiply_MPIBAIJ(Mat); 11 extern int DisAssemble_MPIBAIJ(Mat); 12 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int); 13 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **); 14 15 /* 16 Local utility routine that creates a mapping from the global column 17 number to the local number in the off-diagonal part of the local 18 storage of the matrix. This is done in a non scable way since the 19 length of colmap equals the global matrix length. 20 */ 21 #undef __FUNC__ 22 #define __FUNC__ "CreateColmap_MPIBAIJ_Private" 23 static int CreateColmap_MPIBAIJ_Private(Mat mat) 24 { 25 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 26 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data; 27 int nbs = B->nbs,i,bs=B->bs;; 28 29 PetscFunctionBegin; 30 baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 31 PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 32 PetscMemzero(baij->colmap,baij->Nbs*sizeof(int)); 33 for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1; 34 PetscFunctionReturn(0); 35 } 36 37 #define CHUNKSIZE 10 38 39 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 40 { \ 41 \ 42 brow = row/bs; \ 43 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 44 rmax = aimax[brow]; nrow = ailen[brow]; \ 45 bcol = col/bs; \ 46 ridx = row % bs; cidx = col % bs; \ 47 low = 0; high = nrow; \ 48 while (high-low > 3) { \ 49 t = (low+high)/2; \ 50 if (rp[t] > bcol) high = t; \ 51 else low = t; \ 52 } \ 53 for ( _i=low; _i<high; _i++ ) { \ 54 if (rp[_i] > bcol) break; \ 55 if (rp[_i] == bcol) { \ 56 bap = ap + bs2*_i + bs*cidx + ridx; \ 57 if (addv == ADD_VALUES) *bap += value; \ 58 else *bap = value; \ 59 goto a_noinsert; \ 60 } \ 61 } \ 62 if (a->nonew == 1) 
goto a_noinsert; \ 63 else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 64 if (nrow >= rmax) { \ 65 /* there is no extra room in row, therefore enlarge */ \ 66 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 67 Scalar *new_a; \ 68 \ 69 if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 70 \ 71 /* malloc new storage space */ \ 72 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \ 73 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 74 new_j = (int *) (new_a + bs2*new_nz); \ 75 new_i = new_j + new_nz; \ 76 \ 77 /* copy over old data into new slots */ \ 78 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \ 79 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 80 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \ 81 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 82 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \ 83 len*sizeof(int)); \ 84 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \ 85 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 86 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 87 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); \ 88 /* free up old matrix storage */ \ 89 PetscFree(a->a); \ 90 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 91 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 92 a->singlemalloc = 1; \ 93 \ 94 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 95 rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 96 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 97 a->maxnz += bs2*CHUNKSIZE; \ 98 a->reallocs++; \ 99 a->nz++; \ 100 } \ 101 N = nrow++ - 1; \ 102 /* shift up all the later entries in this row */ \ 103 for ( ii=N; ii>=_i; ii-- ) { \ 104 rp[ii+1] = rp[ii]; \ 105 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 106 } \ 107 if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 108 
rp[_i] = bcol; \ 109 ap[bs2*_i + bs*cidx + ridx] = value; \ 110 a_noinsert:; \ 111 ailen[brow] = nrow; \ 112 } 113 114 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 115 { \ 116 \ 117 brow = row/bs; \ 118 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 119 rmax = bimax[brow]; nrow = bilen[brow]; \ 120 bcol = col/bs; \ 121 ridx = row % bs; cidx = col % bs; \ 122 low = 0; high = nrow; \ 123 while (high-low > 3) { \ 124 t = (low+high)/2; \ 125 if (rp[t] > bcol) high = t; \ 126 else low = t; \ 127 } \ 128 for ( _i=low; _i<high; _i++ ) { \ 129 if (rp[_i] > bcol) break; \ 130 if (rp[_i] == bcol) { \ 131 bap = ap + bs2*_i + bs*cidx + ridx; \ 132 if (addv == ADD_VALUES) *bap += value; \ 133 else *bap = value; \ 134 goto b_noinsert; \ 135 } \ 136 } \ 137 if (b->nonew == 1) goto b_noinsert; \ 138 else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 139 if (nrow >= rmax) { \ 140 /* there is no extra room in row, therefore enlarge */ \ 141 int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 142 Scalar *new_a; \ 143 \ 144 if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 145 \ 146 /* malloc new storage space */ \ 147 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \ 148 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 149 new_j = (int *) (new_a + bs2*new_nz); \ 150 new_i = new_j + new_nz; \ 151 \ 152 /* copy over old data into new slots */ \ 153 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \ 154 for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 155 PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \ 156 len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 157 PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \ 158 len*sizeof(int)); \ 159 PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \ 160 PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \ 161 
PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 162 ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar)); \ 163 /* free up old matrix storage */ \ 164 PetscFree(b->a); \ 165 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 166 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 167 b->singlemalloc = 1; \ 168 \ 169 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 170 rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 171 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \ 172 b->maxnz += bs2*CHUNKSIZE; \ 173 b->reallocs++; \ 174 b->nz++; \ 175 } \ 176 N = nrow++ - 1; \ 177 /* shift up all the later entries in this row */ \ 178 for ( ii=N; ii>=_i; ii-- ) { \ 179 rp[ii+1] = rp[ii]; \ 180 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \ 181 } \ 182 if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar)); \ 183 rp[_i] = bcol; \ 184 ap[bs2*_i + bs*cidx + ridx] = value; \ 185 b_noinsert:; \ 186 bilen[brow] = nrow; \ 187 } 188 189 #undef __FUNC__ 190 #define __FUNC__ "MatSetValues_MPIBAIJ" 191 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 192 { 193 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 194 Scalar value; 195 int ierr,i,j,row,col; 196 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 197 int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 198 int cend_orig=baij->cend_bs,bs=baij->bs; 199 200 /* Some Variables required in the macro */ 201 Mat A = baij->A; 202 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data; 203 int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 204 Scalar *aa=a->a; 205 206 Mat B = baij->B; 207 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data; 208 int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 209 Scalar *ba=b->a; 210 211 int *rp,ii,nrow,_i,rmax,N,brow,bcol; 212 int low,high,t,ridx,cidx,bs2=a->bs2; 213 Scalar *ap,*bap; 214 215 PetscFunctionBegin; 216 for ( i=0; i<m; i++ ) { 217 #if defined(USE_PETSC_BOPT_g) 218 if (im[i] < 0) 
SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 219 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 220 #endif 221 if (im[i] >= rstart_orig && im[i] < rend_orig) { 222 row = im[i] - rstart_orig; 223 for ( j=0; j<n; j++ ) { 224 if (in[j] >= cstart_orig && in[j] < cend_orig){ 225 col = in[j] - cstart_orig; 226 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 227 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 228 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 229 } 230 #if defined(USE_PETSC_BOPT_g) 231 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 232 else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 233 #endif 234 else { 235 if (mat->was_assembled) { 236 if (!baij->colmap) { 237 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 238 } 239 col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 240 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 241 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 242 col = in[j]; 243 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 244 B = baij->B; 245 b = (Mat_SeqBAIJ *) (B)->data; 246 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 247 ba=b->a; 248 } 249 } else col = in[j]; 250 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 251 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 252 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 253 } 254 } 255 } else { 256 if (roworiented && !baij->donotstash) { 257 ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 258 } else { 259 if (!baij->donotstash) { 260 row = im[i]; 261 for ( j=0; j<n; j++ ) { 262 ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 263 } 264 } 265 } 266 } 267 } 268 PetscFunctionReturn(0); 269 } 270 271 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 272 #undef 
__FUNC__ 273 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ" 274 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 275 { 276 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 277 Scalar *value,*barray=baij->barray; 278 int ierr,i,j,ii,jj,row,col,k,l; 279 int roworiented = baij->roworiented,rstart=baij->rstart ; 280 int rend=baij->rend,cstart=baij->cstart,stepval; 281 int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 282 283 if(!barray) { 284 baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray); 285 } 286 287 if (roworiented) { 288 stepval = (n-1)*bs; 289 } else { 290 stepval = (m-1)*bs; 291 } 292 for ( i=0; i<m; i++ ) { 293 #if defined(USE_PETSC_BOPT_g) 294 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 295 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 296 #endif 297 if (im[i] >= rstart && im[i] < rend) { 298 row = im[i] - rstart; 299 for ( j=0; j<n; j++ ) { 300 /* If NumCol = 1 then a copy is not required */ 301 if ((roworiented) && (n == 1)) { 302 barray = v + i*bs2; 303 } else if((!roworiented) && (m == 1)) { 304 barray = v + j*bs2; 305 } else { /* Here a copy is required */ 306 if (roworiented) { 307 value = v + i*(stepval+bs)*bs + j*bs; 308 } else { 309 value = v + j*(stepval+bs)*bs + i*bs; 310 } 311 for ( ii=0; ii<bs; ii++,value+=stepval ) { 312 for (jj=0; jj<bs; jj++ ) { 313 *barray++ = *value++; 314 } 315 } 316 barray -=bs2; 317 } 318 319 if (in[j] >= cstart && in[j] < cend){ 320 col = in[j] - cstart; 321 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 322 } 323 #if defined(USE_PETSC_BOPT_g) 324 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 325 else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 326 #endif 327 else { 328 if (mat->was_assembled) { 329 if (!baij->colmap) { 330 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 331 } 332 
333 #if defined(USE_PETSC_BOPT_g) 334 if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");} 335 #endif 336 col = (baij->colmap[in[j]] - 1)/bs; 337 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 338 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 339 col = in[j]; 340 } 341 } 342 else col = in[j]; 343 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 344 } 345 } 346 } else { 347 if (!baij->donotstash) { 348 if (roworiented ) { 349 row = im[i]*bs; 350 value = v + i*(stepval+bs)*bs; 351 for ( j=0; j<bs; j++,row++ ) { 352 for ( k=0; k<n; k++ ) { 353 for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 354 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 355 } 356 } 357 } 358 } else { 359 for ( j=0; j<n; j++ ) { 360 value = v + j*(stepval+bs)*bs + i*bs; 361 col = in[j]*bs; 362 for ( k=0; k<bs; k++,col++,value+=stepval) { 363 for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 364 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 365 } 366 } 367 } 368 } 369 } 370 } 371 } 372 PetscFunctionReturn(0); 373 } 374 #include <math.h> 375 #define HASH_KEY 0.6180339887 376 #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) 377 378 #undef __FUNC__ 379 #define __FUNC__ "MatSetValues_MPIBAIJ_HT" 380 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 381 { 382 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 383 int ierr,i,j,row,col; 384 int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 385 int rend_orig=baij->rend_bs; 386 387 int h1,key,size=baij->ht_size,k,bs=baij->bs; 388 int * HT = baij->ht; 389 Scalar ** HD = baij->hd,value; 390 391 392 PetscFunctionBegin; 393 394 for ( i=0; i<m; i++ ) { 395 #if defined(USE_PETSC_BOPT_g) 396 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 397 if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 398 #endif 399 if (im[i] >= 
rstart_orig && im[i] < rend_orig) { 400 row = im[i]; 401 for ( j=0; j<n; j++ ) { 402 col = in[j]; 403 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 404 /* Look up into the Hash Table */ 405 key = (row/bs)*baij->Nbs+(col/bs)+1; 406 h1 = HASH1(size,key); 407 408 for ( k=0; k<size; k++ ){ 409 if (HT[(h1+k)%size] == key) { 410 if (addv == ADD_VALUES) *(HD[(h1+k)%size]+ (col % bs)*bs + row % bs) += value; 411 else *(HD[(h1+k)%size]+ (col % bs)*bs + row % bs) = value; 412 break; 413 } 414 } 415 if ( k==size) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 416 } 417 } else { 418 if (roworiented && !baij->donotstash) { 419 ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 420 } else { 421 if (!baij->donotstash) { 422 row = im[i]; 423 for ( j=0; j<n; j++ ) { 424 ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 425 } 426 } 427 } 428 } 429 } 430 PetscFunctionReturn(0); 431 } 432 433 #undef __FUNC__ 434 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT" 435 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 436 { 437 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 438 int ierr,i,j,ii,jj,row,col,k,l; 439 int roworiented = baij->roworiented,rstart=baij->rstart ; 440 int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 441 442 int h1,key,size=baij->ht_size; 443 int * HT = baij->ht; 444 Scalar ** HD = baij->hd,*value,*baij_a; 445 446 PetscFunctionBegin; 447 448 if (roworiented) { 449 stepval = (n-1)*bs; 450 } else { 451 stepval = (m-1)*bs; 452 } 453 for ( i=0; i<m; i++ ) { 454 #if defined(USE_PETSC_BOPT_g) 455 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 456 if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 457 #endif 458 if (im[i] >= rstart && im[i] < rend) { 459 row = im[i]; 460 for ( j=0; j<n; j++ ) { 461 col = in[j]; 462 463 /* Look up into the Hash Table */ 464 key = 
row*baij->Nbs+col+1; 465 h1 = HASH1(size,key); 466 467 for ( k=0; k<size; k++ ){ 468 if (HT[(h1+k)%size] == key) { 469 baij_a = HD[(h1+k)%size]; 470 471 if (roworiented) { 472 value = v + i*(stepval+bs)*bs + j*bs; 473 if (addv == ADD_VALUES) { 474 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a++ ) { 475 for ( jj=0; jj<bs2; jj+=bs ) { 476 baij_a[jj] += *value++; 477 } 478 } 479 } else { 480 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a++ ) { 481 for ( jj=0; jj<bs2; jj+=bs ) { 482 baij_a[jj] = *value++; 483 } 484 } 485 } 486 } else { 487 value = v + j*(stepval+bs)*bs + i*bs; 488 if (addv == ADD_VALUES) { 489 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 490 for ( jj=0; jj<bs; jj++ ) { 491 baij_a[jj] += *value++; 492 } 493 } 494 } else { 495 for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) { 496 for ( jj=0; jj<bs; jj++ ) { 497 baij_a[jj] = *value++; 498 } 499 } 500 } 501 } 502 break; 503 } 504 } 505 if ( k==size) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 506 } 507 } else { 508 if (!baij->donotstash) { 509 if (roworiented ) { 510 row = im[i]*bs; 511 value = v + i*(stepval+bs)*bs; 512 for ( j=0; j<bs; j++,row++ ) { 513 for ( k=0; k<n; k++ ) { 514 for ( col=in[k]*bs,l=0; l<bs; l++,col++) { 515 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 516 } 517 } 518 } 519 } else { 520 for ( j=0; j<n; j++ ) { 521 value = v + j*(stepval+bs)*bs + i*bs; 522 col = in[j]*bs; 523 for ( k=0; k<bs; k++,col++,value+=stepval) { 524 for ( row = im[i]*bs,l=0; l<bs; l++,row++) { 525 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr); 526 } 527 } 528 } 529 } 530 } 531 } 532 } 533 PetscFunctionReturn(0); 534 } 535 #undef __FUNC__ 536 #define __FUNC__ "MatGetValues_MPIBAIJ" 537 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 538 { 539 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 540 int bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs; 
541 int bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col; 542 543 for ( i=0; i<m; i++ ) { 544 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 545 if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 546 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 547 row = idxm[i] - bsrstart; 548 for ( j=0; j<n; j++ ) { 549 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 550 if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 551 if (idxn[j] >= bscstart && idxn[j] < bscend){ 552 col = idxn[j] - bscstart; 553 ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 554 } else { 555 if (!baij->colmap) { 556 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 557 } 558 if((baij->colmap[idxn[j]/bs]-1 < 0) || 559 (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 560 else { 561 col = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs; 562 ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 563 } 564 } 565 } 566 } else { 567 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 568 } 569 } 570 PetscFunctionReturn(0); 571 } 572 573 #undef __FUNC__ 574 #define __FUNC__ "MatNorm_MPIBAIJ" 575 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm) 576 { 577 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 578 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data; 579 int ierr, i,bs2=baij->bs2; 580 double sum = 0.0; 581 Scalar *v; 582 583 PetscFunctionBegin; 584 if (baij->size == 1) { 585 ierr = MatNorm(baij->A,type,norm); CHKERRQ(ierr); 586 } else { 587 if (type == NORM_FROBENIUS) { 588 v = amat->a; 589 for (i=0; i<amat->nz*bs2; i++ ) { 590 #if defined(USE_PETSC_COMPLEX) 591 sum += real(conj(*v)*(*v)); v++; 592 #else 593 sum += (*v)*(*v); v++; 594 #endif 595 } 596 v = bmat->a; 597 for (i=0; i<bmat->nz*bs2; i++ ) { 598 #if defined(USE_PETSC_COMPLEX) 599 sum += real(conj(*v)*(*v)); v++; 600 
#else 601 sum += (*v)*(*v); v++; 602 #endif 603 } 604 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 605 *norm = sqrt(*norm); 606 } else { 607 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 608 } 609 } 610 PetscFunctionReturn(0); 611 } 612 613 #undef __FUNC__ 614 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ" 615 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 616 { 617 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 618 MPI_Comm comm = mat->comm; 619 int size = baij->size, *owners = baij->rowners,bs=baij->bs; 620 int rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr; 621 MPI_Request *send_waits,*recv_waits; 622 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 623 InsertMode addv; 624 Scalar *rvalues,*svalues; 625 626 PetscFunctionBegin; 627 /* make sure all processors are either in INSERTMODE or ADDMODE */ 628 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 629 if (addv == (ADD_VALUES|INSERT_VALUES)) { 630 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 631 } 632 mat->insertmode = addv; /* in case this processor had no cache */ 633 634 /* first count number of contributors to each processor */ 635 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 636 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 637 owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 638 for ( i=0; i<baij->stash.n; i++ ) { 639 idx = baij->stash.idx[i]; 640 for ( j=0; j<size; j++ ) { 641 if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 642 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 643 } 644 } 645 } 646 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 647 648 /* inform other processors of number of messages and max length*/ 649 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 650 ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 651 nreceives = 
work[rank]; 652 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 653 nmax = work[rank]; 654 PetscFree(work); 655 656 /* post receives: 657 1) each message will consist of ordered pairs 658 (global index,value) we store the global index as a double 659 to simplify the message passing. 660 2) since we don't know how long each individual message is we 661 allocate the largest needed buffer for each receive. Potentially 662 this is a lot of wasted space. 663 664 665 This could be done better. 666 */ 667 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 668 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 669 for ( i=0; i<nreceives; i++ ) { 670 ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 671 } 672 673 /* do sends: 674 1) starts[i] gives the starting index in svalues for stuff going to 675 the ith processor 676 */ 677 svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 678 send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 679 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 680 starts[0] = 0; 681 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 682 for ( i=0; i<baij->stash.n; i++ ) { 683 svalues[3*starts[owner[i]]] = (Scalar) baij->stash.idx[i]; 684 svalues[3*starts[owner[i]]+1] = (Scalar) baij->stash.idy[i]; 685 svalues[3*(starts[owner[i]]++)+2] = baij->stash.array[i]; 686 } 687 PetscFree(owner); 688 starts[0] = 0; 689 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 690 count = 0; 691 for ( i=0; i<size; i++ ) { 692 if (procs[i]) { 693 ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 694 } 695 } 696 PetscFree(starts); PetscFree(nprocs); 697 698 /* Free cache space */ 699 
PLogInfo(0,"[%d]MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",rank,baij->stash.n); 700 ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 701 702 baij->svalues = svalues; baij->rvalues = rvalues; 703 baij->nsends = nsends; baij->nrecvs = nreceives; 704 baij->send_waits = send_waits; baij->recv_waits = recv_waits; 705 baij->rmax = nmax; 706 707 PetscFunctionReturn(0); 708 } 709 710 /* 711 Creates the hash table, and sets the table 712 This table is created only once. 713 If new entried need to be added to the matrix 714 then the hash table has to be destroyed and 715 recreated. 716 */ 717 #undef __FUNC__ 718 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private" 719 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor) 720 { 721 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 722 Mat A = baij->A, B=baij->B; 723 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data; 724 int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 725 int size,ct=0,max1=0,max2=0,bs2=baij->bs2,rstart=baij->rstart; 726 int cstart=baij->cstart,*garray=baij->garray,row,col; 727 int *HT,key; 728 Scalar **HD; 729 730 PetscFunctionBegin; 731 baij->ht_size=(int)(factor*nz); 732 size = baij->ht_size; 733 734 if (baij->ht) { 735 PetscFunctionReturn(0); 736 } 737 738 /* Allocate Memory for Hash Table */ 739 baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd); 740 baij->ht = (int*)(baij->hd + size); 741 HD = baij->hd; 742 HT = baij->ht; 743 744 745 PetscMemzero(HT,size*sizeof(int)+sizeof(Scalar*)); 746 747 748 /* Loop Over A */ 749 for ( i=0; i<a->mbs; i++ ) { 750 for ( j=ai[i]; j<ai[i+1]; j++ ) { 751 row = i+rstart; 752 col = aj[j]+cstart; 753 754 key = row*baij->Nbs + col + 1; 755 /* printf("[%d]row,col,key = %2d,%2d,%5.2f\n",PetscGlobalRank,row,col,key); */ 756 h1 = HASH1(size,key); 757 758 for ( k=0; k<size; k++ ){ 759 if (HT[(h1+k)%size] == 0.0) { 760 HT[(h1+k)%size] = key; 761 HD[(h1+k)%size] = a->a + j*bs2; 
762 break; 763 } else ct++; 764 } 765 if (k> max1) max1 = k; 766 } 767 } 768 /* Loop Over B */ 769 for ( i=0; i<b->mbs; i++ ) { 770 for ( j=bi[i]; j<bi[i+1]; j++ ) { 771 row = i+rstart; 772 col = garray[bj[j]]; 773 key = row*baij->Nbs + col + 1; 774 /* printf("[%d]row,col,key = %2d,%2d,%5.2f\n",PetscGlobalRank,row,col,key); */ 775 h1 = HASH1(size,key); 776 for ( k=0; k<size; k++ ){ 777 if (HT[(h1+k)%size] == 0.0) { 778 HT[(h1+k)%size] = key; 779 HD[(h1+k)%size] = b->a + j*bs2; 780 break; 781 } else ct++; 782 } 783 if (k> max2) max2 = k; 784 } 785 } 786 787 /* Print Summary */ 788 for ( i=0,key=0.0,j=0; i<size; i++) 789 if (HT[i]) {j++;} 790 791 /*printf("[%d] fact = %3.2f max1 = %5d max2 = %5d Size %5d - Searches %5d Avg %5.2f Keys %5d\n", 792 PetscGlobalRank,factor,max1,max2,size,ct+j,((float)ct+j),j); */ 793 fflush(stdout); 794 PetscFunctionReturn(0); 795 } 796 797 #undef __FUNC__ 798 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ" 799 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 800 { 801 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 802 MPI_Status *send_status,recv_status; 803 int imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr; 804 int bs=baij->bs,row,col,other_disassembled,flg; 805 Scalar *values,val; 806 InsertMode addv = mat->insertmode; 807 808 PetscFunctionBegin; 809 /* wait on receives */ 810 while (count) { 811 ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 812 /* unpack receives into our local space */ 813 values = baij->rvalues + 3*imdex*baij->rmax; 814 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 815 n = n/3; 816 for ( i=0; i<n; i++ ) { 817 row = (int) PetscReal(values[3*i]) - baij->rstart*bs; 818 col = (int) PetscReal(values[3*i+1]); 819 val = values[3*i+2]; 820 if (col >= baij->cstart*bs && col < baij->cend*bs) { 821 col -= baij->cstart*bs; 822 ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 823 } else { 824 if (mat->was_assembled) { 825 if (!baij->colmap) { 826 
ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr); 827 } 828 col = (baij->colmap[col/bs]) - 1 + col%bs; 829 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 830 ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr); 831 col = (int) PetscReal(values[3*i+1]); 832 } 833 } 834 ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr) 835 } 836 } 837 count--; 838 } 839 PetscFree(baij->recv_waits); PetscFree(baij->rvalues); 840 841 /* wait on sends */ 842 if (baij->nsends) { 843 send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 844 ierr = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr); 845 PetscFree(send_status); 846 } 847 PetscFree(baij->send_waits); PetscFree(baij->svalues); 848 849 ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr); 850 ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr); 851 852 /* determine if any processor has disassembled, if so we must 853 also disassemble ourselfs, in order that we may reassemble. 
*/
  /* Determine whether any process disassembled; if one did (and we did not) we must too,
     so that all processes agree on the parallel data layout before final assembly. */
  ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
  }

  /* First final assembly: build the scatter context for matrix-vector products */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);

  /* Optionally switch to hash-table based MatSetValues for subsequent insertions */
  ierr = OptionsHasName(PETSC_NULL,"-use_hash",&flg); CHKERRQ(ierr);
  if (flg && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    double fact = 1.39;   /* default hash-table over-allocation factor */
    ierr = OptionsGetDouble(PETSC_NULL,"-use_hash",&fact,&flg); CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    PLogInfo(0,"[%d]MatAssemblyEnd_MPIBAIJ:Hash table Factor used %5.2f\n",PetscGlobalRank,fact);
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,fact); CHKERRQ(ierr);
    mat->ops.setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops.setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }
  if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
  PetscFunctionReturn(0);
}

/*
   MatView_MPIBAIJ_Binary - Binary viewer; only implemented for a single process
   (the sequential diagonal block then IS the whole matrix).
*/
#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_Binary"
static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

/*
   MatView_MPIBAIJ_ASCIIorDraworMatlab - Views the matrix on an ASCII, Draw or
   Matlab viewer.  For info formats only statistics are printed; otherwise for
   more than one process the entire matrix is assembled onto process 0 and
   viewed there.
*/
#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr, format,rank,bs = baij->bs;
  FILE        *fd;
  ViewerType  vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerGetFormat(viewer,&format);   /* NOTE(review): return value not CHKERRQ-checked */
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);
      /* print one process at a time to keep the output ordered */
      PetscSequentialPhaseBegin(mat->comm,1);
      fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      PetscPrintf(mat->comm," block size is %d\n",bs);
      PetscFunctionReturn(0);
    }
  }

  if (vtype == DRAW_VIEWER) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (vtype == ASCII_FILE_VIEWER) {
    /* one-file-per-process style output: each process prints its own pieces */
    ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
            baij->cstart*bs,baij->cend*bs);
    ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  } else {
    int size = baij->size;
    rank = baij->rank;
    if (size == 1) {
      ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
    } else {
      /* assemble the entire matrix onto first processor. */
      Mat         A;
      Mat_SeqBAIJ *Aloc;
      int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
      int         mbs = baij->mbs;
      Scalar      *a;

      /* process 0 receives the whole matrix; others contribute empty local parts */
      if (!rank) {
        ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
      } else {
        ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqBAIJ*) baij->A->data;
      ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = baij->rstart;
      rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);

      for ( i=0; i<mbs; i++ ) {
        /* rvals holds the bs consecutive global (point) row numbers of block row i */
        rvals[0] = bs*(baij->rstart + i);
        for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
        for ( j=ai[i]; j<ai[i+1]; j++ ) {
          col = (baij->cstart+aj[j])*bs;
          for (k=0; k<bs; k++ ) {
            /* blocks are stored column-major: each step inserts one column of the block */
            ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
            col++; a += bs;
          }
        }
      }
      /* copy over the B part; garray maps local off-diagonal block columns to global */
      Aloc = (Mat_SeqBAIJ*) baij->B->data;
      ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = baij->rstart*bs;
      for ( i=0; i<mbs; i++ ) {
        rvals[0] = bs*(baij->rstart + i);
        for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
        for ( j=ai[i]; j<ai[i+1]; j++ ) {
          col = baij->garray[aj[j]]*bs;
          for (k=0; k<bs; k++ ) {
            ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
            col++; a += bs;
          }
        }
      }
      PetscFree(rvals);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      /*
          Everyone has to call to draw the matrix since the graphics waits are
          synchronized across all processors that share the Draw object
      */
      if (!rank || vtype == DRAW_VIEWER) {
        ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}



#undef
__FUNC__ 1017 #define __FUNC__ "MatView_MPIBAIJ" 1018 int MatView_MPIBAIJ(PetscObject obj,Viewer viewer) 1019 { 1020 Mat mat = (Mat) obj; 1021 int ierr; 1022 ViewerType vtype; 1023 1024 PetscFunctionBegin; 1025 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 1026 if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 1027 vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 1028 ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 1029 } else if (vtype == BINARY_FILE_VIEWER) { 1030 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1031 } 1032 PetscFunctionReturn(0); 1033 } 1034 1035 #undef __FUNC__ 1036 #define __FUNC__ "MatDestroy_MPIBAIJ" 1037 int MatDestroy_MPIBAIJ(PetscObject obj) 1038 { 1039 Mat mat = (Mat) obj; 1040 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1041 int ierr; 1042 1043 PetscFunctionBegin; 1044 #if defined(USE_PETSC_LOG) 1045 PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N); 1046 #endif 1047 1048 ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr); 1049 PetscFree(baij->rowners); 1050 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1051 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1052 if (baij->colmap) PetscFree(baij->colmap); 1053 if (baij->garray) PetscFree(baij->garray); 1054 if (baij->lvec) VecDestroy(baij->lvec); 1055 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1056 if (baij->rowvalues) PetscFree(baij->rowvalues); 1057 if (baij->barray) PetscFree(baij->barray); 1058 if (baij->hd) PetscFree(baij->hd); 1059 PetscFree(baij); 1060 PLogObjectDestroy(mat); 1061 PetscHeaderDestroy(mat); 1062 PetscFunctionReturn(0); 1063 } 1064 1065 #undef __FUNC__ 1066 #define __FUNC__ "MatMult_MPIBAIJ" 1067 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1068 { 1069 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1070 int ierr, nt; 1071 1072 PetscFunctionBegin; 1073 VecGetLocalSize_Fast(xx,nt); 1074 if (nt != a->n) { 1075 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1076 } 1077 VecGetLocalSize_Fast(yy,nt); 
1078 if (nt != a->m) { 1079 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1080 } 1081 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1082 ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 1083 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1084 ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 1085 ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1086 PetscFunctionReturn(0); 1087 } 1088 1089 #undef __FUNC__ 1090 #define __FUNC__ "MatMultAdd_MPIBAIJ" 1091 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1092 { 1093 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1094 int ierr; 1095 1096 PetscFunctionBegin; 1097 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1098 ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 1099 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 1100 ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 1101 PetscFunctionReturn(0); 1102 } 1103 1104 #undef __FUNC__ 1105 #define __FUNC__ "MatMultTrans_MPIBAIJ" 1106 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy) 1107 { 1108 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1109 int ierr; 1110 1111 PetscFunctionBegin; 1112 /* do nondiagonal part */ 1113 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 1114 /* send it on its way */ 1115 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1116 /* do local part */ 1117 ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 1118 /* receive remote parts: note this assumes the values are not actually */ 1119 /* inserted in yy until the next line, which is true for my implementation*/ 1120 /* but is not perhaps always true. 
*/
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIBAIJ"
/* MatMultTransAdd_MPIBAIJ - z = y + A'*x; same communication pattern as MatMultTrans */
int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIBAIJ"
int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatScale_MPIBAIJ"
/* MatScale_MPIBAIJ - scales both the diagonal and off-diagonal blocks by *aa */
int MatScale_MPIBAIJ(Scalar *aa,Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIBAIJ"
/* MatGetSize_MPIBAIJ - returns the global (point, not block) dimensions */
int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->M;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIBAIJ"
/* MatGetLocalSize_MPIBAIJ - returns local row count and column count */
int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  /* NOTE(review): *n is set to the GLOBAL column count mat->N, not the local
     mat->n; this looks inconsistent with *m = mat->m -- confirm intent */
  *m = mat->m; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
/* MatGetOwnershipRange_MPIBAIJ - first and one-past-last owned point row */
int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
  PetscFunctionReturn(0);
}

extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIBAIJ"
/*
   MatGetRow_MPIBAIJ - Returns one locally-owned (point) row, merging the
   entries of the diagonal block A and off-diagonal block B into a single
   list sorted by global column number.
*/
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
  Scalar      *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int         bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
  int         nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
  int         *cmap, *idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
    int         max = 1,mbs = mat->mbs,tmp;
    for ( i=0; i<mbs; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* one malloc holds both the values and (behind them) the indices */
    mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
  lrow = row - brstart;

  /* only request from A and B the pieces (values/columns) the caller wants */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column lies before cstart */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr =
(*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1291 ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1292 PetscFunctionReturn(0); 1293 } 1294 1295 #undef __FUNC__ 1296 #define __FUNC__ "MatRestoreRow_MPIBAIJ" 1297 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1298 { 1299 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1300 1301 PetscFunctionBegin; 1302 if (baij->getrowactive == PETSC_FALSE) { 1303 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1304 } 1305 baij->getrowactive = PETSC_FALSE; 1306 PetscFunctionReturn(0); 1307 } 1308 1309 #undef __FUNC__ 1310 #define __FUNC__ "MatGetBlockSize_MPIBAIJ" 1311 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs) 1312 { 1313 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 1314 1315 PetscFunctionBegin; 1316 *bs = baij->bs; 1317 PetscFunctionReturn(0); 1318 } 1319 1320 #undef __FUNC__ 1321 #define __FUNC__ "MatZeroEntries_MPIBAIJ" 1322 int MatZeroEntries_MPIBAIJ(Mat A) 1323 { 1324 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1325 int ierr; 1326 1327 PetscFunctionBegin; 1328 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 1329 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 1330 PetscFunctionReturn(0); 1331 } 1332 1333 #undef __FUNC__ 1334 #define __FUNC__ "MatGetInfo_MPIBAIJ" 1335 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1336 { 1337 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data; 1338 Mat A = a->A, B = a->B; 1339 int ierr; 1340 double isend[5], irecv[5]; 1341 1342 PetscFunctionBegin; 1343 info->block_size = (double)a->bs; 1344 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1345 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory; 1346 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1347 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory; 1348 if (flag == MAT_LOCAL) { 1349 info->nz_used = isend[0]; 1350 info->nz_allocated = isend[1]; 1351 info->nz_unneeded = isend[2]; 1352 
info->memory = isend[3]; 1353 info->mallocs = isend[4]; 1354 } else if (flag == MAT_GLOBAL_MAX) { 1355 ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm);CHKERRQ(ierr); 1356 info->nz_used = irecv[0]; 1357 info->nz_allocated = irecv[1]; 1358 info->nz_unneeded = irecv[2]; 1359 info->memory = irecv[3]; 1360 info->mallocs = irecv[4]; 1361 } else if (flag == MAT_GLOBAL_SUM) { 1362 ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm);CHKERRQ(ierr); 1363 info->nz_used = irecv[0]; 1364 info->nz_allocated = irecv[1]; 1365 info->nz_unneeded = irecv[2]; 1366 info->memory = irecv[3]; 1367 info->mallocs = irecv[4]; 1368 } 1369 info->rows_global = (double)a->M; 1370 info->columns_global = (double)a->N; 1371 info->rows_local = (double)a->m; 1372 info->columns_local = (double)a->N; 1373 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1374 info->fill_ratio_needed = 0; 1375 info->factor_mallocs = 0; 1376 PetscFunctionReturn(0); 1377 } 1378 1379 #undef __FUNC__ 1380 #define __FUNC__ "MatSetOption_MPIBAIJ" 1381 int MatSetOption_MPIBAIJ(Mat A,MatOption op) 1382 { 1383 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data; 1384 1385 PetscFunctionBegin; 1386 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1387 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1388 op == MAT_COLUMNS_UNSORTED || 1389 op == MAT_COLUMNS_SORTED || 1390 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1391 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1392 MatSetOption(a->A,op); 1393 MatSetOption(a->B,op); 1394 } else if (op == MAT_ROW_ORIENTED) { 1395 a->roworiented = 1; 1396 MatSetOption(a->A,op); 1397 MatSetOption(a->B,op); 1398 } else if (op == MAT_ROWS_SORTED || 1399 op == MAT_ROWS_UNSORTED || 1400 op == MAT_SYMMETRIC || 1401 op == MAT_STRUCTURALLY_SYMMETRIC || 1402 op == MAT_YES_NEW_DIAGONALS) 1403 PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1404 else if (op == MAT_COLUMN_ORIENTED) { 1405 a->roworiented = 0; 1406 MatSetOption(a->A,op); 1407 MatSetOption(a->B,op); 1408 } else if (op == 
MAT_IGNORE_OFF_PROC_ENTRIES) { 1409 a->donotstash = 1; 1410 } else if (op == MAT_NO_NEW_DIAGONALS) { 1411 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1412 } else { 1413 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1414 } 1415 PetscFunctionReturn(0); 1416 } 1417 1418 #undef __FUNC__ 1419 #define __FUNC__ "MatTranspose_MPIBAIJ(" 1420 int MatTranspose_MPIBAIJ(Mat A,Mat *matout) 1421 { 1422 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 1423 Mat_SeqBAIJ *Aloc; 1424 Mat B; 1425 int ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col; 1426 int bs=baij->bs,mbs=baij->mbs; 1427 Scalar *a; 1428 1429 PetscFunctionBegin; 1430 if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1431 ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B); 1432 CHKERRQ(ierr); 1433 1434 /* copy over the A part */ 1435 Aloc = (Mat_SeqBAIJ*) baij->A->data; 1436 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1437 row = baij->rstart; 1438 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 1439 1440 for ( i=0; i<mbs; i++ ) { 1441 rvals[0] = bs*(baij->rstart + i); 1442 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1443 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1444 col = (baij->cstart+aj[j])*bs; 1445 for (k=0; k<bs; k++ ) { 1446 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1447 col++; a += bs; 1448 } 1449 } 1450 } 1451 /* copy over the B part */ 1452 Aloc = (Mat_SeqBAIJ*) baij->B->data; 1453 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1454 row = baij->rstart*bs; 1455 for ( i=0; i<mbs; i++ ) { 1456 rvals[0] = bs*(baij->rstart + i); 1457 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 1458 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1459 col = baij->garray[aj[j]]*bs; 1460 for (k=0; k<bs; k++ ) { 1461 ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1462 col++; a += bs; 1463 } 1464 } 1465 } 1466 PetscFree(rvals); 1467 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); 
CHKERRQ(ierr); 1468 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1469 1470 if (matout != PETSC_NULL) { 1471 *matout = B; 1472 } else { 1473 /* This isn't really an in-place transpose .... but free data structures from baij */ 1474 PetscFree(baij->rowners); 1475 ierr = MatDestroy(baij->A); CHKERRQ(ierr); 1476 ierr = MatDestroy(baij->B); CHKERRQ(ierr); 1477 if (baij->colmap) PetscFree(baij->colmap); 1478 if (baij->garray) PetscFree(baij->garray); 1479 if (baij->lvec) VecDestroy(baij->lvec); 1480 if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 1481 PetscFree(baij); 1482 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1483 PetscHeaderDestroy(B); 1484 } 1485 PetscFunctionReturn(0); 1486 } 1487 1488 #undef __FUNC__ 1489 #define __FUNC__ "MatDiagonalScale_MPIBAIJ" 1490 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr) 1491 { 1492 Mat a = ((Mat_MPIBAIJ *) A->data)->A; 1493 Mat b = ((Mat_MPIBAIJ *) A->data)->B; 1494 int ierr,s1,s2,s3; 1495 1496 PetscFunctionBegin; 1497 if (ll) { 1498 ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr); 1499 ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr); 1500 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes"); 1501 ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr); 1502 ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr); 1503 } 1504 if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector"); 1505 PetscFunctionReturn(0); 1506 } 1507 1508 #undef __FUNC__ 1509 #define __FUNC__ "MatZeroRows_MPIBAIJ" 1510 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag) 1511 { 1512 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1513 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 1514 int *procs,*nprocs,j,found,idx,nsends,*work,row; 1515 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1516 int *rvalues,tag = A->tag,count,base,slen,n,*source; 1517 int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1518 MPI_Comm comm = A->comm; 1519 MPI_Request *send_waits,*recv_waits; 1520 MPI_Status recv_status,*send_status; 1521 
IS istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    /* owners[] is in block units; scale by bs to compare point row indices */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:   */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack each row index into the slot for its owner (starts[] is consumed here) */
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  /* rebuild starts[] (it was advanced by the packing loop above) */
  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);

  base = owners[rank]*bs;

  /*  wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]  = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;   /* convert to local point row index */
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* optionally place the requested value back on the diagonal */
  if (diag) {
    for ( i = 0; i < slen; i++ ) {
      row = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  }
  PetscFree(lrows);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  PetscFunctionReturn(0);
}
extern int MatPrintHelp_SeqBAIJ(Mat);
#undef __FUNC__
#define __FUNC__ "MatPrintHelp_MPIBAIJ"
/* MatPrintHelp_MPIBAIJ - prints the SeqBAIJ options help, on the first process only */
int MatPrintHelp_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;
  int         ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetUnfactored_MPIBAIJ"
/* MatSetUnfactored_MPIBAIJ - clears the factored state of the diagonal block */
int MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);

/* -------------------------------------------------------------------*/
/* Function table for the MPIBAIJ matrix type; zeros mark unimplemented ops */
static struct _MatOps MatOps = {
  MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
  0,0,0,0,
  0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
  0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0,
  0,0,0,MatGetSize_MPIBAIJ,
  MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
  0,0,MatConvertSameType_MPIBAIJ,0,0,
  0,0,0,MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ,
0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ};


#undef __FUNC__
#define __FUNC__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters 
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the 
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the 
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
.  d_nz  - number of block nonzeros per block row in diagonal portion of local 
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows 
           in the diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.  You must leave room for the diagonal
           entry even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
.  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix 

   Notes:
   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Storage Information:
   For a square global matrix we define each processor's diagonal portion 
   to be its local rows and the corresponding columns (a square submatrix);  
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix). 

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set 
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

$            0 1 2 3 4 5 6 7 8 9 10 11
$           -------------------
$   row 3  | o o o d d d o o o o o o
$   row 4  | o o o d d d o o o o o o
$   row 5  | o o o d d d o o o o o o
$           -------------------
$ 

   Thus, any entries in the d locations are stored in the d (diagonal) 
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
@*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat          B;
  Mat_MPIBAIJ  *b;
  int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;

  PetscFunctionBegin;
  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");

  /* on one process a sequential BAIJ matrix suffices */
  MPI_Comm_size(comm,&size);
  if (size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIBAIJ,comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIBAIJ));
  PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));

  B->destroy    = MatDestroy_MPIBAIJ;
  B->view       = MatView_MPIBAIJ;
  B->mapping    = 0;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  /* validate the combinations of local/global size arguments */
  if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }
  if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
  }
  if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
  }
  /* when both are given, the local sizes win and the global ones are recomputed */
  if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
  if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    mbs = m/bs; nbs = n/bs;
    ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
    if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
    if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
  }
  if (m == PETSC_DECIDE) {
    Mbs = M/bs;
    if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
    /* distribute block rows as evenly as possible; low ranks get the remainder */
    mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
    m   = mbs*bs;
  }
  if (n == PETSC_DECIDE) {
    Nbs = N/bs;
    if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
    nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
    n   = nbs*bs;
  }
  if (mbs*bs != m || nbs*bs != n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
  }

  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;
  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = nbs;
  b->Mbs = Mbs;
  b->Nbs = Nbs;

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  /* prefix-sum the per-process block row counts into ownership offsets */
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart    = b->rowners[b->rank];
  b->rend      = b->rowners[b->rank+1];
  b->rstart_bs = b->rstart * bs;
  b->rend_bs   = b->rend * bs;

  ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart    = b->cowners[b->rank];
  b->cend      = b->cowners[b->rank+1];
  b->cstart_bs = b->cstart * bs;
  b->cend_bs   = b->cend * bs;


  /* the off-diagonal block B is created with the full global column width N */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used in block assembly */
  b->barray       = 0;

  /* stuff used for matrix vector multiply */
  b->lvec         = 0;
  b->Mvctx        = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->ht_size      = 0;

  *A = B;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatConvertSameType_MPIBAIJ"
/* MatConvertSameType_MPIBAIJ - duplicates an MPIBAIJ matrix (definition continues past this view) */
static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat         mat;
  Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
  int         ierr, len=0, flg;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data      = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIBAIJ;
  mat->view      = MatView_MPIBAIJ;
  mat->factor    = matin->factor;
  mat->assembled = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->bs  = oldmat->bs;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->rstart = oldmat->rstart;
  a->rend   = oldmat->rend;
a->cstart = oldmat->cstart; 1938 a->cend = oldmat->cend; 1939 a->size = oldmat->size; 1940 a->rank = oldmat->rank; 1941 mat->insertmode = NOT_SET_VALUES; 1942 a->rowvalues = 0; 1943 a->getrowactive = PETSC_FALSE; 1944 a->barray = 0; 1945 1946 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1947 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ)); 1948 a->cowners = a->rowners + a->size + 2; 1949 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1950 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1951 if (oldmat->colmap) { 1952 a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 1953 PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 1954 PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int)); 1955 } else a->colmap = 0; 1956 if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) { 1957 a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray); 1958 PLogObjectMemory(mat,len*sizeof(int)); 1959 PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1960 } else a->garray = 0; 1961 1962 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1963 PLogObjectParent(mat,a->lvec); 1964 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1965 PLogObjectParent(mat,a->Mvctx); 1966 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1967 PLogObjectParent(mat,a->A); 1968 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1969 PLogObjectParent(mat,a->B); 1970 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 1971 if (flg) { 1972 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1973 } 1974 *newmat = mat; 1975 PetscFunctionReturn(0); 1976 } 1977 1978 #include "sys.h" 1979 1980 #undef __FUNC__ 1981 #define __FUNC__ "MatLoad_MPIBAIJ" 1982 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat) 1983 { 1984 Mat A; 1985 int i, nz, ierr, j,rstart, rend, fd; 1986 Scalar *vals,*buf; 1987 MPI_Comm comm = 
((PetscObject)viewer)->comm; 1988 MPI_Status status; 1989 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 1990 int *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf; 1991 int flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows; 1992 int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 1993 int dcount,kmax,k,nzcount,tmp; 1994 1995 PetscFunctionBegin; 1996 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 1997 bs2 = bs*bs; 1998 1999 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 2000 if (!rank) { 2001 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2002 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 2003 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2004 if (header[3] < 0) { 2005 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ"); 2006 } 2007 } 2008 2009 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2010 M = header[1]; N = header[2]; 2011 2012 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 2013 2014 /* 2015 This code adds extra rows to make sure the number of rows is 2016 divisible by the blocksize 2017 */ 2018 Mbs = M/bs; 2019 extra_rows = bs - M + bs*(Mbs); 2020 if (extra_rows == bs) extra_rows = 0; 2021 else Mbs++; 2022 if (extra_rows &&!rank) { 2023 PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"); 2024 } 2025 2026 /* determine ownership of all rows */ 2027 mbs = Mbs/size + ((Mbs % size) > rank); 2028 m = mbs * bs; 2029 rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners); 2030 browners = rowners + size + 1; 2031 ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2032 rowners[0] = 0; 2033 for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1]; 2034 for ( i=0; i<=size; i++ ) browners[i] = rowners[i]*bs; 2035 rstart = rowners[rank]; 2036 rend = 
rowners[rank+1]; 2037 2038 /* distribute row lengths to all processors */ 2039 locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens); 2040 if (!rank) { 2041 rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths); 2042 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 2043 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2044 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 2045 for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i]; 2046 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2047 PetscFree(sndcounts); 2048 } else { 2049 ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr); 2050 } 2051 2052 if (!rank) { 2053 /* calculate the number of nonzeros on each processor */ 2054 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 2055 PetscMemzero(procsnz,size*sizeof(int)); 2056 for ( i=0; i<size; i++ ) { 2057 for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) { 2058 procsnz[i] += rowlengths[j]; 2059 } 2060 } 2061 PetscFree(rowlengths); 2062 2063 /* determine max buffer needed and allocate it */ 2064 maxnz = 0; 2065 for ( i=0; i<size; i++ ) { 2066 maxnz = PetscMax(maxnz,procsnz[i]); 2067 } 2068 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 2069 2070 /* read in my part of the matrix column indices */ 2071 nz = procsnz[0]; 2072 ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 2073 mycols = ibuf; 2074 if (size == 1) nz -= extra_rows; 2075 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 2076 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2077 2078 /* read in every ones (except the last) and ship off */ 2079 for ( i=1; i<size-1; i++ ) { 2080 nz = procsnz[i]; 2081 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2082 ierr = 
MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2083 } 2084 /* read in the stuff for the last proc */ 2085 if ( size != 1 ) { 2086 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2087 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2088 for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i; 2089 ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2090 } 2091 PetscFree(cols); 2092 } else { 2093 /* determine buffer space needed for message */ 2094 nz = 0; 2095 for ( i=0; i<m; i++ ) { 2096 nz += locrowlens[i]; 2097 } 2098 ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf); 2099 mycols = ibuf; 2100 /* receive message of column indices*/ 2101 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2102 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2103 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2104 } 2105 2106 /* loop over local rows, determining number of off diagonal entries */ 2107 dlens = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens); 2108 odlens = dlens + (rend-rstart); 2109 mask = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask); 2110 PetscMemzero(mask,3*Mbs*sizeof(int)); 2111 masked1 = mask + Mbs; 2112 masked2 = masked1 + Mbs; 2113 rowcount = 0; nzcount = 0; 2114 for ( i=0; i<mbs; i++ ) { 2115 dcount = 0; 2116 odcount = 0; 2117 for ( j=0; j<bs; j++ ) { 2118 kmax = locrowlens[rowcount]; 2119 for ( k=0; k<kmax; k++ ) { 2120 tmp = mycols[nzcount++]/bs; 2121 if (!mask[tmp]) { 2122 mask[tmp] = 1; 2123 if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp; 2124 else masked1[dcount++] = tmp; 2125 } 2126 } 2127 rowcount++; 2128 } 2129 2130 dlens[i] = dcount; 2131 odlens[i] = odcount; 2132 2133 /* zero out the mask elements we set */ 2134 for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0; 2135 for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0; 2136 } 2137 2138 /* create our matrix */ 2139 ierr = 
MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat); 2140 CHKERRQ(ierr); 2141 A = *newmat; 2142 MatSetOption(A,MAT_COLUMNS_SORTED); 2143 2144 if (!rank) { 2145 buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf); 2146 /* read in my part of the matrix numerical values */ 2147 nz = procsnz[0]; 2148 vals = buf; 2149 mycols = ibuf; 2150 if (size == 1) nz -= extra_rows; 2151 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2152 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2153 2154 /* insert into matrix */ 2155 jj = rstart*bs; 2156 for ( i=0; i<m; i++ ) { 2157 ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2158 mycols += locrowlens[i]; 2159 vals += locrowlens[i]; 2160 jj++; 2161 } 2162 /* read in other processors (except the last one) and ship out */ 2163 for ( i=1; i<size-1; i++ ) { 2164 nz = procsnz[i]; 2165 vals = buf; 2166 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2167 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2168 } 2169 /* the last proc */ 2170 if ( size != 1 ){ 2171 nz = procsnz[i] - extra_rows; 2172 vals = buf; 2173 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2174 for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0; 2175 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2176 } 2177 PetscFree(procsnz); 2178 } else { 2179 /* receive numeric values */ 2180 buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf); 2181 2182 /* receive message of values*/ 2183 vals = buf; 2184 mycols = ibuf; 2185 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2186 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2187 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2188 2189 /* insert into matrix */ 2190 jj = rstart*bs; 2191 for ( i=0; i<m; i++ ) { 2192 ierr = 
MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2193 mycols += locrowlens[i]; 2194 vals += locrowlens[i]; 2195 jj++; 2196 } 2197 } 2198 PetscFree(locrowlens); 2199 PetscFree(buf); 2200 PetscFree(ibuf); 2201 PetscFree(rowners); 2202 PetscFree(dlens); 2203 PetscFree(mask); 2204 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2205 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2206 PetscFunctionReturn(0); 2207 } 2208 2209 2210