1 #ifndef lint 2 static char vcid[] = "$Id: baij.c,v 1.70 1996/11/13 21:20:56 bsmith Exp bsmith $"; 3 #endif 4 5 /* 6 Defines the basic matrix operations for the BAIJ (compressed row) 7 matrix storage format. 8 */ 9 #include "src/mat/impls/baij/seq/baij.h" 10 #include "src/vec/vecimpl.h" 11 #include "src/inline/spops.h" 12 #include "petsc.h" 13 14 15 /* 16 Adds diagonal pointers to sparse matrix structure. 17 */ 18 19 int MatMarkDiag_SeqBAIJ(Mat A) 20 { 21 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 22 int i,j, *diag, m = a->mbs; 23 24 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 25 PLogObjectMemory(A,(m+1)*sizeof(int)); 26 for ( i=0; i<m; i++ ) { 27 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 28 if (a->j[j] == i) { 29 diag[i] = j; 30 break; 31 } 32 } 33 } 34 a->diag = diag; 35 return 0; 36 } 37 38 #include "draw.h" 39 #include "pinclude/pviewer.h" 40 #include "sys.h" 41 42 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 43 44 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 45 PetscTruth *done) 46 { 47 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 48 int ierr, n = a->mbs,i; 49 50 *nn = n; 51 if (!ia) return 0; 52 if (symmetric) { 53 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 54 } else if (oshift == 1) { 55 /* temporarily add 1 to i and j indices */ 56 int nz = a->i[n] + 1; 57 for ( i=0; i<nz; i++ ) a->j[i]++; 58 for ( i=0; i<n+1; i++ ) a->i[i]++; 59 *ia = a->i; *ja = a->j; 60 } else { 61 *ia = a->i; *ja = a->j; 62 } 63 64 return 0; 65 } 66 67 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 68 PetscTruth *done) 69 { 70 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 71 int i,n = a->mbs; 72 73 if (!ia) return 0; 74 if (symmetric) { 75 PetscFree(*ia); 76 PetscFree(*ja); 77 } else if (oshift == 1) { 78 int nz = a->i[n]; 79 for ( i=0; i<nz; i++ ) a->j[i]--; 80 for ( i=0; i<n+1; i++ ) a->i[i]--; 81 } 82 return 0; 83 } 84 85 86 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 87 { 88 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 89 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 90 Scalar *aa; 91 92 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 93 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 94 col_lens[0] = MAT_COOKIE; 95 96 col_lens[1] = a->m; 97 col_lens[2] = a->n; 98 col_lens[3] = a->nz*bs2; 99 100 /* store lengths of each row and write (including header) to file */ 101 count = 0; 102 for ( i=0; i<a->mbs; i++ ) { 103 for ( j=0; j<bs; j++ ) { 104 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 105 } 106 } 107 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 108 PetscFree(col_lens); 109 110 /* store column indices (zero start index) */ 111 jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj); 112 count = 0; 113 for ( i=0; i<a->mbs; i++ ) { 114 for ( j=0; j<bs; j++ ) { 115 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 116 for ( l=0; l<bs; l++ ) { 117 jj[count++] = bs*a->j[k] + l; 118 } 119 } 120 } 121 } 122 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 123 PetscFree(jj); 124 125 /* store nonzero values */ 126 aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa); 127 count = 0; 128 for ( i=0; i<a->mbs; i++ ) { 129 for ( j=0; j<bs; j++ ) { 130 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 131 for ( l=0; l<bs; l++ ) { 132 aa[count++] = a->a[bs2*k + l*bs + j]; 133 } 134 } 135 } 136 } 137 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 138 PetscFree(aa); 139 return 0; 140 } 141 142 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 143 { 144 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 145 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 146 FILE *fd; 147 char *outputname; 148 149 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 150 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 151 ierr = ViewerGetFormat(viewer,&format); 152 if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 153 fprintf(fd," block size is %d\n",bs); 154 } 155 else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 156 SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 157 } 158 else if (format == VIEWER_FORMAT_ASCII_COMMON) { 159 for ( i=0; i<a->mbs; i++ ) { 160 for ( j=0; j<bs; j++ ) { 161 fprintf(fd,"row %d:",i*bs+j); 162 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 163 for ( l=0; l<bs; l++ ) { 164 #if defined(PETSC_COMPLEX) 165 if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 166 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 167 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 168 else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 169 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 170 #else 171 if (a->a[bs2*k + l*bs + j] != 0.0) 172 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 173 #endif 174 } 175 } 176 fprintf(fd,"\n"); 177 } 178 } 179 } 180 else { 181 for ( i=0; i<a->mbs; i++ ) { 182 for ( j=0; j<bs; j++ ) { 183 fprintf(fd,"row %d:",i*bs+j); 184 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 185 for ( l=0; l<bs; l++ ) { 186 #if defined(PETSC_COMPLEX) 187 if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 188 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 189 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 190 } 191 else { 192 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 193 } 194 #else 195 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 196 #endif 197 } 198 } 199 fprintf(fd,"\n"); 200 } 201 } 202 } 203 fflush(fd); 204 return 0; 205 } 206 207 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 208 { 209 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 210 int row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2; 211 double xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r; 212 Scalar *aa; 213 Draw draw; 214 DrawButton button; 215 PetscTruth isnull; 216 217 ViewerDrawGetDraw(viewer,&draw); 218 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 219 220 xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 221 xr += w; yr += h; xl = -w; yl = -h; 222 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 223 /* loop over matrix elements drawing boxes */ 224 color = DRAW_BLUE; 225 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 226 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 227 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 228 x_l = a->j[j]*bs; x_r = x_l + 1.0; 229 aa = a->a + j*bs2; 230 for ( k=0; k<bs; k++ ) { 231 for ( l=0; l<bs; l++ ) { 232 if (PetscReal(*aa++) >= 0.) continue; 233 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 234 } 235 } 236 } 237 } 238 color = DRAW_CYAN; 239 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 240 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 241 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 242 x_l = a->j[j]*bs; x_r = x_l + 1.0; 243 aa = a->a + j*bs2; 244 for ( k=0; k<bs; k++ ) { 245 for ( l=0; l<bs; l++ ) { 246 if (PetscReal(*aa++) != 0.) continue; 247 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 248 } 249 } 250 } 251 } 252 253 color = DRAW_RED; 254 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 255 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 256 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 257 x_l = a->j[j]*bs; x_r = x_l + 1.0; 258 aa = a->a + j*bs2; 259 for ( k=0; k<bs; k++ ) { 260 for ( l=0; l<bs; l++ ) { 261 if (PetscReal(*aa++) <= 0.) continue; 262 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 263 } 264 } 265 } 266 } 267 268 DrawFlush(draw); 269 DrawGetPause(draw,&pause); 270 if (pause >= 0) { PetscSleep(pause); return 0;} 271 272 /* allow the matrix to zoom or shrink */ 273 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 274 while (button != BUTTON_RIGHT) { 275 DrawClear(draw); 276 if (button == BUTTON_LEFT) scale = .5; 277 else if (button == BUTTON_CENTER) scale = 2.; 278 xl = scale*(xl + w - xc) + xc - w*scale; 279 xr = scale*(xr - w - xc) + xc + w*scale; 280 yl = scale*(yl + h - yc) + yc - h*scale; 281 yr = scale*(yr - h - yc) + yc + h*scale; 282 w *= scale; h *= scale; 283 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 284 color = DRAW_BLUE; 285 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 286 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 287 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 288 x_l = a->j[j]*bs; x_r = x_l + 1.0; 289 aa = a->a + j*bs2; 290 for ( k=0; k<bs; k++ ) { 291 for ( l=0; l<bs; l++ ) { 292 if (PetscReal(*aa++) >= 0.) continue; 293 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 294 } 295 } 296 } 297 } 298 color = DRAW_CYAN; 299 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 300 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 301 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 302 x_l = a->j[j]*bs; x_r = x_l + 1.0; 303 aa = a->a + j*bs2; 304 for ( k=0; k<bs; k++ ) { 305 for ( l=0; l<bs; l++ ) { 306 if (PetscReal(*aa++) != 0.) continue; 307 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 308 } 309 } 310 } 311 } 312 313 color = DRAW_RED; 314 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 315 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 316 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 317 x_l = a->j[j]*bs; x_r = x_l + 1.0; 318 aa = a->a + j*bs2; 319 for ( k=0; k<bs; k++ ) { 320 for ( l=0; l<bs; l++ ) { 321 if (PetscReal(*aa++) <= 0.) continue; 322 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 323 } 324 } 325 } 326 } 327 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 328 } 329 return 0; 330 } 331 332 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 333 { 334 Mat A = (Mat) obj; 335 ViewerType vtype; 336 int ierr; 337 338 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 339 if (vtype == MATLAB_VIEWER) { 340 SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 341 } 342 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 343 return MatView_SeqBAIJ_ASCII(A,viewer); 344 } 345 else if (vtype == BINARY_FILE_VIEWER) { 346 return MatView_SeqBAIJ_Binary(A,viewer); 347 } 348 else if (vtype == DRAW_VIEWER) { 349 return MatView_SeqBAIJ_Draw(A,viewer); 350 } 351 return 0; 352 } 353 354 #define CHUNKSIZE 10 355 356 /* This version has row oriented v */ 357 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 358 { 359 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 360 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 361 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 362 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 363 int ridx,cidx,bs2=a->bs2; 364 Scalar *ap,value,*aa=a->a,*bap; 365 366 for ( k=0; k<m; k++ ) { /* loop over added rows */ 367 row = im[k]; brow = row/bs; 368 #if defined(PETSC_BOPT_g) 369 if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row"); 370 if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large"); 371 #endif 372 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 373 rmax = imax[brow]; nrow = ailen[brow]; 374 low = 0; 375 for ( l=0; l<n; l++ ) { /* loop over added columns */ 376 #if defined(PETSC_BOPT_g) 377 if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column"); 378 if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large"); 379 #endif 380 col = in[l]; bcol = col/bs; 381 ridx = row % bs; cidx = col % bs; 382 if (roworiented) { 383 value = *v++; 384 } 385 else { 386 value = v[k + l*m]; 387 } 388 if (!sorted) low = 0; high = nrow; 389 while (high-low > 5) { 390 t = (low+high)/2; 391 if (rp[t] > bcol) high = t; 392 else low = t; 393 } 394 for ( i=low; i<high; i++ ) { 395 if (rp[i] > bcol) break; 396 if (rp[i] == bcol) { 397 bap = ap + bs2*i + bs*cidx + ridx; 398 if (is == ADD_VALUES) *bap += value; 399 else *bap = value; 400 goto noinsert; 401 } 402 } 403 if (nonew) goto noinsert; 404 if (nrow >= rmax) { 405 /* there is no extra room in row, therefore enlarge */ 406 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 407 Scalar *new_a; 408 409 /* malloc new storage space */ 410 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 411 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 412 new_j = (int *) (new_a + bs2*new_nz); 413 new_i = new_j + new_nz; 414 415 /* copy over old data into new slots */ 416 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 417 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 418 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 419 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 420 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 421 len*sizeof(int)); 422 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 423 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 424 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 425 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 426 /* free up old matrix storage */ 427 PetscFree(a->a); 428 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 429 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 430 a->singlemalloc = 1; 431 432 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 433 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 434 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 435 a->maxnz += bs2*CHUNKSIZE; 436 a->reallocs++; 437 a->nz++; 438 } 439 N = nrow++ - 1; 440 /* shift up all the later entries in this row */ 441 for ( ii=N; ii>=i; ii-- ) { 442 rp[ii+1] = rp[ii]; 443 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 444 } 445 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 446 rp[i] = bcol; 447 ap[bs2*i + bs*cidx + ridx] = value; 448 noinsert:; 449 low = i; 450 } 451 ailen[brow] = nrow; 452 } 453 return 0; 454 } 455 456 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 457 { 458 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 459 *m = a->m; *n = a->n; 460 return 0; 461 } 462 463 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 464 { 465 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 466 *m = 0; *n = a->m; 467 return 0; 468 } 469 470 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 471 { 472 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 473 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 474 Scalar *aa,*v_i,*aa_i; 475 476 bs = a->bs; 477 ai = a->i; 478 aj = a->j; 479 aa = a->a; 480 bs2 = a->bs2; 481 482 if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range"); 483 484 bn = row/bs; /* Block number */ 485 bp = row % bs; /* Block Position */ 486 M = ai[bn+1] - ai[bn]; 487 *nz = bs*M; 488 489 if (v) { 490 *v = 0; 491 if (*nz) { 492 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 493 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 494 v_i = *v + i*bs; 495 aa_i = aa + bs2*(ai[bn] + i); 496 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 497 } 498 } 499 } 500 501 if (idx) { 502 *idx = 0; 503 if (*nz) { 504 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 505 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 506 idx_i = *idx + i*bs; 507 itmp = bs*aj[ai[bn] + i]; 508 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 509 } 510 } 511 } 512 return 0; 513 } 514 515 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 516 { 517 if (idx) {if (*idx) PetscFree(*idx);} 518 if (v) {if (*v) PetscFree(*v);} 519 return 0; 520 } 521 522 static int MatTranspose_SeqBAIJ(Mat A,Mat *B) 523 { 524 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 525 Mat C; 526 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 527 int *rows,*cols,bs2=a->bs2; 528 Scalar *array=a->a; 529 530 if (B==PETSC_NULL && mbs!=nbs) 531 SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place"); 532 col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 533 PetscMemzero(col,(1+nbs)*sizeof(int)); 534 535 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 536 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 537 PetscFree(col); 538 rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 539 cols = rows + bs; 540 for ( i=0; i<mbs; i++ ) { 541 cols[0] = i*bs; 542 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 543 len = ai[i+1] - ai[i]; 544 for ( j=0; j<len; j++ ) { 545 rows[0] = (*aj++)*bs; 546 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 547 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 548 array += bs2; 549 } 550 } 551 PetscFree(rows); 552 553 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 554 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 555 556 if (B != PETSC_NULL) { 557 *B = C; 558 } else { 559 /* This isn't really an in-place transpose */ 560 PetscFree(a->a); 561 if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 562 if (a->diag) PetscFree(a->diag); 563 if (a->ilen) PetscFree(a->ilen); 564 if (a->imax) PetscFree(a->imax); 565 if (a->solve_work) PetscFree(a->solve_work); 566 PetscFree(a); 567 PetscMemcpy(A,C,sizeof(struct _Mat)); 568 PetscHeaderDestroy(C); 569 } 570 return 0; 571 } 572 573 574 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 575 { 576 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 577 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 578 int m = a->m,*ip, N, *ailen = a->ilen; 579 int mbs = a->mbs, bs2 = a->bs2; 580 Scalar *aa = a->a, *ap; 581 582 if (mode == MAT_FLUSH_ASSEMBLY) return 0; 583 584 for ( i=1; i<mbs; i++ ) { 585 /* move each row back by the amount of empty slots (fshift) before it*/ 586 fshift += imax[i-1] - ailen[i-1]; 587 if (fshift) { 588 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 589 N = ailen[i]; 590 for ( j=0; j<N; j++ ) { 591 ip[j-fshift] = ip[j]; 592 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 593 } 594 } 595 ai[i] = ai[i-1] + ailen[i-1]; 596 } 597 if (mbs) { 598 fshift += imax[mbs-1] - ailen[mbs-1]; 599 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 600 } 601 /* reset ilen and imax for each row */ 602 for ( i=0; i<mbs; i++ ) { 603 ailen[i] = imax[i] = ai[i+1] - ai[i]; 604 } 605 a->nz = ai[mbs]; 606 607 /* diagonals may have moved, so kill the diagonal pointers */ 608 if (fshift && a->diag) { 609 PetscFree(a->diag); 610 PLogObjectMemory(A,-(m+1)*sizeof(int)); 611 a->diag = 0; 612 } 613 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Unneed storage space %d used %d, rows %d, block size %d\n", 614 fshift*bs2,a->nz*bs2,m,a->bs); 615 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues %d\n", 616 a->reallocs); 617 A->info.nz_unneeded = (double)fshift*bs2; 618 619 return 0; 620 } 621 622 static int MatZeroEntries_SeqBAIJ(Mat A) 623 { 624 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 625 PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 626 return 0; 627 } 628 629 int MatDestroy_SeqBAIJ(PetscObject obj) 630 { 631 Mat A = (Mat) obj; 632 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 633 int ierr; 634 635 #if defined(PETSC_LOG) 636 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 637 #endif 638 PetscFree(a->a); 639 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 640 if (a->diag) PetscFree(a->diag); 641 if (a->ilen) PetscFree(a->ilen); 642 if (a->imax) PetscFree(a->imax); 643 if (a->solve_work) PetscFree(a->solve_work); 644 if (a->mult_work) PetscFree(a->mult_work); 645 PetscFree(a); 646 if (A->mapping) { 647 ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr); 648 } 649 PLogObjectDestroy(A); 650 PetscHeaderDestroy(A); 651 return 0; 652 } 653 654 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 655 { 656 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 657 if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 658 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 659 else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 660 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 661 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 662 else if (op == MAT_ROWS_SORTED || 663 op == MAT_SYMMETRIC || 664 op == MAT_STRUCTURALLY_SYMMETRIC || 665 op == MAT_YES_NEW_DIAGONALS || 666 op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) 667 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 668 else if (op == MAT_NO_NEW_DIAGONALS) 669 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:MAT_NO_NEW_DIAGONALS");} 670 else 671 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 672 return 0; 673 } 674 675 676 /* -------------------------------------------------------*/ 677 /* Should check that shapes of vectors and matrices match */ 678 /* -------------------------------------------------------*/ 679 #include "pinclude/plapack.h" 680 681 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 682 { 683 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 684 register Scalar *x,*z,*v,sum; 685 int mbs=a->mbs,i,*idx,*ii,n; 686 687 VecGetArray_Fast(xx,x); 688 VecGetArray_Fast(zz,z); 689 690 idx = a->j; 691 v = a->a; 692 ii = a->i; 693 694 for ( i=0; i<mbs; i++ ) { 695 n = ii[1] - ii[0]; ii++; 696 sum = 0.0; 697 while (n--) sum += *v++ * x[*idx++]; 698 z[i] = sum; 699 } 700 VecRestoreArray_Fast(xx,x); 701 VecRestoreArray_Fast(zz,z); 702 PLogFlops(2*a->nz - a->m); 703 return 0; 704 } 705 706 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 707 { 708 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 709 register Scalar *x,*z,*v,*xb,sum1,sum2; 710 register Scalar x1,x2; 711 int mbs=a->mbs,i,*idx,*ii; 712 int j,n; 713 714 VecGetArray_Fast(xx,x); 715 VecGetArray_Fast(zz,z); 716 717 idx = a->j; 718 v = a->a; 719 ii = a->i; 720 721 for ( i=0; i<mbs; i++ ) { 722 n = ii[1] - ii[0]; ii++; 723 sum1 = 0.0; sum2 = 0.0; 724 for ( j=0; j<n; j++ ) { 725 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 726 sum1 += v[0]*x1 + v[2]*x2; 727 sum2 += v[1]*x1 + v[3]*x2; 728 v += 4; 729 } 730 z[0] = sum1; z[1] = sum2; 731 z += 2; 732 } 733 VecRestoreArray_Fast(xx,x); 734 VecRestoreArray_Fast(zz,z); 735 PLogFlops(4*a->nz - a->m); 736 return 0; 737 } 738 739 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 740 { 741 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 742 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 743 int mbs=a->mbs,i,*idx,*ii,j,n; 744 745 VecGetArray_Fast(xx,x); 746 VecGetArray_Fast(zz,z); 747 748 idx = a->j; 749 v = a->a; 750 ii = a->i; 751 752 for ( i=0; i<mbs; i++ ) { 753 n = ii[1] - ii[0]; ii++; 754 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 755 for ( j=0; j<n; j++ ) { 756 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 757 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 758 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 759 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 760 v += 9; 761 } 762 z[0] = sum1; z[1] = sum2; z[2] = sum3; 763 z += 3; 764 } 765 VecRestoreArray_Fast(xx,x); 766 VecRestoreArray_Fast(zz,z); 767 PLogFlops(18*a->nz - a->m); 768 return 0; 769 } 770 771 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 772 { 773 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 774 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4; 775 register Scalar x1,x2,x3,x4; 776 int mbs=a->mbs,i,*idx,*ii; 777 int j,n; 778 779 VecGetArray_Fast(xx,x); 780 VecGetArray_Fast(zz,z); 781 782 idx = a->j; 783 v = a->a; 784 ii = a->i; 785 786 for ( i=0; i<mbs; i++ ) { 787 n = ii[1] - ii[0]; ii++; 788 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 789 for ( j=0; j<n; j++ ) { 790 xb = x + 4*(*idx++); 791 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 792 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 793 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 794 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 795 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 796 v += 16; 797 } 798 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 799 z += 4; 800 } 801 VecRestoreArray_Fast(xx,x); 802 VecRestoreArray_Fast(zz,z); 803 PLogFlops(32*a->nz - a->m); 804 return 0; 805 } 806 807 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 808 { 809 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 810 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 811 register Scalar x1,x2,x3,x4,x5; 812 int mbs=a->mbs,i,*idx,*ii,j,n; 813 814 VecGetArray_Fast(xx,x); 815 VecGetArray_Fast(zz,z); 816 817 idx = a->j; 818 v = a->a; 819 ii = a->i; 820 821 for ( i=0; i<mbs; i++ ) { 822 n = ii[1] - ii[0]; ii++; 823 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 824 for ( j=0; j<n; j++ ) { 825 xb = x + 5*(*idx++); 826 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 827 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 828 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 829 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 830 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 831 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 832 v += 25; 833 } 834 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 835 z += 5; 836 } 837 VecRestoreArray_Fast(xx,x); 838 VecRestoreArray_Fast(zz,z); 839 PLogFlops(50*a->nz - a->m); 840 return 0; 841 } 842 843 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 844 { 845 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 846 register Scalar *x,*z,*v,*xb; 847 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2; 848 int _One = 1,ncols,k; 849 Scalar _DOne = 1.0,*work,*workt,_DZero = 0.0; 850 851 VecGetArray_Fast(xx,x); 852 VecGetArray_Fast(zz,z); 853 854 idx = a->j; 855 v = a->a; 856 ii = a->i; 857 858 859 if (!a->mult_work) { 860 k = PetscMax(a->m,a->n); 861 a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 862 } 863 work = a->mult_work; 864 for ( i=0; i<mbs; i++ ) { 865 n = ii[1] - ii[0]; ii++; 866 ncols = n*bs; 867 workt = work; 868 for ( j=0; j<n; j++ ) { 869 xb = x + bs*(*idx++); 870 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 871 workt += bs; 872 } 873 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 874 v += n*bs2; 875 z += bs; 876 } 877 VecRestoreArray_Fast(xx,x); 878 VecRestoreArray_Fast(zz,z); 879 PLogFlops(2*a->nz*bs2 - a->m); 880 return 0; 881 } 882 883 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 884 { 885 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 886 register Scalar *x,*y,*z,*v,sum; 887 int mbs=a->mbs,i,*idx,*ii,n; 888 889 VecGetArray_Fast(xx,x); 890 VecGetArray_Fast(yy,y); 891 VecGetArray_Fast(zz,z); 892 893 idx = a->j; 894 v = a->a; 895 ii = a->i; 896 897 for ( i=0; i<mbs; i++ ) { 898 n = ii[1] - ii[0]; ii++; 899 sum = y[i]; 900 while (n--) sum += *v++ * x[*idx++]; 901 z[i] = sum; 902 } 903 VecRestoreArray_Fast(xx,x); 904 VecRestoreArray_Fast(yy,y); 905 VecRestoreArray_Fast(zz,z); 906 PLogFlops(2*a->nz); 907 return 0; 908 } 909 910 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 911 { 912 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 913 register Scalar *x,*y,*z,*v,*xb,sum1,sum2; 914 register Scalar x1,x2; 915 int mbs=a->mbs,i,*idx,*ii; 916 int j,n; 917 918 VecGetArray_Fast(xx,x); 919 VecGetArray_Fast(yy,y); 920 VecGetArray_Fast(zz,z); 921 922 idx = a->j; 923 v = a->a; 924 ii = a->i; 925 926 for ( i=0; i<mbs; i++ ) { 927 n = ii[1] - ii[0]; ii++; 928 sum1 = y[0]; sum2 = y[1]; 929 for ( j=0; j<n; j++ ) { 930 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 931 sum1 += v[0]*x1 + v[2]*x2; 932 sum2 += v[1]*x1 + v[3]*x2; 933 v += 4; 934 } 935 z[0] = sum1; z[1] = sum2; 936 z += 2; y += 2; 937 } 938 VecRestoreArray_Fast(xx,x); 939 VecRestoreArray_Fast(yy,y); 940 VecRestoreArray_Fast(zz,z); 941 PLogFlops(4*a->nz); 942 return 0; 943 } 944 945 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 946 { 947 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 948 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 949 int mbs=a->mbs,i,*idx,*ii,j,n; 950 951 VecGetArray_Fast(xx,x); 952 VecGetArray_Fast(yy,y); 953 VecGetArray_Fast(zz,z); 954 955 idx = a->j; 956 v = a->a; 957 ii = a->i; 958 959 for ( i=0; i<mbs; i++ ) { 960 n = ii[1] - ii[0]; ii++; 961 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 962 for ( j=0; j<n; j++ ) { 963 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 964 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 965 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 966 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 967 v += 9; 968 } 969 z[0] = sum1; z[1] = sum2; z[2] = sum3; 970 z += 3; y += 3; 971 } 972 VecRestoreArray_Fast(xx,x); 973 VecRestoreArray_Fast(yy,y); 974 VecRestoreArray_Fast(zz,z); 975 PLogFlops(18*a->nz); 976 return 0; 977 } 978 979 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 980 { 981 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 982 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4; 983 register Scalar x1,x2,x3,x4; 984 int mbs=a->mbs,i,*idx,*ii; 985 int j,n; 986 987 VecGetArray_Fast(xx,x); 988 VecGetArray_Fast(yy,y); 989 VecGetArray_Fast(zz,z); 990 991 idx = a->j; 992 v = a->a; 993 ii = a->i; 994 995 for ( i=0; i<mbs; i++ ) { 996 n = ii[1] - ii[0]; ii++; 997 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 998 for ( j=0; j<n; j++ ) { 999 xb = x + 4*(*idx++); 1000 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1001 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1002 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1003 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1004 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 1005 v += 16; 1006 } 1007 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1008 z += 4; y += 4; 1009 } 1010 VecRestoreArray_Fast(xx,x); 1011 VecRestoreArray_Fast(yy,y); 1012 VecRestoreArray_Fast(zz,z); 1013 PLogFlops(32*a->nz); 1014 return 0; 1015 } 1016 1017 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 1018 { 1019 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1020 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 1021 register Scalar x1,x2,x3,x4,x5; 1022 int mbs=a->mbs,i,*idx,*ii,j,n; 1023 1024 VecGetArray_Fast(xx,x); 1025 VecGetArray_Fast(yy,y); 1026 VecGetArray_Fast(zz,z); 1027 1028 idx = a->j; 1029 v = a->a; 1030 ii = a->i; 1031 1032 for ( i=0; i<mbs; i++ ) { 1033 n = ii[1] - ii[0]; ii++; 1034 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 1035 for ( j=0; j<n; j++ ) { 1036 xb = x + 5*(*idx++); 1037 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1038 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1039 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1040 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1041 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1042 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1043 v += 25; 1044 } 1045 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1046 z += 5; y += 5; 1047 } 1048 VecRestoreArray_Fast(xx,x); 1049 VecRestoreArray_Fast(yy,y); 1050 VecRestoreArray_Fast(zz,z); 1051 PLogFlops(50*a->nz); 1052 return 0; 1053 } 1054 1055 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1056 { 1057 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1058 register Scalar *x,*z,*v,*xb; 1059 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 1060 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1061 1062 if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1063 1064 VecGetArray_Fast(xx,x); 1065 VecGetArray_Fast(zz,z); 1066 1067 idx = a->j; 1068 v = a->a; 1069 ii = a->i; 1070 1071 1072 if (!a->mult_work) { 1073 k = PetscMax(a->m,a->n); 1074 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 1075 } 1076 work = a->mult_work; 1077 for ( i=0; i<mbs; i++ ) { 1078 n = ii[1] - ii[0]; ii++; 1079 ncols = n*bs; 1080 workt = work; 1081 for ( j=0; j<n; j++ ) { 1082 xb = x + bs*(*idx++); 1083 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1084 workt += bs; 1085 } 1086 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 1087 v += n*bs2; 1088 z += bs; 1089 } 1090 VecRestoreArray_Fast(xx,x); 1091 VecRestoreArray_Fast(zz,z); 1092 PLogFlops(2*a->nz*bs2 ); 1093 return 0; 1094 } 1095 1096 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 1097 { 1098 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1099 Scalar *xg,*zg,*zb; 1100 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1101 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1102 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1103 1104 1105 VecGetArray_Fast(xx,xg); x = xg; 1106 VecGetArray_Fast(zz,zg); z = zg; 1107 PetscMemzero(z,N*sizeof(Scalar)); 1108 1109 idx = a->j; 1110 v = a->a; 1111 ii = a->i; 1112 1113 switch (bs) { 1114 case 1: 1115 for ( i=0; i<mbs; i++ ) { 1116 n = ii[1] - ii[0]; ii++; 1117 xb = x + i; x1 = xb[0]; 1118 ib = idx + ai[i]; 1119 for ( j=0; j<n; j++ ) { 1120 rval = ib[j]; 1121 z[rval] += *v++ * x1; 1122 } 1123 } 1124 break; 1125 case 2: 1126 for ( i=0; i<mbs; i++ ) { 1127 n = ii[1] - ii[0]; ii++; 1128 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1129 ib = idx + ai[i]; 1130 for ( j=0; j<n; j++ ) { 1131 rval = ib[j]*2; 1132 z[rval++] += v[0]*x1 + v[1]*x2; 1133 z[rval++] += v[2]*x1 + v[3]*x2; 1134 v += 4; 1135 } 1136 } 1137 break; 1138 case 3: 1139 for ( i=0; i<mbs; i++ ) { 1140 n = ii[1] - ii[0]; ii++; 1141 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1142 ib = idx + ai[i]; 1143 for ( j=0; j<n; j++ ) { 1144 rval = ib[j]*3; 1145 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1146 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1147 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1148 v += 9; 1149 } 1150 } 1151 break; 1152 case 4: 1153 for ( i=0; i<mbs; i++ ) { 1154 n = ii[1] - ii[0]; ii++; 1155 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1156 ib = idx + ai[i]; 1157 for ( j=0; j<n; j++ ) { 1158 rval = ib[j]*4; 1159 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1160 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1161 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1162 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1163 v += 16; 1164 } 1165 } 1166 break; 1167 case 5: 1168 for ( i=0; i<mbs; i++ ) { 1169 n = ii[1] - ii[0]; ii++; 1170 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1171 x4 = xb[3]; x5 = xb[4]; 1172 ib = idx + ai[i]; 1173 for ( j=0; j<n; j++ ) { 1174 rval = ib[j]*5; 1175 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1176 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1177 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1178 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1179 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1180 v += 25; 1181 } 1182 } 1183 break; 1184 /* block sizes larger then 5 by 5 are handled by BLAS */ 1185 default: { 1186 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1187 if (!a->mult_work) { 1188 k = PetscMax(a->m,a->n); 1189 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1190 CHKPTRQ(a->mult_work); 1191 } 1192 work = a->mult_work; 1193 for ( i=0; i<mbs; i++ ) { 1194 n = ii[1] - ii[0]; ii++; 1195 ncols = n*bs; 1196 PetscMemzero(work,ncols*sizeof(Scalar)); 1197 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1198 v += n*bs2; 1199 x += bs; 1200 workt = work; 1201 for ( j=0; j<n; j++ ) { 1202 zb = z + bs*(*idx++); 1203 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1204 workt += bs; 1205 } 1206 } 1207 } 1208 } 1209 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1210 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1211 PLogFlops(2*a->nz*a->bs2 - a->n); 1212 return 0; 1213 } 1214 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1215 { 1216 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1217 Scalar *xg,*zg,*zb; 1218 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1219 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1220 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1221 1222 1223 1224 VecGetArray_Fast(xx,xg); x = xg; 1225 VecGetArray_Fast(zz,zg); z = zg; 1226 1227 if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1228 else PetscMemzero(z,N*sizeof(Scalar)); 1229 1230 1231 idx = a->j; 1232 v = a->a; 1233 ii = a->i; 1234 1235 switch (bs) { 1236 case 1: 1237 for ( i=0; i<mbs; i++ ) { 1238 n = ii[1] - ii[0]; ii++; 1239 xb = x + i; x1 = xb[0]; 1240 ib = idx + ai[i]; 1241 for ( j=0; j<n; j++ ) { 1242 rval = ib[j]; 1243 z[rval] += *v++ * x1; 1244 } 1245 } 1246 break; 1247 case 2: 1248 for ( i=0; i<mbs; i++ ) { 1249 n = ii[1] - ii[0]; ii++; 1250 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1251 ib = idx + ai[i]; 1252 for ( j=0; j<n; j++ ) { 1253 rval = ib[j]*2; 1254 z[rval++] += v[0]*x1 + v[1]*x2; 1255 z[rval++] += v[2]*x1 + v[3]*x2; 1256 v += 4; 1257 } 1258 } 1259 break; 1260 case 3: 1261 for ( i=0; i<mbs; i++ ) { 1262 n = ii[1] - ii[0]; ii++; 1263 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1264 ib = idx + ai[i]; 1265 for ( j=0; j<n; j++ ) { 1266 rval = ib[j]*3; 1267 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1268 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1269 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1270 v += 9; 1271 } 1272 } 1273 break; 1274 case 4: 1275 for ( i=0; i<mbs; i++ ) { 1276 n = ii[1] - ii[0]; ii++; 1277 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1278 ib = idx + ai[i]; 1279 for ( j=0; j<n; j++ ) { 1280 rval = ib[j]*4; 1281 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1282 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1283 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1284 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1285 v += 16; 1286 } 1287 } 1288 break; 1289 case 5: 1290 for ( i=0; i<mbs; i++ ) { 1291 n = ii[1] - ii[0]; ii++; 1292 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1293 x4 = xb[3]; x5 = xb[4]; 1294 ib = idx + ai[i]; 1295 for ( j=0; j<n; j++ ) { 1296 rval = ib[j]*5; 1297 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1298 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1299 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1300 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1301 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1302 v += 25; 1303 } 1304 } 1305 break; 1306 /* block sizes larger then 5 by 5 are handled by BLAS */ 1307 default: { 1308 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1309 if (!a->mult_work) { 1310 k = PetscMax(a->m,a->n); 1311 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1312 CHKPTRQ(a->mult_work); 1313 } 1314 work = a->mult_work; 1315 for ( i=0; i<mbs; i++ ) { 1316 n = ii[1] - ii[0]; ii++; 1317 ncols = n*bs; 1318 PetscMemzero(work,ncols*sizeof(Scalar)); 1319 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1320 v += n*bs2; 1321 x += bs; 1322 workt = work; 1323 for ( j=0; j<n; j++ ) { 1324 zb = z + bs*(*idx++); 1325 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1326 workt += bs; 1327 } 1328 } 1329 } 1330 } 1331 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1332 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1333 PLogFlops(2*a->nz*a->bs2); 1334 return 0; 1335 } 1336 1337 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) 1338 { 1339 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1340 1341 info->rows_global = (double)a->m; 1342 info->columns_global = (double)a->n; 1343 info->rows_local = (double)a->m; 1344 info->columns_local = (double)a->n; 1345 info->block_size = a->bs2; 1346 info->nz_allocated = a->maxnz; 1347 info->nz_used = a->bs2*a->nz; 1348 info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 1349 /* if (info->nz_unneeded != A->info.nz_unneeded) 1350 printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */ 1351 info->assemblies = A->num_ass; 1352 info->mallocs = a->reallocs; 1353 info->memory = A->mem; 1354 if (A->factor) { 1355 info->fill_ratio_given = A->info.fill_ratio_given; 1356 info->fill_ratio_needed = A->info.fill_ratio_needed; 1357 info->factor_mallocs = A->info.factor_mallocs; 1358 } else { 1359 info->fill_ratio_given = 0; 1360 info->fill_ratio_needed = 0; 1361 info->factor_mallocs = 0; 1362 } 1363 return 0; 1364 } 1365 1366 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 1367 { 1368 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 1369 1370 if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type"); 1371 1372 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1373 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 1374 (a->nz != b->nz)) { 1375 *flg = PETSC_FALSE; return 0; 1376 } 1377 1378 /* if the a->i are the same */ 1379 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 1380 *flg = PETSC_FALSE; return 0; 1381 } 1382 1383 /* if a->j are the same */ 1384 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 1385 *flg = PETSC_FALSE; return 0; 1386 } 1387 1388 /* if a->a are the same */ 1389 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 1390 *flg = PETSC_FALSE; return 0; 1391 } 1392 *flg = PETSC_TRUE; 1393 return 0; 1394 1395 } 1396 1397 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 1398 { 1399 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1400 int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1401 Scalar *x, zero = 0.0,*aa,*aa_j; 1402 1403 bs = a->bs; 1404 aa = a->a; 1405 ai = a->i; 1406 aj = a->j; 1407 ambs = a->mbs; 1408 bs2 = a->bs2; 1409 1410 VecSet(&zero,v); 1411 VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n); 1412 if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector"); 1413 for ( i=0; i<ambs; i++ ) { 1414 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1415 if (aj[j] == i) { 1416 row = i*bs; 1417 aa_j = aa+j*bs2; 1418 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1419 break; 1420 } 1421 } 1422 } 1423 return 0; 1424 } 1425 1426 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 1427 { 1428 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1429 Scalar *l,*r,x,*v,*aa,*li,*ri; 1430 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 1431 1432 ai = a->i; 1433 aj = a->j; 1434 aa = a->a; 1435 m = a->m; 1436 n = a->n; 1437 bs = a->bs; 1438 mbs = a->mbs; 1439 bs2 = a->bs2; 1440 if (ll) { 1441 VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm); 1442 if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length"); 1443 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1444 M = ai[i+1] - ai[i]; 1445 li = l + i*bs; 1446 v = aa + bs2*ai[i]; 1447 for ( j=0; j<M; j++ ) { /* for each block */ 1448 for ( k=0; k<bs2; k++ ) { 1449 (*v++) *= li[k%bs]; 1450 } 1451 } 1452 } 1453 } 1454 1455 if (rr) { 1456 VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn); 1457 if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length"); 1458 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1459 M = ai[i+1] - ai[i]; 1460 v = aa + bs2*ai[i]; 1461 for ( j=0; j<M; j++ ) { /* for each block */ 1462 ri = r + bs*aj[ai[i]+j]; 1463 for ( k=0; k<bs; k++ ) { 1464 x = ri[k]; 1465 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 1466 } 1467 } 1468 } 1469 } 1470 return 0; 1471 } 1472 1473 1474 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1475 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1476 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1477 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 1478 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 1479 1480 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1481 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1482 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1483 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1484 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1485 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1486 1487 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1488 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1489 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1490 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1491 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1492 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1493 1494 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 1495 { 1496 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1497 Scalar *v = a->a; 1498 double sum = 0.0; 1499 int i,nz=a->nz,bs2=a->bs2; 1500 1501 if (type == NORM_FROBENIUS) { 1502 for (i=0; i< bs2*nz; i++ ) { 1503 #if defined(PETSC_COMPLEX) 1504 sum += real(conj(*v)*(*v)); v++; 1505 #else 1506 sum += (*v)*(*v); v++; 1507 #endif 1508 } 1509 *norm = sqrt(sum); 1510 } 1511 else { 1512 SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 1513 } 1514 return 0; 1515 } 1516 1517 /* 1518 note: This can only work for identity for row and col. It would 1519 be good to check this and otherwise generate an error. 1520 */ 1521 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 1522 { 1523 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1524 Mat outA; 1525 int ierr; 1526 1527 if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 1528 1529 outA = inA; 1530 inA->factor = FACTOR_LU; 1531 a->row = row; 1532 a->col = col; 1533 1534 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1535 1536 if (!a->diag) { 1537 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 1538 } 1539 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 1540 return 0; 1541 } 1542 1543 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 1544 { 1545 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1546 int one = 1, totalnz = a->bs2*a->nz; 1547 BLscal_( &totalnz, alpha, a->a, &one ); 1548 PLogFlops(totalnz); 1549 return 0; 1550 } 1551 1552 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 1553 { 1554 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1555 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1556 int *ai = a->i, *ailen = a->ilen; 1557 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 1558 Scalar *ap, *aa = a->a, zero = 0.0; 1559 1560 for ( k=0; k<m; k++ ) { /* loop over rows */ 1561 row = im[k]; brow = row/bs; 1562 if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row"); 1563 if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large"); 1564 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1565 nrow = ailen[brow]; 1566 for ( l=0; l<n; l++ ) { /* loop over columns */ 1567 if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column"); 1568 if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large"); 1569 col = in[l] ; 1570 bcol = col/bs; 1571 cidx = col%bs; 1572 ridx = row%bs; 1573 high = nrow; 1574 low = 0; /* assume unsorted */ 1575 while (high-low > 5) { 1576 t = (low+high)/2; 1577 if (rp[t] > bcol) high = t; 1578 else low = t; 1579 } 1580 for ( i=low; i<high; i++ ) { 1581 if (rp[i] > bcol) break; 1582 if (rp[i] == bcol) { 1583 *v++ = ap[bs2*i+bs*cidx+ridx]; 1584 goto finished; 1585 } 1586 } 1587 *v++ = zero; 1588 finished:; 1589 } 1590 } 1591 return 0; 1592 } 1593 1594 static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 1595 { 1596 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 1597 *bs = baij->bs; 1598 return 0; 1599 } 1600 1601 /* idx should be of length atlease bs */ 1602 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg) 1603 { 1604 int i,row; 1605 row = idx[0]; 1606 if (row%bs!=0) { *flg = PETSC_FALSE; return 0; } 1607 1608 for ( i=1; i<bs; i++ ) { 1609 if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; } 1610 } 1611 *flg = PETSC_TRUE; 1612 return 0; 1613 } 1614 1615 static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 1616 { 1617 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1618 IS is_local; 1619 int ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2; 1620 PetscTruth flg; 1621 Scalar zero = 0.0,*aa; 1622 1623 /* Make a copy of the IS and sort it */ 1624 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 1625 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1626 ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr); 1627 ierr = ISSort(is_local); CHKERRQ(ierr); 1628 ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr); 1629 1630 i = 0; 1631 while (i < is_n) { 1632 if (rows[i]<0 || rows[i]>m) SETERRQ(1,"MatZeroRows_SeqBAIJ:row out of range"); 1633 flg = PETSC_FALSE; 1634 if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); } 1635 if (flg) { /* There exists a block of rows to be Zerowed */ 1636 baij->ilen[rows[i]/bs] = 0; 1637 i += bs; 1638 } else { /* Zero out only the requested row */ 1639 count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs; 1640 aa = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs); 1641 for ( j=0; j<count; j++ ) { 1642 aa[0] = zero; 1643 aa+=bs; 1644 } 1645 i++; 1646 } 1647 } 1648 if (diag) { 1649 for ( j=0; j<is_n; j++ ) { 1650 ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1651 } 1652 } 1653 ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr); 1654 ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr); 1655 ierr = ISDestroy(is_local); CHKERRQ(ierr); 1656 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1657 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1658 1659 return 0; 1660 } 1661 int MatPrintHelp_SeqBAIJ(Mat A) 1662 { 1663 static int called = 0; 1664 MPI_Comm comm = A->comm; 1665 1666 if (called) return 0; else called = 1; 1667 PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n"); 1668 PetscPrintf(comm," -mat_block_size <block_size>\n"); 1669 return 0; 1670 } 1671 1672 /* -------------------------------------------------------------------*/ 1673 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 1674 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 1675 MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N, 1676 MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 1677 MatSolve_SeqBAIJ_N,0, 1678 0,0, 1679 MatLUFactor_SeqBAIJ,0, 1680 0, 1681 MatTranspose_SeqBAIJ, 1682 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 1683 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 1684 0,MatAssemblyEnd_SeqBAIJ, 1685 0, 1686 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ, 1687 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 1688 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 1689 MatILUFactorSymbolic_SeqBAIJ,0, 1690 0,0,/* MatConvert_SeqBAIJ */ 0, 1691 MatConvertSameType_SeqBAIJ,0,0, 1692 MatILUFactor_SeqBAIJ,0,0, 1693 MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ, 1694 MatGetValues_SeqBAIJ,0, 1695 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 1696 0,0,0,MatGetBlockSize_SeqBAIJ, 1697 MatGetRowIJ_SeqBAIJ, 1698 MatRestoreRowIJ_SeqBAIJ}; 1699 1700 /*@C 1701 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1702 compressed row) format. For good matrix assembly performance the 1703 user should preallocate the matrix storage by setting the parameter nz 1704 (or the array nzz). By setting these parameters accurately, performance 1705 during matrix assembly can be increased by more than a factor of 50. 1706 1707 Input Parameters: 1708 . comm - MPI communicator, set to MPI_COMM_SELF 1709 . bs - size of block 1710 . m - number of rows 1711 . n - number of columns 1712 . nz - number of block nonzeros per block row (same for all rows) 1713 . nzz - array containing the number of block nonzeros in the various block rows 1714 (possibly different for each block row) or PETSC_NULL 1715 1716 Output Parameter: 1717 . A - the matrix 1718 1719 Notes: 1720 The block AIJ format is fully compatible with standard Fortran 77 1721 storage. That is, the stored row and column indices can begin at 1722 either one (as in Fortran) or zero. See the users' manual for details. 1723 1724 Specify the preallocated storage with either nz or nnz (not both). 1725 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1726 allocation. For additional details, see the users manual chapter on 1727 matrices and the file $(PETSC_DIR)/Performance. 1728 1729 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1730 @*/ 1731 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 1732 { 1733 Mat B; 1734 Mat_SeqBAIJ *b; 1735 int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size; 1736 1737 MPI_Comm_size(comm,&size); 1738 if (size > 1) SETERRQ(1,"MatCreateSeqBAIJ:Comm must be of size 1"); 1739 1740 if (mbs*bs!=m || nbs*bs!=n) 1741 SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize"); 1742 1743 *A = 0; 1744 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 1745 PLogObjectCreate(B); 1746 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 1747 PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 1748 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1749 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 1750 if (!flg) { 1751 switch (bs) { 1752 case 1: 1753 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1754 B->ops.solve = MatSolve_SeqBAIJ_1; 1755 B->ops.mult = MatMult_SeqBAIJ_1; 1756 B->ops.multadd = MatMultAdd_SeqBAIJ_1; 1757 break; 1758 case 2: 1759 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1760 B->ops.solve = MatSolve_SeqBAIJ_2; 1761 B->ops.mult = MatMult_SeqBAIJ_2; 1762 B->ops.multadd = MatMultAdd_SeqBAIJ_2; 1763 break; 1764 case 3: 1765 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1766 B->ops.solve = MatSolve_SeqBAIJ_3; 1767 B->ops.mult = MatMult_SeqBAIJ_3; 1768 B->ops.multadd = MatMultAdd_SeqBAIJ_3; 1769 break; 1770 case 4: 1771 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1772 B->ops.solve = MatSolve_SeqBAIJ_4; 1773 B->ops.mult = MatMult_SeqBAIJ_4; 1774 B->ops.multadd = MatMultAdd_SeqBAIJ_4; 1775 break; 1776 case 5: 1777 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1778 B->ops.solve = MatSolve_SeqBAIJ_5; 1779 B->ops.mult = MatMult_SeqBAIJ_5; 1780 B->ops.multadd = MatMultAdd_SeqBAIJ_5; 1781 break; 1782 } 1783 } 1784 B->destroy = MatDestroy_SeqBAIJ; 1785 B->view = MatView_SeqBAIJ; 1786 B->factor = 0; 1787 B->lupivotthreshold = 1.0; 1788 B->mapping = 0; 1789 b->row = 0; 1790 b->col = 0; 1791 b->reallocs = 0; 1792 1793 b->m = m; B->m = m; B->M = m; 1794 b->n = n; B->n = n; B->N = n; 1795 b->mbs = mbs; 1796 b->nbs = nbs; 1797 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 1798 if (nnz == PETSC_NULL) { 1799 if (nz == PETSC_DEFAULT) nz = 5; 1800 else if (nz <= 0) nz = 1; 1801 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1802 nz = nz*mbs; 1803 } 1804 else { 1805 nz = 0; 1806 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 1807 } 1808 1809 /* allocate the matrix space */ 1810 len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 1811 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 1812 PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 1813 b->j = (int *) (b->a + nz*bs2); 1814 PetscMemzero(b->j,nz*sizeof(int)); 1815 b->i = b->j + nz; 1816 b->singlemalloc = 1; 1817 1818 b->i[0] = 0; 1819 for (i=1; i<mbs+1; i++) { 1820 b->i[i] = b->i[i-1] + b->imax[i-1]; 1821 } 1822 1823 /* b->ilen will count nonzeros in each block row so far. */ 1824 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1825 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1826 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 1827 1828 b->bs = bs; 1829 b->bs2 = bs2; 1830 b->mbs = mbs; 1831 b->nz = 0; 1832 b->maxnz = nz*bs2; 1833 b->sorted = 0; 1834 b->roworiented = 1; 1835 b->nonew = 0; 1836 b->diag = 0; 1837 b->solve_work = 0; 1838 b->mult_work = 0; 1839 b->spptr = 0; 1840 B->info.nz_unneeded = (double)b->maxnz; 1841 1842 *A = B; 1843 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 1844 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 1845 return 0; 1846 } 1847 1848 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 1849 { 1850 Mat C; 1851 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 1852 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1853 1854 if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 1855 1856 *B = 0; 1857 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 1858 PLogObjectCreate(C); 1859 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 1860 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 1861 C->destroy = MatDestroy_SeqBAIJ; 1862 C->view = MatView_SeqBAIJ; 1863 C->factor = A->factor; 1864 c->row = 0; 1865 c->col = 0; 1866 C->assembled = PETSC_TRUE; 1867 1868 c->m = C->m = a->m; 1869 c->n = C->n = a->n; 1870 C->M = a->m; 1871 C->N = a->n; 1872 1873 c->bs = a->bs; 1874 c->bs2 = a->bs2; 1875 c->mbs = a->mbs; 1876 c->nbs = a->nbs; 1877 1878 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 1879 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 1880 for ( i=0; i<mbs; i++ ) { 1881 c->imax[i] = a->imax[i]; 1882 c->ilen[i] = a->ilen[i]; 1883 } 1884 1885 /* allocate the matrix space */ 1886 c->singlemalloc = 1; 1887 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 1888 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 1889 c->j = (int *) (c->a + nz*bs2); 1890 c->i = c->j + nz; 1891 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 1892 if (mbs > 0) { 1893 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 1894 if (cpvalues == COPY_VALUES) { 1895 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 1896 } 1897 } 1898 1899 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1900 c->sorted = a->sorted; 1901 c->roworiented = a->roworiented; 1902 c->nonew = a->nonew; 1903 1904 if (a->diag) { 1905 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 1906 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1907 for ( i=0; i<mbs; i++ ) { 1908 c->diag[i] = a->diag[i]; 1909 } 1910 } 1911 else c->diag = 0; 1912 c->nz = a->nz; 1913 c->maxnz = a->maxnz; 1914 c->solve_work = 0; 1915 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1916 c->mult_work = 0; 1917 *B = C; 1918 return 0; 1919 } 1920 1921 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 1922 { 1923 Mat_SeqBAIJ *a; 1924 Mat B; 1925 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 1926 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1927 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1928 int *masked, nmask,tmp,bs2,ishift; 1929 Scalar *aa; 1930 MPI_Comm comm = ((PetscObject) viewer)->comm; 1931 1932 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 1933 bs2 = bs*bs; 1934 1935 MPI_Comm_size(comm,&size); 1936 if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 1937 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1938 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 1939 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 1940 M = header[1]; N = header[2]; nz = header[3]; 1941 1942 if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 1943 1944 /* 1945 This code adds extra rows to make sure the number of rows is 1946 divisible by the blocksize 1947 */ 1948 mbs = M/bs; 1949 extra_rows = bs - M + bs*(mbs); 1950 if (extra_rows == bs) extra_rows = 0; 1951 else mbs++; 1952 if (extra_rows) { 1953 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1954 } 1955 1956 /* read in row lengths */ 1957 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 1958 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 1959 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 1960 1961 /* read in column indices */ 1962 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 1963 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 1964 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 1965 1966 /* loop over row lengths determining block row lengths */ 1967 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1968 PetscMemzero(browlengths,mbs*sizeof(int)); 1969 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 1970 PetscMemzero(mask,mbs*sizeof(int)); 1971 masked = mask + mbs; 1972 rowcount = 0; nzcount = 0; 1973 for ( i=0; i<mbs; i++ ) { 1974 nmask = 0; 1975 for ( j=0; j<bs; j++ ) { 1976 kmax = rowlengths[rowcount]; 1977 for ( k=0; k<kmax; k++ ) { 1978 tmp = jj[nzcount++]/bs; 1979 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1980 } 1981 rowcount++; 1982 } 1983 browlengths[i] += nmask; 1984 /* zero out the mask elements we set */ 1985 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1986 } 1987 1988 /* create our matrix */ 1989 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 1990 CHKERRQ(ierr); 1991 B = *A; 1992 a = (Mat_SeqBAIJ *) B->data; 1993 1994 /* set matrix "i" values */ 1995 a->i[0] = 0; 1996 for ( i=1; i<= mbs; i++ ) { 1997 a->i[i] = a->i[i-1] + browlengths[i-1]; 1998 a->ilen[i-1] = browlengths[i-1]; 1999 } 2000 a->nz = 0; 2001 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 2002 2003 /* read in nonzero values */ 2004 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 2005 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 2006 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 2007 2008 /* set "a" and "j" values into matrix */ 2009 nzcount = 0; jcount = 0; 2010 for ( i=0; i<mbs; i++ ) { 2011 nzcountb = nzcount; 2012 nmask = 0; 2013 for ( j=0; j<bs; j++ ) { 2014 kmax = rowlengths[i*bs+j]; 2015 for ( k=0; k<kmax; k++ ) { 2016 tmp = jj[nzcount++]/bs; 2017 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2018 } 2019 rowcount++; 2020 } 2021 /* sort the masked values */ 2022 PetscSortInt(nmask,masked); 2023 2024 /* set "j" values into matrix */ 2025 maskcount = 1; 2026 for ( j=0; j<nmask; j++ ) { 2027 a->j[jcount++] = masked[j]; 2028 mask[masked[j]] = maskcount++; 2029 } 2030 /* set "a" values into matrix */ 2031 ishift = bs2*a->i[i]; 2032 for ( j=0; j<bs; j++ ) { 2033 kmax = rowlengths[i*bs+j]; 2034 for ( k=0; k<kmax; k++ ) { 2035 tmp = jj[nzcountb]/bs ; 2036 block = mask[tmp] - 1; 2037 point = jj[nzcountb] - bs*tmp; 2038 idx = ishift + bs2*block + j + bs*point; 2039 a->a[idx] = aa[nzcountb++]; 2040 } 2041 } 2042 /* zero out the mask elements we set */ 2043 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2044 } 2045 if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 2046 2047 PetscFree(rowlengths); 2048 PetscFree(browlengths); 2049 PetscFree(aa); 2050 PetscFree(jj); 2051 PetscFree(mask); 2052 2053 B->assembled = PETSC_TRUE; 2054 2055 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 2056 if (flg) { 2057 Viewer tviewer; 2058 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2059 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr); 2060 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2061 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2062 } 2063 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 2064 if (flg) { 2065 Viewer tviewer; 2066 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2067 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr); 2068 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2069 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2070 } 2071 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 2072 if (flg) { 2073 Viewer tviewer; 2074 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2075 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2076 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2077 } 2078 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 2079 if (flg) { 2080 Viewer tviewer; 2081 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2082 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr); 2083 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2084 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2085 } 2086 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 2087 if (flg) { 2088 Viewer tviewer; 2089 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 2090 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 2091 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 2092 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2093 } 2094 return 0; 2095 } 2096 2097 2098 2099