1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: baij.c,v 1.108 1997/07/21 02:29:01 bsmith Exp bsmith $"; 3 #endif 4 5 /* 6 Defines the basic matrix operations for the BAIJ (compressed row) 7 matrix storage format. 8 */ 9 10 #include "pinclude/pviewer.h" 11 #include "sys.h" 12 #include "src/mat/impls/baij/seq/baij.h" 13 #include "src/vec/vecimpl.h" 14 #include "src/inline/spops.h" 15 #include "petsc.h" 16 17 18 /* 19 Adds diagonal pointers to sparse matrix structure. 20 */ 21 22 #undef __FUNC__ 23 #define __FUNC__ "MatMarkDiag_SeqBAIJ" 24 int MatMarkDiag_SeqBAIJ(Mat A) 25 { 26 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 27 int i,j, *diag, m = a->mbs; 28 29 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 30 PLogObjectMemory(A,(m+1)*sizeof(int)); 31 for ( i=0; i<m; i++ ) { 32 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 33 if (a->j[j] == i) { 34 diag[i] = j; 35 break; 36 } 37 } 38 } 39 a->diag = diag; 40 return 0; 41 } 42 43 44 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 45 46 #undef __FUNC__ 47 #define __FUNC__ "MatGetRowIJ_SeqBAIJ" 48 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 49 PetscTruth *done) 50 { 51 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 52 int ierr, n = a->mbs,i; 53 54 *nn = n; 55 if (!ia) return 0; 56 if (symmetric) { 57 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 58 } else if (oshift == 1) { 59 /* temporarily add 1 to i and j indices */ 60 int nz = a->i[n] + 1; 61 for ( i=0; i<nz; i++ ) a->j[i]++; 62 for ( i=0; i<n+1; i++ ) a->i[i]++; 63 *ia = a->i; *ja = a->j; 64 } else { 65 *ia = a->i; *ja = a->j; 66 } 67 68 return 0; 69 } 70 71 #undef __FUNC__ 72 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" /* ADIC Ignore */ 73 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 74 PetscTruth *done) 75 { 76 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 77 int i,n = a->mbs; 78 79 if (!ia) return 0; 80 if (symmetric) { 81 PetscFree(*ia); 82 PetscFree(*ja); 83 } else if (oshift == 1) { 84 int nz = a->i[n]; 85 for ( i=0; i<nz; i++ ) a->j[i]--; 86 for ( i=0; i<n+1; i++ ) a->i[i]--; 87 } 88 return 0; 89 } 90 91 92 #undef __FUNC__ 93 #define __FUNC__ "MatView_SeqBAIJ_Binary" /* ADIC Ignore */ 94 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 95 { 96 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 97 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 98 Scalar *aa; 99 100 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 101 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 102 col_lens[0] = MAT_COOKIE; 103 104 col_lens[1] = a->m; 105 col_lens[2] = a->n; 106 col_lens[3] = a->nz*bs2; 107 108 /* store lengths of each row and write (including header) to file */ 109 count = 0; 110 for ( i=0; i<a->mbs; i++ ) { 111 for ( j=0; j<bs; j++ ) { 112 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 113 } 114 } 115 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 116 PetscFree(col_lens); 117 118 /* store column indices (zero start index) */ 119 jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) ); CHKPTRQ(jj); 120 count = 0; 121 for ( i=0; i<a->mbs; i++ ) { 122 for ( j=0; j<bs; j++ ) { 123 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 124 for ( l=0; l<bs; l++ ) { 125 jj[count++] = bs*a->j[k] + l; 126 } 127 } 128 } 129 } 130 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 131 PetscFree(jj); 132 133 /* store nonzero values */ 134 aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar)); CHKPTRQ(aa); 135 count = 0; 136 for ( i=0; i<a->mbs; i++ ) { 137 for ( j=0; j<bs; j++ ) { 138 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 139 for ( l=0; l<bs; l++ ) { 140 aa[count++] = a->a[bs2*k + l*bs + j]; 141 } 142 } 143 } 144 } 145 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 146 PetscFree(aa); 147 return 0; 148 } 149 150 #undef __FUNC__ 151 #define __FUNC__ "MatView_SeqBAIJ_ASCII" /* ADIC Ignore */ 152 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 153 { 154 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 155 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 156 FILE *fd; 157 char *outputname; 158 159 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 160 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 161 ierr = ViewerGetFormat(viewer,&format); 162 if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 163 fprintf(fd," block size is %d\n",bs); 164 } 165 else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 166 SETERRQ(1,0,"Matlab format not supported"); 167 } 168 else if (format == VIEWER_FORMAT_ASCII_COMMON) { 169 for ( i=0; i<a->mbs; i++ ) { 170 for ( j=0; j<bs; j++ ) { 171 fprintf(fd,"row %d:",i*bs+j); 172 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 173 for ( l=0; l<bs; l++ ) { 174 #if defined(PETSC_COMPLEX) 175 if (imag(a->a[bs2*k + l*bs + j]) > 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 176 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 177 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 178 else if (imag(a->a[bs2*k + l*bs + j]) < 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 179 fprintf(fd," %d %g - %g i",bs*a->j[k]+l, 180 real(a->a[bs2*k + l*bs + j]),-imag(a->a[bs2*k + l*bs + j])); 181 else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 182 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 183 #else 184 if (a->a[bs2*k + l*bs + j] != 0.0) 185 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 186 #endif 187 } 188 } 189 fprintf(fd,"\n"); 190 } 191 } 192 } 193 else { 194 for ( i=0; i<a->mbs; i++ ) { 195 for ( j=0; j<bs; j++ ) { 196 fprintf(fd,"row %d:",i*bs+j); 197 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 198 for ( l=0; l<bs; l++ ) { 199 #if defined(PETSC_COMPLEX) 200 if (imag(a->a[bs2*k + l*bs + j]) > 0.0) { 201 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 202 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 203 } 204 else if (imag(a->a[bs2*k + l*bs + j]) < 0.0) { 205 fprintf(fd," %d %g - %g i",bs*a->j[k]+l, 206 real(a->a[bs2*k + l*bs + j]),-imag(a->a[bs2*k + l*bs + j])); 207 } 208 else { 209 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 210 } 211 #else 212 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 213 #endif 214 } 215 } 216 fprintf(fd,"\n"); 217 } 218 } 219 } 220 fflush(fd); 221 return 0; 222 } 223 224 #undef __FUNC__ 225 #define __FUNC__ "MatView_SeqBAIJ_Draw" /* ADIC Ignore */ 226 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 227 { 228 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 229 int row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2; 230 double xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r; 231 Scalar *aa; 232 Draw draw; 233 DrawButton button; 234 PetscTruth isnull; 235 236 ViewerDrawGetDraw(viewer,&draw); 237 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 238 239 xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 240 xr += w; yr += h; xl = -w; yl = -h; 241 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 242 /* loop over matrix elements drawing boxes */ 243 color = DRAW_BLUE; 244 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 245 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 246 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 247 x_l = a->j[j]*bs; x_r = x_l + 1.0; 248 aa = a->a + j*bs2; 249 for ( k=0; k<bs; k++ ) { 250 for ( l=0; l<bs; l++ ) { 251 if (PetscReal(*aa++) >= 0.) continue; 252 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 253 } 254 } 255 } 256 } 257 color = DRAW_CYAN; 258 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 259 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 260 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 261 x_l = a->j[j]*bs; x_r = x_l + 1.0; 262 aa = a->a + j*bs2; 263 for ( k=0; k<bs; k++ ) { 264 for ( l=0; l<bs; l++ ) { 265 if (PetscReal(*aa++) != 0.) continue; 266 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 267 } 268 } 269 } 270 } 271 272 color = DRAW_RED; 273 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 274 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 275 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 276 x_l = a->j[j]*bs; x_r = x_l + 1.0; 277 aa = a->a + j*bs2; 278 for ( k=0; k<bs; k++ ) { 279 for ( l=0; l<bs; l++ ) { 280 if (PetscReal(*aa++) <= 0.) continue; 281 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 282 } 283 } 284 } 285 } 286 287 DrawFlush(draw); 288 DrawGetPause(draw,&pause); 289 if (pause >= 0) { PetscSleep(pause); return 0;} 290 291 /* allow the matrix to zoom or shrink */ 292 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 293 while (button != BUTTON_RIGHT) { 294 DrawClear(draw); 295 if (button == BUTTON_LEFT) scale = .5; 296 else if (button == BUTTON_CENTER) scale = 2.; 297 xl = scale*(xl + w - xc) + xc - w*scale; 298 xr = scale*(xr - w - xc) + xc + w*scale; 299 yl = scale*(yl + h - yc) + yc - h*scale; 300 yr = scale*(yr - h - yc) + yc + h*scale; 301 w *= scale; h *= scale; 302 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 303 color = DRAW_BLUE; 304 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 305 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 306 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 307 x_l = a->j[j]*bs; x_r = x_l + 1.0; 308 aa = a->a + j*bs2; 309 for ( k=0; k<bs; k++ ) { 310 for ( l=0; l<bs; l++ ) { 311 if (PetscReal(*aa++) >= 0.) continue; 312 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 313 } 314 } 315 } 316 } 317 color = DRAW_CYAN; 318 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 319 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 320 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 321 x_l = a->j[j]*bs; x_r = x_l + 1.0; 322 aa = a->a + j*bs2; 323 for ( k=0; k<bs; k++ ) { 324 for ( l=0; l<bs; l++ ) { 325 if (PetscReal(*aa++) != 0.) continue; 326 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 327 } 328 } 329 } 330 } 331 332 color = DRAW_RED; 333 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 334 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 335 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 336 x_l = a->j[j]*bs; x_r = x_l + 1.0; 337 aa = a->a + j*bs2; 338 for ( k=0; k<bs; k++ ) { 339 for ( l=0; l<bs; l++ ) { 340 if (PetscReal(*aa++) <= 0.) continue; 341 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 342 } 343 } 344 } 345 } 346 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 347 } 348 return 0; 349 } 350 351 #undef __FUNC__ 352 #define __FUNC__ "MatView_SeqBAIJ" /* ADIC Ignore */ 353 int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 354 { 355 Mat A = (Mat) obj; 356 ViewerType vtype; 357 int ierr; 358 359 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 360 if (vtype == MATLAB_VIEWER) { 361 SETERRQ(1,0,"Matlab viewer not supported"); 362 } 363 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 364 return MatView_SeqBAIJ_ASCII(A,viewer); 365 } 366 else if (vtype == BINARY_FILE_VIEWER) { 367 return MatView_SeqBAIJ_Binary(A,viewer); 368 } 369 else if (vtype == DRAW_VIEWER) { 370 return MatView_SeqBAIJ_Draw(A,viewer); 371 } 372 return 0; 373 } 374 375 #define CHUNKSIZE 10 376 377 #undef __FUNC__ 378 #define __FUNC__ "MatSetValues_SeqBAIJ" 379 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 380 { 381 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 382 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 383 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 384 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 385 int ridx,cidx,bs2=a->bs2; 386 Scalar *ap,value,*aa=a->a,*bap; 387 388 for ( k=0; k<m; k++ ) { /* loop over added rows */ 389 row = im[k]; brow = row/bs; 390 #if defined(PETSC_BOPT_g) 391 if (row < 0) SETERRQ(1,0,"Negative row"); 392 if (row >= a->m) SETERRQ(1,0,"Row too large"); 393 #endif 394 rp = aj + ai[brow]; 395 ap = aa + bs2*ai[brow]; 396 rmax = imax[brow]; 397 nrow = ailen[brow]; 398 low = 0; 399 for ( l=0; l<n; l++ ) { /* loop over added columns */ 400 #if defined(PETSC_BOPT_g) 401 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 402 if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 403 #endif 404 col = in[l]; bcol = col/bs; 405 ridx = row % bs; cidx = col % bs; 406 if (roworiented) { 407 value = *v++; 408 } 409 else { 410 value = v[k + l*m]; 411 } 412 if (!sorted) low = 0; high = nrow; 413 while (high-low > 7) { 414 t = (low+high)/2; 415 if (rp[t] > bcol) high = t; 416 else low = t; 417 } 418 for ( i=low; i<high; i++ ) { 419 if (rp[i] > bcol) break; 420 if (rp[i] == bcol) { 421 bap = ap + bs2*i + bs*cidx + ridx; 422 if (is == ADD_VALUES) *bap += value; 423 else *bap = value; 424 goto noinsert1; 425 } 426 } 427 if (nonew == 1) goto noinsert1; 428 else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 429 if (nrow >= rmax) { 430 /* there is no extra room in row, therefore enlarge */ 431 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 432 Scalar *new_a; 433 434 if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 435 436 /* Malloc new storage space */ 437 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 438 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 439 new_j = (int *) (new_a + bs2*new_nz); 440 new_i = new_j + new_nz; 441 442 /* copy over old data into new slots */ 443 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 444 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 445 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 446 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 447 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 448 len*sizeof(int)); 449 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 450 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 451 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 452 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 453 /* free up old matrix storage */ 454 PetscFree(a->a); 455 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 456 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 457 a->singlemalloc = 1; 458 459 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 460 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 461 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 462 a->maxnz += bs2*CHUNKSIZE; 463 a->reallocs++; 464 a->nz++; 465 } 466 N = nrow++ - 1; 467 /* shift up all the later entries in this row */ 468 for ( ii=N; ii>=i; ii-- ) { 469 rp[ii+1] = rp[ii]; 470 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 471 } 472 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 473 rp[i] = bcol; 474 ap[bs2*i + bs*cidx + ridx] = value; 475 noinsert1:; 476 low = i; 477 } 478 ailen[brow] = nrow; 479 } 480 return 0; 481 } 482 483 #undef __FUNC__ 484 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ" 485 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 486 { 487 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 488 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 489 int *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented; 490 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval; 491 Scalar *ap,*value=v,*aa=a->a,*bap; 492 493 if (roworiented) { 494 stepval = (n-1)*bs; 495 } else { 496 stepval = (m-1)*bs; 497 } 498 for ( k=0; k<m; k++ ) { /* loop over added rows */ 499 row = im[k]; 500 #if defined(PETSC_BOPT_g) 501 if (row < 0) SETERRQ(1,0,"Negative row"); 502 if (row >= a->mbs) SETERRQ(1,0,"Row too large"); 503 #endif 504 rp = aj + ai[row]; 505 ap = aa + bs2*ai[row]; 506 rmax = imax[row]; 507 nrow = ailen[row]; 508 low = 0; 509 for ( l=0; l<n; l++ ) { /* loop over added columns */ 510 #if defined(PETSC_BOPT_g) 511 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 512 if (in[l] >= a->nbs) SETERRQ(1,0,"Column too large"); 513 #endif 514 col = in[l]; 515 if (roworiented) { 516 value = v + k*(stepval+bs)*bs + l*bs; 517 } else { 518 value = v + l*(stepval+bs)*bs + k*bs; 519 } 520 if (!sorted) low = 0; high = nrow; 521 while (high-low > 7) { 522 t = (low+high)/2; 523 if (rp[t] > col) high = t; 524 else low = t; 525 } 526 for ( i=low; i<high; i++ ) { 527 if (rp[i] > col) break; 528 if (rp[i] == col) { 529 bap = ap + bs2*i; 530 if (roworiented) { 531 if (is == ADD_VALUES) { 532 for ( ii=0; ii<bs; ii++,value+=stepval ) { 533 for (jj=ii; jj<bs2; jj+=bs ) { 534 bap[jj] += *value++; 535 } 536 } 537 } else { 538 for ( ii=0; ii<bs; ii++,value+=stepval ) { 539 for (jj=ii; jj<bs2; jj+=bs ) { 540 bap[jj] = *value++; 541 } 542 } 543 } 544 } else { 545 if (is == ADD_VALUES) { 546 for ( ii=0; ii<bs; ii++,value+=stepval ) { 547 for (jj=0; jj<bs; jj++ ) { 548 *bap++ += *value++; 549 } 550 } 551 } else { 552 for ( ii=0; ii<bs; ii++,value+=stepval ) { 553 for (jj=0; jj<bs; jj++ ) { 554 *bap++ = *value++; 555 } 556 } 557 } 558 } 559 goto noinsert2; 560 } 561 } 562 if (nonew == 1) goto noinsert2; 563 else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 564 if (nrow >= rmax) { 565 /* there is no extra room in row, therefore enlarge */ 566 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 567 Scalar *new_a; 568 569 if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 570 571 /* malloc new storage space */ 572 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 573 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 574 new_j = (int *) (new_a + bs2*new_nz); 575 new_i = new_j + new_nz; 576 577 /* copy over old data into new slots */ 578 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} 579 for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 580 PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int)); 581 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 582 PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int)); 583 PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(Scalar)); 584 PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 585 PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(Scalar)); 586 /* free up old matrix storage */ 587 PetscFree(a->a); 588 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 589 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 590 a->singlemalloc = 1; 591 592 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 593 rmax = imax[row] = imax[row] + CHUNKSIZE; 594 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 595 a->maxnz += bs2*CHUNKSIZE; 596 a->reallocs++; 597 a->nz++; 598 } 599 N = nrow++ - 1; 600 /* shift up all the later entries in this row */ 601 for ( ii=N; ii>=i; ii-- ) { 602 rp[ii+1] = rp[ii]; 603 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 604 } 605 if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 606 rp[i] = col; 607 bap = ap + bs2*i; 608 if (roworiented) { 609 for ( ii=0; ii<bs; ii++,value+=stepval) { 610 for (jj=ii; jj<bs2; jj+=bs ) { 611 bap[jj] = *value++; 612 } 613 } 614 } else { 615 for ( ii=0; ii<bs; ii++,value+=stepval ) { 616 for (jj=0; jj<bs; jj++ ) { 617 *bap++ = *value++; 618 } 619 } 620 } 621 noinsert2:; 622 low = i; 623 } 624 ailen[row] = nrow; 625 } 626 return 0; 627 } 628 629 #undef __FUNC__ 630 #define __FUNC__ "MatGetSize_SeqBAIJ" /* ADIC Ignore */ 631 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 632 { 633 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 634 *m = a->m; *n = a->n; 635 return 0; 636 } 637 638 #undef __FUNC__ 639 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" /* ADIC Ignore */ 640 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 641 { 642 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 643 *m = 0; *n = a->m; 644 return 0; 645 } 646 647 #undef __FUNC__ 648 #define __FUNC__ "MatGetRow_SeqBAIJ" 649 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 650 { 651 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 652 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 653 Scalar *aa,*v_i,*aa_i; 654 655 bs = a->bs; 656 ai = a->i; 657 aj = a->j; 658 aa = a->a; 659 bs2 = a->bs2; 660 661 if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range"); 662 663 bn = row/bs; /* Block number */ 664 bp = row % bs; /* Block Position */ 665 M = ai[bn+1] - ai[bn]; 666 *nz = bs*M; 667 668 if (v) { 669 *v = 0; 670 if (*nz) { 671 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 672 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 673 v_i = *v + i*bs; 674 aa_i = aa + bs2*(ai[bn] + i); 675 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 676 } 677 } 678 } 679 680 if (idx) { 681 *idx = 0; 682 if (*nz) { 683 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 684 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 685 idx_i = *idx + i*bs; 686 itmp = bs*aj[ai[bn] + i]; 687 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 688 } 689 } 690 } 691 return 0; 692 } 693 694 #undef __FUNC__ 695 #define __FUNC__ "MatRestoreRow_SeqBAIJ" /* ADIC Ignore */ 696 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 697 { 698 if (idx) {if (*idx) PetscFree(*idx);} 699 if (v) {if (*v) PetscFree(*v);} 700 return 0; 701 } 702 703 #undef __FUNC__ 704 #define __FUNC__ "MatTranspose_SeqBAIJ" 705 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 706 { 707 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 708 Mat C; 709 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 710 int *rows,*cols,bs2=a->bs2; 711 Scalar *array=a->a; 712 713 if (B==PETSC_NULL && mbs!=nbs) 714 SETERRQ(1,0,"Square matrix only for in-place"); 715 col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 716 PetscMemzero(col,(1+nbs)*sizeof(int)); 717 718 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 719 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 720 PetscFree(col); 721 rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 722 cols = rows + bs; 723 for ( i=0; i<mbs; i++ ) { 724 cols[0] = i*bs; 725 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 726 len = ai[i+1] - ai[i]; 727 for ( j=0; j<len; j++ ) { 728 rows[0] = (*aj++)*bs; 729 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 730 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 731 array += bs2; 732 } 733 } 734 PetscFree(rows); 735 736 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 737 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 738 739 if (B != PETSC_NULL) { 740 *B = C; 741 } else { 742 /* This isn't really an in-place transpose */ 743 PetscFree(a->a); 744 if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 745 if (a->diag) PetscFree(a->diag); 746 if (a->ilen) PetscFree(a->ilen); 747 if (a->imax) PetscFree(a->imax); 748 if (a->solve_work) PetscFree(a->solve_work); 749 PetscFree(a); 750 PetscMemcpy(A,C,sizeof(struct _p_Mat)); 751 PetscHeaderDestroy(C); 752 } 753 return 0; 754 } 755 756 757 #undef __FUNC__ 758 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ" 759 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 760 { 761 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 762 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 763 int m = a->m,*ip, N, *ailen = a->ilen; 764 int mbs = a->mbs, bs2 = a->bs2,rmax = 0; 765 Scalar *aa = a->a, *ap; 766 767 if (mode == MAT_FLUSH_ASSEMBLY) return 0; 768 769 if (m) rmax = ailen[0]; 770 for ( i=1; i<mbs; i++ ) { 771 /* move each row back by the amount of empty slots (fshift) before it*/ 772 fshift += imax[i-1] - ailen[i-1]; 773 rmax = PetscMax(rmax,ailen[i]); 774 if (fshift) { 775 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 776 N = ailen[i]; 777 for ( j=0; j<N; j++ ) { 778 ip[j-fshift] = ip[j]; 779 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 780 } 781 } 782 ai[i] = ai[i-1] + ailen[i-1]; 783 } 784 if (mbs) { 785 fshift += imax[mbs-1] - ailen[mbs-1]; 786 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 787 } 788 /* reset ilen and imax for each row */ 789 for ( i=0; i<mbs; i++ ) { 790 ailen[i] = imax[i] = ai[i+1] - ai[i]; 791 } 792 a->nz = ai[mbs]; 793 794 /* diagonals may have moved, so kill the diagonal pointers */ 795 if (fshift && a->diag) { 796 PetscFree(a->diag); 797 PLogObjectMemory(A,-(m+1)*sizeof(int)); 798 a->diag = 0; 799 } 800 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 801 m,a->n,a->bs,fshift*bs2,a->nz*bs2); 802 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 803 a->reallocs); 804 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 805 a->reallocs = 0; 806 A->info.nz_unneeded = (double)fshift*bs2; 807 808 return 0; 809 } 810 811 #undef __FUNC__ 812 #define __FUNC__ "MatZeroEntries_SeqBAIJ" 813 int MatZeroEntries_SeqBAIJ(Mat A) 814 { 815 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 816 PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 817 return 0; 818 } 819 820 #undef __FUNC__ 821 #define __FUNC__ "MatDestroy_SeqBAIJ" /* ADIC Ignore */ 822 int MatDestroy_SeqBAIJ(PetscObject obj) 823 { 824 Mat A = (Mat) obj; 825 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 826 827 #if defined(PETSC_LOG) 828 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 829 #endif 830 PetscFree(a->a); 831 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 832 if (a->diag) PetscFree(a->diag); 833 if (a->ilen) PetscFree(a->ilen); 834 if (a->imax) PetscFree(a->imax); 835 if (a->solve_work) PetscFree(a->solve_work); 836 if (a->mult_work) PetscFree(a->mult_work); 837 PetscFree(a); 838 PLogObjectDestroy(A); 839 PetscHeaderDestroy(A); 840 return 0; 841 } 842 843 #undef __FUNC__ 844 #define __FUNC__ "MatSetOption_SeqBAIJ" /* ADIC Ignore */ 845 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 846 { 847 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 848 if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 849 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 850 else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 851 else if (op == MAT_COLUMNS_UNSORTED) a->sorted = 0; 852 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 853 else if (op == MAT_NEW_NONZERO_LOCATION_ERROR) a->nonew = -1; 854 else if (op == MAT_NEW_NONZERO_ALLOCATION_ERROR) a->nonew = -2; 855 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 856 else if (op == MAT_ROWS_SORTED || 857 op == MAT_ROWS_UNSORTED || 858 op == MAT_SYMMETRIC || 859 op == MAT_STRUCTURALLY_SYMMETRIC || 860 op == MAT_YES_NEW_DIAGONALS || 861 op == MAT_IGNORE_OFF_PROC_ENTRIES) 862 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 863 else if (op == MAT_NO_NEW_DIAGONALS) 864 {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 865 else 866 {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 867 return 0; 868 } 869 870 871 /* -------------------------------------------------------*/ 872 /* Should check that shapes of vectors and matrices match */ 873 /* -------------------------------------------------------*/ 874 #include "pinclude/plapack.h" 875 876 #undef __FUNC__ 877 #define __FUNC__ "MatMult_SeqBAIJ_1" 878 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 879 { 880 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 881 register Scalar *x,*z,*v,sum; 882 int mbs=a->mbs,i,*idx,*ii,n; 883 884 VecGetArray_Fast(xx,x); 885 VecGetArray_Fast(zz,z); 886 887 idx = a->j; 888 v = a->a; 889 ii = a->i; 890 891 for ( i=0; i<mbs; i++ ) { 892 n = ii[1] - ii[0]; ii++; 893 sum = 0.0; 894 while (n--) sum += *v++ * x[*idx++]; 895 z[i] = sum; 896 } 897 VecRestoreArray_Fast(xx,x); 898 VecRestoreArray_Fast(zz,z); 899 PLogFlops(2*a->nz - a->m); 900 return 0; 901 } 902 903 #undef __FUNC__ 904 #define __FUNC__ "MatMult_SeqBAIJ_2" 905 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 906 { 907 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 908 register Scalar *x,*z,*v,*xb,sum1,sum2; 909 register Scalar x1,x2; 910 int mbs=a->mbs,i,*idx,*ii; 911 int j,n; 912 913 VecGetArray_Fast(xx,x); 914 VecGetArray_Fast(zz,z); 915 916 idx = a->j; 917 v = a->a; 918 ii = a->i; 919 920 for ( i=0; i<mbs; i++ ) { 921 n = ii[1] - ii[0]; ii++; 922 sum1 = 0.0; sum2 = 0.0; 923 for ( j=0; j<n; j++ ) { 924 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 925 sum1 += v[0]*x1 + v[2]*x2; 926 sum2 += v[1]*x1 + v[3]*x2; 927 v += 4; 928 } 929 z[0] = sum1; z[1] = sum2; 930 z += 2; 931 } 932 VecRestoreArray_Fast(xx,x); 933 VecRestoreArray_Fast(zz,z); 934 PLogFlops(4*a->nz - a->m); 935 return 0; 936 } 937 938 #undef __FUNC__ 939 #define __FUNC__ "MatMult_SeqBAIJ_3" 940 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 941 { 942 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 943 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 944 int mbs=a->mbs,i,*idx,*ii,j,n; 945 946 VecGetArray_Fast(xx,x); 947 VecGetArray_Fast(zz,z); 948 949 idx = a->j; 950 v = a->a; 951 ii = a->i; 952 953 for ( i=0; i<mbs; i++ ) { 954 n = ii[1] - ii[0]; ii++; 955 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 956 for ( j=0; j<n; j++ ) { 957 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 958 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 959 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 960 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 961 v += 9; 962 } 963 z[0] = sum1; z[1] = sum2; z[2] = sum3; 964 z += 3; 965 } 966 VecRestoreArray_Fast(xx,x); 967 VecRestoreArray_Fast(zz,z); 968 PLogFlops(18*a->nz - a->m); 969 return 0; 970 } 971 972 #undef __FUNC__ 973 #define __FUNC__ "MatMult_SeqBAIJ_4" 974 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 975 { 976 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 977 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4; 978 register Scalar x1,x2,x3,x4; 979 int mbs=a->mbs,i,*idx,*ii; 980 int j,n; 981 982 VecGetArray_Fast(xx,x); 983 VecGetArray_Fast(zz,z); 984 985 idx = a->j; 986 v = a->a; 987 ii = a->i; 988 989 for ( i=0; i<mbs; i++ ) { 990 n = ii[1] - ii[0]; ii++; 991 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 992 for ( j=0; j<n; j++ ) { 993 xb = x + 4*(*idx++); 994 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 995 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 996 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 997 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 998 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 999 v += 16; 1000 } 1001 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1002 z += 4; 1003 } 1004 VecRestoreArray_Fast(xx,x); 1005 VecRestoreArray_Fast(zz,z); 1006 PLogFlops(32*a->nz - a->m); 1007 return 0; 1008 } 1009 1010 #undef __FUNC__ 1011 #define __FUNC__ "MatMult_SeqBAIJ_5" 1012 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 1013 { 1014 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1015 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 1016 register Scalar x1,x2,x3,x4,x5; 1017 int mbs=a->mbs,i,*idx,*ii,j,n; 1018 1019 VecGetArray_Fast(xx,x); 1020 VecGetArray_Fast(zz,z); 1021 1022 idx = a->j; 1023 v = a->a; 1024 ii = a->i; 1025 1026 for ( i=0; i<mbs; i++ ) { 1027 n = ii[1] - ii[0]; ii++; 1028 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 1029 for ( j=0; j<n; j++ ) { 1030 xb = x + 5*(*idx++); 1031 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1032 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1033 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1034 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1035 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1036 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1037 v += 25; 1038 } 1039 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1040 z += 5; 1041 } 1042 VecRestoreArray_Fast(xx,x); 1043 VecRestoreArray_Fast(zz,z); 1044 PLogFlops(50*a->nz - a->m); 1045 return 0; 1046 } 1047 1048 #undef __FUNC__ 1049 #define __FUNC__ "MatMult_SeqBAIJ_7" 1050 static int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz) 1051 { 1052 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1053 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 1054 register Scalar x1,x2,x3,x4,x5,x6,x7; 1055 int mbs=a->mbs,i,*idx,*ii,j,n; 1056 1057 VecGetArray_Fast(xx,x); 1058 VecGetArray_Fast(zz,z); 1059 1060 idx = a->j; 1061 v = a->a; 1062 ii = a->i; 1063 1064 for ( i=0; i<mbs; i++ ) { 1065 n = ii[1] - ii[0]; ii++; 1066 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; 1067 for ( j=0; j<n; j++ ) { 1068 xb = x + 7*(*idx++); 1069 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 1070 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1071 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1072 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1073 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1074 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1075 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1076 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1077 v += 49; 1078 } 1079 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 1080 z += 7; 1081 } 1082 1083 VecRestoreArray_Fast(xx,x); 1084 VecRestoreArray_Fast(zz,z); 1085 PLogFlops(98*a->nz - a->m); 1086 return 0; 1087 } 1088 1089 #undef __FUNC__ 1090 #define __FUNC__ "MatMult_SeqBAIJ_N" 1091 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 1092 { 1093 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1094 register Scalar *x,*z,*v,*xb; 1095 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2; 1096 int _One = 1,ncols,k; 1097 Scalar _DOne = 1.0,*work,*workt,_DZero = 0.0; 1098 1099 VecGetArray_Fast(xx,x); 1100 VecGetArray_Fast(zz,z); 1101 1102 idx = a->j; 1103 v = a->a; 1104 ii = a->i; 1105 1106 1107 if (!a->mult_work) { 1108 k = PetscMax(a->m,a->n); 1109 a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 1110 } 1111 work = a->mult_work; 1112 for ( i=0; i<mbs; i++ ) { 1113 n = ii[1] - ii[0]; ii++; 1114 ncols = n*bs; 1115 workt = work; 1116 for ( j=0; j<n; j++ ) { 1117 xb = x + bs*(*idx++); 1118 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1119 workt += bs; 1120 } 1121 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 1122 v += n*bs2; 1123 z += bs; 1124 } 1125 VecRestoreArray_Fast(xx,x); 1126 VecRestoreArray_Fast(zz,z); 1127 PLogFlops(2*a->nz*bs2 - a->m); 1128 return 0; 1129 } 1130 1131 #undef __FUNC__ 1132 #define __FUNC__ "MatMultAdd_SeqBAIJ_1" 1133 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 1134 { 1135 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1136 register Scalar *x,*y,*z,*v,sum; 1137 int mbs=a->mbs,i,*idx,*ii,n; 1138 1139 VecGetArray_Fast(xx,x); 1140 VecGetArray_Fast(yy,y); 1141 VecGetArray_Fast(zz,z); 1142 1143 idx = a->j; 1144 v = a->a; 1145 ii = a->i; 1146 1147 for ( i=0; i<mbs; i++ ) { 1148 n = ii[1] - ii[0]; ii++; 1149 sum = y[i]; 1150 while (n--) sum += *v++ * x[*idx++]; 1151 z[i] = sum; 1152 } 1153 VecRestoreArray_Fast(xx,x); 1154 VecRestoreArray_Fast(yy,y); 1155 VecRestoreArray_Fast(zz,z); 1156 PLogFlops(2*a->nz); 1157 return 0; 1158 } 1159 1160 #undef __FUNC__ 1161 #define __FUNC__ "MatMultAdd_SeqBAIJ_2" 1162 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 1163 { 1164 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1165 register Scalar *x,*y,*z,*v,*xb,sum1,sum2; 1166 register Scalar x1,x2; 1167 int mbs=a->mbs,i,*idx,*ii; 1168 int j,n; 1169 1170 VecGetArray_Fast(xx,x); 1171 VecGetArray_Fast(yy,y); 1172 VecGetArray_Fast(zz,z); 1173 1174 idx = a->j; 1175 v = a->a; 1176 ii = a->i; 1177 1178 for ( i=0; i<mbs; i++ ) { 1179 n = ii[1] - ii[0]; ii++; 1180 sum1 = y[0]; sum2 = y[1]; 1181 for ( j=0; j<n; j++ ) { 1182 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 1183 sum1 += v[0]*x1 + v[2]*x2; 1184 sum2 += v[1]*x1 + v[3]*x2; 1185 v += 4; 1186 } 1187 z[0] = sum1; z[1] = sum2; 1188 z += 2; y += 2; 1189 } 1190 VecRestoreArray_Fast(xx,x); 1191 VecRestoreArray_Fast(yy,y); 1192 VecRestoreArray_Fast(zz,z); 1193 PLogFlops(4*a->nz); 1194 return 0; 1195 } 1196 1197 #undef __FUNC__ 1198 #define __FUNC__ "MatMultAdd_SeqBAIJ_3" 1199 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 1200 { 1201 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1202 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 1203 int mbs=a->mbs,i,*idx,*ii,j,n; 1204 1205 VecGetArray_Fast(xx,x); 1206 VecGetArray_Fast(yy,y); 1207 VecGetArray_Fast(zz,z); 1208 1209 idx = a->j; 1210 v = a->a; 1211 ii = a->i; 1212 1213 for ( i=0; i<mbs; i++ ) { 1214 n = ii[1] - ii[0]; ii++; 1215 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 1216 for ( j=0; j<n; j++ ) { 1217 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1218 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 1219 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 1220 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 1221 v += 9; 1222 } 1223 z[0] = sum1; z[1] = sum2; z[2] = sum3; 1224 z += 3; y += 3; 1225 } 1226 VecRestoreArray_Fast(xx,x); 1227 VecRestoreArray_Fast(yy,y); 1228 VecRestoreArray_Fast(zz,z); 1229 PLogFlops(18*a->nz); 1230 return 0; 1231 } 1232 1233 #undef __FUNC__ 1234 #define __FUNC__ "MatMultAdd_SeqBAIJ_4" 1235 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 1236 { 1237 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1238 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4; 1239 register Scalar x1,x2,x3,x4; 1240 int mbs=a->mbs,i,*idx,*ii; 1241 int j,n; 1242 1243 VecGetArray_Fast(xx,x); 1244 VecGetArray_Fast(yy,y); 1245 VecGetArray_Fast(zz,z); 1246 1247 idx = a->j; 1248 v = a->a; 1249 ii = a->i; 1250 1251 for ( i=0; i<mbs; i++ ) { 1252 n = ii[1] - ii[0]; ii++; 1253 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 1254 for ( j=0; j<n; j++ ) { 1255 xb = x + 4*(*idx++); 1256 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1257 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1258 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1259 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1260 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 1261 v += 16; 1262 } 1263 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1264 z += 4; y += 4; 1265 } 1266 VecRestoreArray_Fast(xx,x); 1267 VecRestoreArray_Fast(yy,y); 1268 VecRestoreArray_Fast(zz,z); 1269 PLogFlops(32*a->nz); 1270 return 0; 1271 } 1272 1273 #undef __FUNC__ 1274 #define __FUNC__ "MatMultAdd_SeqBAIJ_5" 1275 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 1276 { 1277 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1278 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 1279 register Scalar x1,x2,x3,x4,x5; 1280 int mbs=a->mbs,i,*idx,*ii,j,n; 1281 1282 VecGetArray_Fast(xx,x); 1283 VecGetArray_Fast(yy,y); 1284 VecGetArray_Fast(zz,z); 1285 1286 idx = a->j; 1287 v = a->a; 1288 ii = a->i; 1289 1290 for ( i=0; i<mbs; i++ ) { 1291 n = ii[1] - ii[0]; ii++; 1292 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 1293 for ( j=0; j<n; j++ ) { 1294 xb = x + 5*(*idx++); 1295 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1296 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1297 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1298 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1299 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1300 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1301 v += 25; 1302 } 1303 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1304 z += 5; y += 5; 1305 } 1306 VecRestoreArray_Fast(xx,x); 1307 VecRestoreArray_Fast(yy,y); 1308 VecRestoreArray_Fast(zz,z); 1309 PLogFlops(50*a->nz); 1310 return 0; 1311 } 1312 1313 #undef __FUNC__ 1314 #define __FUNC__ "MatMultAdd_SeqBAIJ_7" 1315 static int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1316 { 1317 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1318 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 1319 register Scalar x1,x2,x3,x4,x5,x6,x7; 1320 int mbs=a->mbs,i,*idx,*ii,j,n; 1321 1322 VecGetArray_Fast(xx,x); 1323 VecGetArray_Fast(yy,y); 1324 VecGetArray_Fast(zz,z); 1325 1326 idx = a->j; 1327 v = a->a; 1328 ii = a->i; 1329 1330 for ( i=0; i<mbs; i++ ) { 1331 n = ii[1] - ii[0]; ii++; 1332 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6]; 1333 for ( j=0; j<n; j++ ) { 1334 xb = x + 7*(*idx++); 1335 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 1336 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1337 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1338 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1339 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1340 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1341 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1342 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1343 v += 49; 1344 } 1345 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 1346 z += 7; y += 7; 1347 } 1348 VecRestoreArray_Fast(xx,x); 1349 VecRestoreArray_Fast(yy,y); 1350 VecRestoreArray_Fast(zz,z); 1351 PLogFlops(98*a->nz); 1352 return 0; 1353 } 1354 1355 1356 #undef __FUNC__ 1357 #define __FUNC__ "MatMultAdd_SeqBAIJ_N" 1358 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1359 { 1360 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1361 register Scalar *x,*z,*v,*xb; 1362 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 1363 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1364 1365 if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1366 1367 VecGetArray_Fast(xx,x); 1368 VecGetArray_Fast(zz,z); 1369 1370 idx = a->j; 1371 v = a->a; 1372 ii = a->i; 1373 1374 1375 if (!a->mult_work) { 1376 k = PetscMax(a->m,a->n); 1377 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 1378 } 1379 work = a->mult_work; 1380 for ( i=0; i<mbs; i++ ) { 1381 n = ii[1] - ii[0]; ii++; 1382 ncols = n*bs; 1383 workt = work; 1384 for ( j=0; j<n; j++ ) { 1385 xb = x + bs*(*idx++); 1386 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1387 workt += bs; 1388 } 1389 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 1390 v += n*bs2; 1391 z += bs; 1392 } 1393 VecRestoreArray_Fast(xx,x); 1394 VecRestoreArray_Fast(zz,z); 1395 PLogFlops(2*a->nz*bs2 ); 1396 return 0; 1397 } 1398 1399 #undef __FUNC__ 1400 #define __FUNC__ "MatMultTrans_SeqBAIJ" 1401 int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 1402 { 1403 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1404 Scalar *xg,*zg,*zb; 1405 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1406 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1407 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1408 1409 1410 VecGetArray_Fast(xx,xg); x = xg; 1411 VecGetArray_Fast(zz,zg); z = zg; 1412 PetscMemzero(z,N*sizeof(Scalar)); 1413 1414 idx = a->j; 1415 v = a->a; 1416 ii = a->i; 1417 1418 switch (bs) { 1419 case 1: 1420 for ( i=0; i<mbs; i++ ) { 1421 n = ii[1] - ii[0]; ii++; 1422 xb = x + i; x1 = xb[0]; 1423 ib = idx + ai[i]; 1424 for ( j=0; j<n; j++ ) { 1425 rval = ib[j]; 1426 z[rval] += *v++ * x1; 1427 } 1428 } 1429 break; 1430 case 2: 1431 for ( i=0; i<mbs; i++ ) { 1432 n = ii[1] - ii[0]; ii++; 1433 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1434 ib = idx + ai[i]; 1435 for ( j=0; j<n; j++ ) { 1436 rval = ib[j]*2; 1437 z[rval++] += v[0]*x1 + v[1]*x2; 1438 z[rval++] += v[2]*x1 + v[3]*x2; 1439 v += 4; 1440 } 1441 } 1442 break; 1443 case 3: 1444 for ( i=0; i<mbs; i++ ) { 1445 n = ii[1] - ii[0]; ii++; 1446 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1447 ib = idx + ai[i]; 1448 for ( j=0; j<n; j++ ) { 1449 rval = ib[j]*3; 1450 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1451 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1452 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1453 v += 9; 1454 } 1455 } 1456 break; 1457 case 4: 1458 for ( i=0; i<mbs; i++ ) { 1459 n = ii[1] - ii[0]; ii++; 1460 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1461 ib = idx + ai[i]; 1462 for ( j=0; j<n; j++ ) { 1463 rval = ib[j]*4; 1464 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1465 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1466 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1467 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1468 v += 16; 1469 } 1470 } 1471 break; 1472 case 5: 1473 for ( i=0; i<mbs; i++ ) { 1474 n = ii[1] - ii[0]; ii++; 1475 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1476 x4 = xb[3]; x5 = xb[4]; 1477 ib = idx + ai[i]; 1478 for ( j=0; j<n; j++ ) { 1479 rval = ib[j]*5; 1480 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1481 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1482 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1483 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1484 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1485 v += 25; 1486 } 1487 } 1488 break; 1489 /* block sizes larger then 5 by 5 are handled by BLAS */ 1490 default: { 1491 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1492 if (!a->mult_work) { 1493 k = PetscMax(a->m,a->n); 1494 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1495 CHKPTRQ(a->mult_work); 1496 } 1497 work = a->mult_work; 1498 for ( i=0; i<mbs; i++ ) { 1499 n = ii[1] - ii[0]; ii++; 1500 ncols = n*bs; 1501 PetscMemzero(work,ncols*sizeof(Scalar)); 1502 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1503 v += n*bs2; 1504 x += bs; 1505 workt = work; 1506 for ( j=0; j<n; j++ ) { 1507 zb = z + bs*(*idx++); 1508 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1509 workt += bs; 1510 } 1511 } 1512 } 1513 } 1514 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1515 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1516 PLogFlops(2*a->nz*a->bs2 - a->n); 1517 return 0; 1518 } 1519 1520 #undef __FUNC__ 1521 #define __FUNC__ "MatMultTransAdd_SeqBAIJ" 1522 int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1523 { 1524 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1525 Scalar *xg,*zg,*zb; 1526 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1527 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1528 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1529 1530 1531 1532 VecGetArray_Fast(xx,xg); x = xg; 1533 VecGetArray_Fast(zz,zg); z = zg; 1534 1535 if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1536 else PetscMemzero(z,N*sizeof(Scalar)); 1537 1538 1539 idx = a->j; 1540 v = a->a; 1541 ii = a->i; 1542 1543 switch (bs) { 1544 case 1: 1545 for ( i=0; i<mbs; i++ ) { 1546 n = ii[1] - ii[0]; ii++; 1547 xb = x + i; x1 = xb[0]; 1548 ib = idx + ai[i]; 1549 for ( j=0; j<n; j++ ) { 1550 rval = ib[j]; 1551 z[rval] += *v++ * x1; 1552 } 1553 } 1554 break; 1555 case 2: 1556 for ( i=0; i<mbs; i++ ) { 1557 n = ii[1] - ii[0]; ii++; 1558 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1559 ib = idx + ai[i]; 1560 for ( j=0; j<n; j++ ) { 1561 rval = ib[j]*2; 1562 z[rval++] += v[0]*x1 + v[1]*x2; 1563 z[rval++] += v[2]*x1 + v[3]*x2; 1564 v += 4; 1565 } 1566 } 1567 break; 1568 case 3: 1569 for ( i=0; i<mbs; i++ ) { 1570 n = ii[1] - ii[0]; ii++; 1571 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1572 ib = idx + ai[i]; 1573 for ( j=0; j<n; j++ ) { 1574 rval = ib[j]*3; 1575 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1576 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1577 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1578 v += 9; 1579 } 1580 } 1581 break; 1582 case 4: 1583 for ( i=0; i<mbs; i++ ) { 1584 n = ii[1] - ii[0]; ii++; 1585 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1586 ib = idx + ai[i]; 1587 for ( j=0; j<n; j++ ) { 1588 rval = ib[j]*4; 1589 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1590 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1591 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1592 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1593 v += 16; 1594 } 1595 } 1596 break; 1597 case 5: 1598 for ( i=0; i<mbs; i++ ) { 1599 n = ii[1] - ii[0]; ii++; 1600 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1601 x4 = xb[3]; x5 = xb[4]; 1602 ib = idx + ai[i]; 1603 for ( j=0; j<n; j++ ) { 1604 rval = ib[j]*5; 1605 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1606 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1607 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1608 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1609 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1610 v += 25; 1611 } 1612 } 1613 break; 1614 /* block sizes larger then 5 by 5 are handled by BLAS */ 1615 default: { 1616 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1617 if (!a->mult_work) { 1618 k = PetscMax(a->m,a->n); 1619 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1620 CHKPTRQ(a->mult_work); 1621 } 1622 work = a->mult_work; 1623 for ( i=0; i<mbs; i++ ) { 1624 n = ii[1] - ii[0]; ii++; 1625 ncols = n*bs; 1626 PetscMemzero(work,ncols*sizeof(Scalar)); 1627 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1628 v += n*bs2; 1629 x += bs; 1630 workt = work; 1631 for ( j=0; j<n; j++ ) { 1632 zb = z + bs*(*idx++); 1633 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1634 workt += bs; 1635 } 1636 } 1637 } 1638 } 1639 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1640 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1641 PLogFlops(2*a->nz*a->bs2); 1642 return 0; 1643 } 1644 1645 #undef __FUNC__ 1646 #define __FUNC__ "MatGetInfo_SeqBAIJ" /* ADIC Ignore */ 1647 int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) 1648 { 1649 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1650 1651 info->rows_global = (double)a->m; 1652 info->columns_global = (double)a->n; 1653 info->rows_local = (double)a->m; 1654 info->columns_local = (double)a->n; 1655 info->block_size = a->bs2; 1656 info->nz_allocated = a->maxnz; 1657 info->nz_used = a->bs2*a->nz; 1658 info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 1659 /* if (info->nz_unneeded != A->info.nz_unneeded) 1660 printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */ 1661 info->assemblies = A->num_ass; 1662 info->mallocs = a->reallocs; 1663 info->memory = A->mem; 1664 if (A->factor) { 1665 info->fill_ratio_given = A->info.fill_ratio_given; 1666 info->fill_ratio_needed = A->info.fill_ratio_needed; 1667 info->factor_mallocs = A->info.factor_mallocs; 1668 } else { 1669 info->fill_ratio_given = 0; 1670 info->fill_ratio_needed = 0; 1671 info->factor_mallocs = 0; 1672 } 1673 return 0; 1674 } 1675 1676 #undef __FUNC__ 1677 #define __FUNC__ "MatEqual_SeqBAIJ" 1678 int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 1679 { 1680 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 1681 1682 if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type"); 1683 1684 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1685 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 1686 (a->nz != b->nz)) { 1687 *flg = PETSC_FALSE; return 0; 1688 } 1689 1690 /* if the a->i are the same */ 1691 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 1692 *flg = PETSC_FALSE; return 0; 1693 } 1694 1695 /* if a->j are the same */ 1696 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 1697 *flg = PETSC_FALSE; return 0; 1698 } 1699 1700 /* if a->a are the same */ 1701 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 1702 *flg = PETSC_FALSE; return 0; 1703 } 1704 *flg = PETSC_TRUE; 1705 return 0; 1706 1707 } 1708 1709 #undef __FUNC__ 1710 #define __FUNC__ "MatGetDiagonal_SeqBAIJ" 1711 int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 1712 { 1713 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1714 int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1715 Scalar *x, zero = 0.0,*aa,*aa_j; 1716 1717 if (A->factor) SETERRQ(1,0,"Not for factored matrix"); 1718 bs = a->bs; 1719 aa = a->a; 1720 ai = a->i; 1721 aj = a->j; 1722 ambs = a->mbs; 1723 bs2 = a->bs2; 1724 1725 VecSet(&zero,v); 1726 VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n); 1727 if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector"); 1728 for ( i=0; i<ambs; i++ ) { 1729 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1730 if (aj[j] == i) { 1731 row = i*bs; 1732 aa_j = aa+j*bs2; 1733 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1734 break; 1735 } 1736 } 1737 } 1738 return 0; 1739 } 1740 1741 #undef __FUNC__ 1742 #define __FUNC__ "MatDiagonalScale_SeqBAIJ" 1743 int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 1744 { 1745 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1746 Scalar *l,*r,x,*v,*aa,*li,*ri; 1747 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 1748 1749 ai = a->i; 1750 aj = a->j; 1751 aa = a->a; 1752 m = a->m; 1753 n = a->n; 1754 bs = a->bs; 1755 mbs = a->mbs; 1756 bs2 = a->bs2; 1757 if (ll) { 1758 VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm); 1759 if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length"); 1760 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1761 M = ai[i+1] - ai[i]; 1762 li = l + i*bs; 1763 v = aa + bs2*ai[i]; 1764 for ( j=0; j<M; j++ ) { /* for each block */ 1765 for ( k=0; k<bs2; k++ ) { 1766 (*v++) *= li[k%bs]; 1767 } 1768 } 1769 } 1770 } 1771 1772 if (rr) { 1773 VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn); 1774 if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length"); 1775 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1776 M = ai[i+1] - ai[i]; 1777 v = aa + bs2*ai[i]; 1778 for ( j=0; j<M; j++ ) { /* for each block */ 1779 ri = r + bs*aj[ai[i]+j]; 1780 for ( k=0; k<bs; k++ ) { 1781 x = ri[k]; 1782 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 1783 } 1784 } 1785 } 1786 } 1787 return 0; 1788 } 1789 1790 1791 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1792 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1793 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1794 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 1795 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 1796 1797 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1798 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1799 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1800 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1801 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1802 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1803 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 1804 1805 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1806 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1807 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1808 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1809 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1810 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1811 1812 #undef __FUNC__ 1813 #define __FUNC__ "MatNorm_SeqBAIJ" 1814 int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 1815 { 1816 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1817 Scalar *v = a->a; 1818 double sum = 0.0; 1819 int i,nz=a->nz,bs2=a->bs2; 1820 1821 if (type == NORM_FROBENIUS) { 1822 for (i=0; i< bs2*nz; i++ ) { 1823 #if defined(PETSC_COMPLEX) 1824 sum += real(conj(*v)*(*v)); v++; 1825 #else 1826 sum += (*v)*(*v); v++; 1827 #endif 1828 } 1829 *norm = sqrt(sum); 1830 } 1831 else { 1832 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 1833 } 1834 return 0; 1835 } 1836 1837 extern int MatSolve_SeqBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 1838 /* 1839 note: This can only work for identity for row and col. It would 1840 be good to check this and otherwise generate an error. 1841 */ 1842 #undef __FUNC__ 1843 #define __FUNC__ "MatILUFactor_SeqBAIJ" 1844 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 1845 { 1846 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1847 Mat outA; 1848 int ierr; 1849 1850 if (fill != 0) SETERRQ(1,0,"Only fill=0 supported"); 1851 1852 outA = inA; 1853 inA->factor = FACTOR_LU; 1854 a->row = row; 1855 a->col = col; 1856 1857 if (!a->solve_work) { 1858 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1859 PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar)); 1860 } 1861 1862 if (!a->diag) { 1863 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 1864 } 1865 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 1866 1867 /* 1868 Blocksize 4 has a special faster solver for ILU(0) factorization 1869 with natural ordering 1870 */ 1871 if (a->bs == 4) { 1872 inA->ops.solve = MatSolve_SeqBAIJ_4_NaturalOrdering; 1873 } 1874 1875 return 0; 1876 } 1877 1878 #undef __FUNC__ 1879 #define __FUNC__ "MatScale_SeqBAIJ" 1880 int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 1881 { 1882 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1883 int one = 1, totalnz = a->bs2*a->nz; 1884 BLscal_( &totalnz, alpha, a->a, &one ); 1885 PLogFlops(totalnz); 1886 return 0; 1887 } 1888 1889 #undef __FUNC__ 1890 #define __FUNC__ "MatGetValues_SeqBAIJ" 1891 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 1892 { 1893 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1894 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1895 int *ai = a->i, *ailen = a->ilen; 1896 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 1897 Scalar *ap, *aa = a->a, zero = 0.0; 1898 1899 for ( k=0; k<m; k++ ) { /* loop over rows */ 1900 row = im[k]; brow = row/bs; 1901 if (row < 0) SETERRQ(1,0,"Negative row"); 1902 if (row >= a->m) SETERRQ(1,0,"Row too large"); 1903 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1904 nrow = ailen[brow]; 1905 for ( l=0; l<n; l++ ) { /* loop over columns */ 1906 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 1907 if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 1908 col = in[l] ; 1909 bcol = col/bs; 1910 cidx = col%bs; 1911 ridx = row%bs; 1912 high = nrow; 1913 low = 0; /* assume unsorted */ 1914 while (high-low > 5) { 1915 t = (low+high)/2; 1916 if (rp[t] > bcol) high = t; 1917 else low = t; 1918 } 1919 for ( i=low; i<high; i++ ) { 1920 if (rp[i] > bcol) break; 1921 if (rp[i] == bcol) { 1922 *v++ = ap[bs2*i+bs*cidx+ridx]; 1923 goto finished; 1924 } 1925 } 1926 *v++ = zero; 1927 finished:; 1928 } 1929 } 1930 return 0; 1931 } 1932 1933 #undef __FUNC__ 1934 #define __FUNC__ "MatGetBlockSize_SeqBAIJ" /* ADIC Ignore */ 1935 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 1936 { 1937 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 1938 *bs = baij->bs; 1939 return 0; 1940 } 1941 1942 /* idx should be of length atlease bs */ 1943 #undef __FUNC__ 1944 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block" 1945 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg) 1946 { 1947 int i,row; 1948 row = idx[0]; 1949 if (row%bs!=0) { *flg = PETSC_FALSE; return 0; } 1950 1951 for ( i=1; i<bs; i++ ) { 1952 if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; } 1953 } 1954 *flg = PETSC_TRUE; 1955 return 0; 1956 } 1957 1958 #undef __FUNC__ 1959 #define __FUNC__ "MatZeroRows_SeqBAIJ" 1960 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 1961 { 1962 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1963 IS is_local; 1964 int ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2; 1965 PetscTruth flg; 1966 Scalar zero = 0.0,*aa; 1967 1968 /* Make a copy of the IS and sort it */ 1969 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 1970 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1971 ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr); 1972 ierr = ISSort(is_local); CHKERRQ(ierr); 1973 ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr); 1974 1975 i = 0; 1976 while (i < is_n) { 1977 if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range"); 1978 flg = PETSC_FALSE; 1979 if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); } 1980 if (flg) { /* There exists a block of rows to be Zerowed */ 1981 baij->ilen[rows[i]/bs] = 0; 1982 i += bs; 1983 } else { /* Zero out only the requested row */ 1984 count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs; 1985 aa = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs); 1986 for ( j=0; j<count; j++ ) { 1987 aa[0] = zero; 1988 aa+=bs; 1989 } 1990 i++; 1991 } 1992 } 1993 if (diag) { 1994 for ( j=0; j<is_n; j++ ) { 1995 ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1996 } 1997 } 1998 ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr); 1999 ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr); 2000 ierr = ISDestroy(is_local); CHKERRQ(ierr); 2001 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2002 2003 return 0; 2004 } 2005 2006 #undef __FUNC__ 2007 #define __FUNC__ "MatPrintHelp_SeqBAIJ" /* ADIC Ignore */ 2008 int MatPrintHelp_SeqBAIJ(Mat A) 2009 { 2010 static int called = 0; 2011 MPI_Comm comm = A->comm; 2012 2013 if (called) return 0; else called = 1; 2014 PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n"); 2015 PetscPrintf(comm," -mat_block_size <block_size>\n"); 2016 return 0; 2017 } 2018 2019 /* -------------------------------------------------------------------*/ 2020 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 2021 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 2022 MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N, 2023 MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 2024 MatSolve_SeqBAIJ_N,0, 2025 0,0, 2026 MatLUFactor_SeqBAIJ,0, 2027 0, 2028 MatTranspose_SeqBAIJ, 2029 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 2030 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 2031 0,MatAssemblyEnd_SeqBAIJ, 2032 0, 2033 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ, 2034 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 2035 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 2036 MatILUFactorSymbolic_SeqBAIJ,0, 2037 0,0, 2038 MatConvertSameType_SeqBAIJ,0,0, 2039 MatILUFactor_SeqBAIJ,0,0, 2040 MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ, 2041 MatGetValues_SeqBAIJ,0, 2042 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 2043 0,0,0,MatGetBlockSize_SeqBAIJ, 2044 MatGetRowIJ_SeqBAIJ, 2045 MatRestoreRowIJ_SeqBAIJ, 2046 0,0,0,0,0,0, 2047 MatSetValuesBlocked_SeqBAIJ}; 2048 2049 #undef __FUNC__ 2050 #define __FUNC__ "MatCreateSeqBAIJ" 2051 /*@C 2052 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2053 compressed row) format. For good matrix assembly performance the 2054 user should preallocate the matrix storage by setting the parameter nz 2055 (or the array nzz). By setting these parameters accurately, performance 2056 during matrix assembly can be increased by more than a factor of 50. 2057 2058 Input Parameters: 2059 . comm - MPI communicator, set to PETSC_COMM_SELF 2060 . bs - size of block 2061 . m - number of rows 2062 . n - number of columns 2063 . nz - number of block nonzeros per block row (same for all rows) 2064 . nzz - array containing the number of block nonzeros in the various block rows 2065 (possibly different for each block row) or PETSC_NULL 2066 2067 Output Parameter: 2068 . A - the matrix 2069 2070 Options Database Keys: 2071 $ -mat_no_unroll - uses code that does not unroll the loops in the 2072 $ block calculations (much solver) 2073 $ -mat_block_size - size of the blocks to use 2074 2075 Notes: 2076 The block AIJ format is fully compatible with standard Fortran 77 2077 storage. That is, the stored row and column indices can begin at 2078 either one (as in Fortran) or zero. See the users' manual for details. 2079 2080 Specify the preallocated storage with either nz or nnz (not both). 2081 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2082 allocation. For additional details, see the users manual chapter on 2083 matrices. 2084 2085 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2086 @*/ 2087 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 2088 { 2089 Mat B; 2090 Mat_SeqBAIJ *b; 2091 int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size; 2092 2093 MPI_Comm_size(comm,&size); 2094 if (size > 1) SETERRQ(1,0,"Comm must be of size 1"); 2095 2096 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 2097 2098 if (mbs*bs!=m || nbs*bs!=n) 2099 SETERRQ(1,0,"Number rows, cols must be divisible by blocksize"); 2100 2101 *A = 0; 2102 PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 2103 PLogObjectCreate(B); 2104 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 2105 PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 2106 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 2107 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 2108 if (!flg) { 2109 switch (bs) { 2110 case 1: 2111 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2112 B->ops.solve = MatSolve_SeqBAIJ_1; 2113 B->ops.mult = MatMult_SeqBAIJ_1; 2114 B->ops.multadd = MatMultAdd_SeqBAIJ_1; 2115 break; 2116 case 2: 2117 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2118 B->ops.solve = MatSolve_SeqBAIJ_2; 2119 B->ops.mult = MatMult_SeqBAIJ_2; 2120 B->ops.multadd = MatMultAdd_SeqBAIJ_2; 2121 break; 2122 case 3: 2123 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2124 B->ops.solve = MatSolve_SeqBAIJ_3; 2125 B->ops.mult = MatMult_SeqBAIJ_3; 2126 B->ops.multadd = MatMultAdd_SeqBAIJ_3; 2127 break; 2128 case 4: 2129 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2130 B->ops.solve = MatSolve_SeqBAIJ_4; 2131 B->ops.mult = MatMult_SeqBAIJ_4; 2132 B->ops.multadd = MatMultAdd_SeqBAIJ_4; 2133 break; 2134 case 5: 2135 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2136 B->ops.solve = MatSolve_SeqBAIJ_5; 2137 B->ops.mult = MatMult_SeqBAIJ_5; 2138 B->ops.multadd = MatMultAdd_SeqBAIJ_5; 2139 break; 2140 case 7: 2141 B->ops.mult = MatMult_SeqBAIJ_7; 2142 B->ops.solve = MatSolve_SeqBAIJ_7; 2143 B->ops.multadd = MatMultAdd_SeqBAIJ_7; 2144 break; 2145 } 2146 } 2147 B->destroy = MatDestroy_SeqBAIJ; 2148 B->view = MatView_SeqBAIJ; 2149 B->factor = 0; 2150 B->lupivotthreshold = 1.0; 2151 B->mapping = 0; 2152 b->row = 0; 2153 b->col = 0; 2154 b->reallocs = 0; 2155 2156 b->m = m; B->m = m; B->M = m; 2157 b->n = n; B->n = n; B->N = n; 2158 b->mbs = mbs; 2159 b->nbs = nbs; 2160 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 2161 if (nnz == PETSC_NULL) { 2162 if (nz == PETSC_DEFAULT) nz = 5; 2163 else if (nz <= 0) nz = 1; 2164 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 2165 nz = nz*mbs; 2166 } 2167 else { 2168 nz = 0; 2169 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 2170 } 2171 2172 /* allocate the matrix space */ 2173 len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 2174 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 2175 PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 2176 b->j = (int *) (b->a + nz*bs2); 2177 PetscMemzero(b->j,nz*sizeof(int)); 2178 b->i = b->j + nz; 2179 b->singlemalloc = 1; 2180 2181 b->i[0] = 0; 2182 for (i=1; i<mbs+1; i++) { 2183 b->i[i] = b->i[i-1] + b->imax[i-1]; 2184 } 2185 2186 /* b->ilen will count nonzeros in each block row so far. */ 2187 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 2188 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2189 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 2190 2191 b->bs = bs; 2192 b->bs2 = bs2; 2193 b->mbs = mbs; 2194 b->nz = 0; 2195 b->maxnz = nz*bs2; 2196 b->sorted = 0; 2197 b->roworiented = 1; 2198 b->nonew = 0; 2199 b->diag = 0; 2200 b->solve_work = 0; 2201 b->mult_work = 0; 2202 b->spptr = 0; 2203 B->info.nz_unneeded = (double)b->maxnz; 2204 2205 *A = B; 2206 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 2207 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 2208 return 0; 2209 } 2210 2211 #undef __FUNC__ 2212 #define __FUNC__ "MatConvertSameType_SeqBAIJ" 2213 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 2214 { 2215 Mat C; 2216 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 2217 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 2218 2219 if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix"); 2220 2221 *B = 0; 2222 PetscHeaderCreate(C,_p_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 2223 PLogObjectCreate(C); 2224 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 2225 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 2226 C->destroy = MatDestroy_SeqBAIJ; 2227 C->view = MatView_SeqBAIJ; 2228 C->factor = A->factor; 2229 c->row = 0; 2230 c->col = 0; 2231 C->assembled = PETSC_TRUE; 2232 2233 c->m = C->m = a->m; 2234 c->n = C->n = a->n; 2235 C->M = a->m; 2236 C->N = a->n; 2237 2238 c->bs = a->bs; 2239 c->bs2 = a->bs2; 2240 c->mbs = a->mbs; 2241 c->nbs = a->nbs; 2242 2243 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 2244 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 2245 for ( i=0; i<mbs; i++ ) { 2246 c->imax[i] = a->imax[i]; 2247 c->ilen[i] = a->ilen[i]; 2248 } 2249 2250 /* allocate the matrix space */ 2251 c->singlemalloc = 1; 2252 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 2253 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 2254 c->j = (int *) (c->a + nz*bs2); 2255 c->i = c->j + nz; 2256 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 2257 if (mbs > 0) { 2258 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 2259 if (cpvalues == COPY_VALUES) { 2260 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 2261 } 2262 } 2263 2264 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2265 c->sorted = a->sorted; 2266 c->roworiented = a->roworiented; 2267 c->nonew = a->nonew; 2268 2269 if (a->diag) { 2270 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 2271 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 2272 for ( i=0; i<mbs; i++ ) { 2273 c->diag[i] = a->diag[i]; 2274 } 2275 } 2276 else c->diag = 0; 2277 c->nz = a->nz; 2278 c->maxnz = a->maxnz; 2279 c->solve_work = 0; 2280 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 2281 c->mult_work = 0; 2282 *B = C; 2283 return 0; 2284 } 2285 2286 #undef __FUNC__ 2287 #define __FUNC__ "MatLoad_SeqBAIJ" 2288 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 2289 { 2290 Mat_SeqBAIJ *a; 2291 Mat B; 2292 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 2293 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2294 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 2295 int *masked, nmask,tmp,bs2,ishift; 2296 Scalar *aa; 2297 MPI_Comm comm = ((PetscObject) viewer)->comm; 2298 2299 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2300 bs2 = bs*bs; 2301 2302 MPI_Comm_size(comm,&size); 2303 if (size > 1) SETERRQ(1,0,"view must have one processor"); 2304 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2305 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 2306 if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object"); 2307 M = header[1]; N = header[2]; nz = header[3]; 2308 2309 if (M != N) SETERRQ(1,0,"Can only do square matrices"); 2310 2311 /* 2312 This code adds extra rows to make sure the number of rows is 2313 divisible by the blocksize 2314 */ 2315 mbs = M/bs; 2316 extra_rows = bs - M + bs*(mbs); 2317 if (extra_rows == bs) extra_rows = 0; 2318 else mbs++; 2319 if (extra_rows) { 2320 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 2321 } 2322 2323 /* read in row lengths */ 2324 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 2325 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 2326 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2327 2328 /* read in column indices */ 2329 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 2330 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 2331 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 2332 2333 /* loop over row lengths determining block row lengths */ 2334 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 2335 PetscMemzero(browlengths,mbs*sizeof(int)); 2336 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 2337 PetscMemzero(mask,mbs*sizeof(int)); 2338 masked = mask + mbs; 2339 rowcount = 0; nzcount = 0; 2340 for ( i=0; i<mbs; i++ ) { 2341 nmask = 0; 2342 for ( j=0; j<bs; j++ ) { 2343 kmax = rowlengths[rowcount]; 2344 for ( k=0; k<kmax; k++ ) { 2345 tmp = jj[nzcount++]/bs; 2346 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2347 } 2348 rowcount++; 2349 } 2350 browlengths[i] += nmask; 2351 /* zero out the mask elements we set */ 2352 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2353 } 2354 2355 /* create our matrix */ 2356 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 2357 B = *A; 2358 a = (Mat_SeqBAIJ *) B->data; 2359 2360 /* set matrix "i" values */ 2361 a->i[0] = 0; 2362 for ( i=1; i<= mbs; i++ ) { 2363 a->i[i] = a->i[i-1] + browlengths[i-1]; 2364 a->ilen[i-1] = browlengths[i-1]; 2365 } 2366 a->nz = 0; 2367 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 2368 2369 /* read in nonzero values */ 2370 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 2371 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 2372 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 2373 2374 /* set "a" and "j" values into matrix */ 2375 nzcount = 0; jcount = 0; 2376 for ( i=0; i<mbs; i++ ) { 2377 nzcountb = nzcount; 2378 nmask = 0; 2379 for ( j=0; j<bs; j++ ) { 2380 kmax = rowlengths[i*bs+j]; 2381 for ( k=0; k<kmax; k++ ) { 2382 tmp = jj[nzcount++]/bs; 2383 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2384 } 2385 rowcount++; 2386 } 2387 /* sort the masked values */ 2388 PetscSortInt(nmask,masked); 2389 2390 /* set "j" values into matrix */ 2391 maskcount = 1; 2392 for ( j=0; j<nmask; j++ ) { 2393 a->j[jcount++] = masked[j]; 2394 mask[masked[j]] = maskcount++; 2395 } 2396 /* set "a" values into matrix */ 2397 ishift = bs2*a->i[i]; 2398 for ( j=0; j<bs; j++ ) { 2399 kmax = rowlengths[i*bs+j]; 2400 for ( k=0; k<kmax; k++ ) { 2401 tmp = jj[nzcountb]/bs ; 2402 block = mask[tmp] - 1; 2403 point = jj[nzcountb] - bs*tmp; 2404 idx = ishift + bs2*block + j + bs*point; 2405 a->a[idx] = aa[nzcountb++]; 2406 } 2407 } 2408 /* zero out the mask elements we set */ 2409 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2410 } 2411 if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix"); 2412 2413 PetscFree(rowlengths); 2414 PetscFree(browlengths); 2415 PetscFree(aa); 2416 PetscFree(jj); 2417 PetscFree(mask); 2418 2419 B->assembled = PETSC_TRUE; 2420 2421 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 2422 if (flg) { 2423 Viewer tviewer; 2424 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2425 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr); 2426 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2427 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2428 } 2429 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 2430 if (flg) { 2431 Viewer tviewer; 2432 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2433 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr); 2434 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2435 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2436 } 2437 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 2438 if (flg) { 2439 Viewer tviewer; 2440 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2441 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2442 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2443 } 2444 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 2445 if (flg) { 2446 Viewer tviewer; 2447 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2448 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr); 2449 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2450 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2451 } 2452 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 2453 if (flg) { 2454 Viewer tviewer; 2455 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 2456 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 2457 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 2458 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2459 } 2460 return 0; 2461 } 2462 2463 2464 2465