1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "petscsys.h" /*I "petscmat.h" I*/ 11 12 /* 13 Special version for Fun3d sequential benchmark 14 */ 15 #if defined(PETSC_HAVE_FORTRAN_CAPS) 16 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 17 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 18 #define matsetvaluesblocked4_ matsetvaluesblocked4 19 #endif 20 21 #undef __FUNCT__ 22 #define __FUNCT__ "matsetvaluesblocked4_" 23 void matsetvaluesblocked4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 24 { 25 Mat A = *AA; 26 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 27 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 28 int *ai=a->i,*ailen=a->ilen; 29 int *aj=a->j,stepval; 30 PetscScalar *value = v; 31 MatScalar *ap,*aa = a->a,*bap; 32 33 PetscFunctionBegin; 34 stepval = (n-1)*4; 35 for (k=0; k<m; k++) { /* loop over added rows */ 36 row = im[k]; 37 rp = aj + ai[row]; 38 ap = aa + 16*ai[row]; 39 nrow = ailen[row]; 40 low = 0; 41 for (l=0; l<n; l++) { /* loop over added columns */ 42 col = in[l]; 43 value = v + k*(stepval+4)*4 + l*4; 44 low = 0; high = nrow; 45 while (high-low > 7) { 46 t = (low+high)/2; 47 if (rp[t] > col) high = t; 48 else low = t; 49 } 50 for (i=low; i<high; i++) { 51 if (rp[i] > col) break; 52 if (rp[i] == col) { 53 bap = ap + 16*i; 54 for (ii=0; ii<4; ii++,value+=stepval) { 55 for (jj=ii; jj<16; jj+=4) { 56 bap[jj] += *value++; 57 } 58 } 59 goto noinsert2; 60 } 61 } 62 N = nrow++ - 1; 63 /* shift up all the later entries in this row */ 64 for (ii=N; ii>=i; ii--) { 65 rp[ii+1] = rp[ii]; 66 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 67 } 68 if (N >= i) { 69 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 70 } 71 rp[i] = col; 72 bap = ap + 16*i; 73 for (ii=0; ii<4; ii++,value+=stepval) { 74 for (jj=ii; jj<16; jj+=4) { 75 bap[jj] = *value++; 76 } 77 } 78 noinsert2:; 79 low = i; 80 } 81 ailen[row] = nrow; 82 } 83 } 84 85 #if defined(PETSC_HAVE_FORTRAN_CAPS) 86 #define matsetvalues4_ MATSETVALUES4 87 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 88 #define matsetvalues4_ matsetvalues4 89 #endif 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "MatSetValues4_" 93 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 94 { 95 Mat A = *AA; 96 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 97 int *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 98 int *ai=a->i,*ailen=a->ilen; 99 int *aj=a->j,brow,bcol; 100 int ridx,cidx; 101 MatScalar *ap,value,*aa=a->a,*bap; 102 103 PetscFunctionBegin; 104 for (k=0; k<m; k++) { /* loop over added rows */ 105 row = im[k]; brow = row/4; 106 rp = aj + ai[brow]; 107 ap = aa + 16*ai[brow]; 108 nrow = ailen[brow]; 109 low = 0; 110 for (l=0; l<n; l++) { /* loop over added columns */ 111 col = in[l]; bcol = col/4; 112 ridx = row % 4; cidx = col % 4; 113 value = v[l + k*n]; 114 low = 0; high = nrow; 115 while (high-low > 7) { 116 t = (low+high)/2; 117 if (rp[t] > bcol) high = t; 118 else low = t; 119 } 120 for (i=low; i<high; i++) { 121 if (rp[i] > bcol) break; 122 if (rp[i] == bcol) { 123 bap = ap + 16*i + 4*cidx + ridx; 124 *bap += value; 125 goto noinsert1; 126 } 127 } 128 N = nrow++ - 1; 129 /* shift up all the later entries in this row */ 130 for (ii=N; ii>=i; ii--) { 131 rp[ii+1] = rp[ii]; 132 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 133 } 134 if (N>=i) { 135 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 136 } 137 rp[i] = bcol; 138 ap[16*i + 4*cidx + ridx] = value; 139 noinsert1:; 140 low = i; 141 } 142 ailen[brow] = nrow; 143 } 144 } 145 146 /* UGLY, ugly, ugly 147 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 148 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 149 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 150 converts the entries into single precision and then calls ..._MatScalar() to put them 151 into the single precision data structures. 152 */ 153 #if defined(PETSC_USE_MAT_SINGLE) 154 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 155 #else 156 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 157 #endif 158 #if defined(PETSC_HAVE_DSCPACK) 159 EXTERN int MatUseDSCPACK_MPIBAIJ(Mat); 160 #endif 161 162 #define CHUNKSIZE 10 163 164 /* 165 Checks for missing diagonals 166 */ 167 #undef __FUNCT__ 168 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 169 int MatMissingDiagonal_SeqBAIJ(Mat A) 170 { 171 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 172 int *diag,*jj = a->j,i,ierr; 173 174 PetscFunctionBegin; 175 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 176 diag = a->diag; 177 for (i=0; i<a->mbs; i++) { 178 if (jj[diag[i]] != i) { 179 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 180 } 181 } 182 PetscFunctionReturn(0); 183 } 184 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 187 int MatMarkDiagonal_SeqBAIJ(Mat A) 188 { 189 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 190 int i,j,*diag,m = a->mbs,ierr; 191 192 PetscFunctionBegin; 193 if (a->diag) PetscFunctionReturn(0); 194 195 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 196 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 197 for (i=0; i<m; i++) { 198 diag[i] = a->i[i+1]; 199 for (j=a->i[i]; j<a->i[i+1]; j++) { 200 if (a->j[j] == i) { 201 diag[i] = j; 202 break; 203 } 204 } 205 } 206 a->diag = diag; 207 PetscFunctionReturn(0); 208 } 209 210 211 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 212 213 #undef __FUNCT__ 214 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 215 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 216 { 217 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 218 int ierr,n = a->mbs,i; 219 220 PetscFunctionBegin; 221 *nn = n; 222 if (!ia) PetscFunctionReturn(0); 223 if (symmetric) { 224 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 225 } else if (oshift == 1) { 226 /* temporarily add 1 to i and j indices */ 227 int nz = a->i[n]; 228 for (i=0; i<nz; i++) a->j[i]++; 229 for (i=0; i<n+1; i++) a->i[i]++; 230 *ia = a->i; *ja = a->j; 231 } else { 232 *ia = a->i; *ja = a->j; 233 } 234 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 240 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 241 { 242 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 243 int i,n = a->mbs,ierr; 244 245 PetscFunctionBegin; 246 if (!ia) PetscFunctionReturn(0); 247 if (symmetric) { 248 ierr = PetscFree(*ia);CHKERRQ(ierr); 249 ierr = PetscFree(*ja);CHKERRQ(ierr); 250 } else if (oshift == 1) { 251 int nz = a->i[n]-1; 252 for (i=0; i<nz; i++) a->j[i]--; 253 for (i=0; i<n+1; i++) a->i[i]--; 254 } 255 PetscFunctionReturn(0); 256 } 257 258 #undef __FUNCT__ 259 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 260 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 261 { 262 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 263 264 PetscFunctionBegin; 265 *bs = baij->bs; 266 PetscFunctionReturn(0); 267 } 268 269 270 #undef __FUNCT__ 271 #define __FUNCT__ "MatDestroy_SeqBAIJ" 272 int MatDestroy_SeqBAIJ(Mat A) 273 { 274 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 275 int ierr; 276 277 PetscFunctionBegin; 278 #if defined(PETSC_USE_LOG) 279 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 280 #endif 281 ierr = PetscFree(a->a);CHKERRQ(ierr); 282 if (!a->singlemalloc) { 283 ierr = PetscFree(a->i);CHKERRQ(ierr); 284 ierr = PetscFree(a->j);CHKERRQ(ierr); 285 } 286 if (a->row) { 287 ierr = ISDestroy(a->row);CHKERRQ(ierr); 288 } 289 if (a->col) { 290 ierr = ISDestroy(a->col);CHKERRQ(ierr); 291 } 292 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 293 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 294 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 295 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 296 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 297 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 298 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 299 #if defined(PETSC_USE_MAT_SINGLE) 300 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 301 #endif 302 ierr = PetscFree(a);CHKERRQ(ierr); 303 PetscFunctionReturn(0); 304 } 305 306 #undef __FUNCT__ 307 #define __FUNCT__ "MatSetOption_SeqBAIJ" 308 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 309 { 310 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 311 312 PetscFunctionBegin; 313 switch (op) { 314 case MAT_ROW_ORIENTED: 315 a->roworiented = PETSC_TRUE; 316 break; 317 case MAT_COLUMN_ORIENTED: 318 a->roworiented = PETSC_FALSE; 319 break; 320 case MAT_COLUMNS_SORTED: 321 a->sorted = PETSC_TRUE; 322 break; 323 case MAT_COLUMNS_UNSORTED: 324 a->sorted = PETSC_FALSE; 325 break; 326 case MAT_KEEP_ZEROED_ROWS: 327 a->keepzeroedrows = PETSC_TRUE; 328 break; 329 case MAT_NO_NEW_NONZERO_LOCATIONS: 330 a->nonew = 1; 331 break; 332 case MAT_NEW_NONZERO_LOCATION_ERR: 333 a->nonew = -1; 334 break; 335 case MAT_NEW_NONZERO_ALLOCATION_ERR: 336 a->nonew = -2; 337 break; 338 case MAT_YES_NEW_NONZERO_LOCATIONS: 339 a->nonew = 0; 340 break; 341 case MAT_ROWS_SORTED: 342 case MAT_ROWS_UNSORTED: 343 case MAT_YES_NEW_DIAGONALS: 344 case MAT_IGNORE_OFF_PROC_ENTRIES: 345 case MAT_USE_HASH_TABLE: 346 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 347 break; 348 case MAT_NO_NEW_DIAGONALS: 349 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 350 case MAT_USE_SINGLE_PRECISION_SOLVES: 351 if (a->bs==4) { 352 PetscTruth sse_enabled_local,sse_enabled_global; 353 int ierr; 354 355 sse_enabled_local = PETSC_FALSE; 356 sse_enabled_global = PETSC_FALSE; 357 358 ierr = PetscSSEIsEnabled(A->comm,&sse_enabled_local,&sse_enabled_global);CHKERRQ(ierr); 359 #if defined(PETSC_HAVE_SSE) 360 if (sse_enabled_local) { 361 a->single_precision_solves = PETSC_TRUE; 362 A->ops->solve = MatSolve_SeqBAIJ_Update; 363 A->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 364 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES set\n"); 365 break; 366 } else { 367 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 368 } 369 #else 370 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 371 #endif 372 } 373 break; 374 default: 375 SETERRQ(PETSC_ERR_SUP,"unknown option"); 376 } 377 PetscFunctionReturn(0); 378 } 379 380 #undef __FUNCT__ 381 #define __FUNCT__ "MatGetRow_SeqBAIJ" 382 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 383 { 384 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 385 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 386 MatScalar *aa,*aa_i; 387 PetscScalar *v_i; 388 389 PetscFunctionBegin; 390 bs = a->bs; 391 ai = a->i; 392 aj = a->j; 393 aa = a->a; 394 bs2 = a->bs2; 395 396 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 397 398 bn = row/bs; /* Block number */ 399 bp = row % bs; /* Block Position */ 400 M = ai[bn+1] - ai[bn]; 401 *nz = bs*M; 402 403 if (v) { 404 *v = 0; 405 if (*nz) { 406 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 407 for (i=0; i<M; i++) { /* for each block in the block row */ 408 v_i = *v + i*bs; 409 aa_i = aa + bs2*(ai[bn] + i); 410 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 411 } 412 } 413 } 414 415 if (idx) { 416 *idx = 0; 417 if (*nz) { 418 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 419 for (i=0; i<M; i++) { /* for each block in the block row */ 420 idx_i = *idx + i*bs; 421 itmp = bs*aj[ai[bn] + i]; 422 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 423 } 424 } 425 } 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 431 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 432 { 433 int ierr; 434 435 PetscFunctionBegin; 436 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 437 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 438 PetscFunctionReturn(0); 439 } 440 441 #undef __FUNCT__ 442 #define __FUNCT__ "MatTranspose_SeqBAIJ" 443 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 444 { 445 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 446 Mat C; 447 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 448 int *rows,*cols,bs2=a->bs2; 449 PetscScalar *array; 450 451 PetscFunctionBegin; 452 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 453 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 454 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 455 456 #if defined(PETSC_USE_MAT_SINGLE) 457 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 458 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 459 #else 460 array = a->a; 461 #endif 462 463 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 464 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 465 ierr = PetscFree(col);CHKERRQ(ierr); 466 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 467 cols = rows + bs; 468 for (i=0; i<mbs; i++) { 469 cols[0] = i*bs; 470 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 471 len = ai[i+1] - ai[i]; 472 for (j=0; j<len; j++) { 473 rows[0] = (*aj++)*bs; 474 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 475 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 476 array += bs2; 477 } 478 } 479 ierr = PetscFree(rows);CHKERRQ(ierr); 480 #if defined(PETSC_USE_MAT_SINGLE) 481 ierr = PetscFree(array); 482 #endif 483 484 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 485 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 486 487 if (B) { 488 *B = C; 489 } else { 490 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 491 } 492 PetscFunctionReturn(0); 493 } 494 495 #undef __FUNCT__ 496 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 497 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 498 { 499 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 500 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 501 PetscScalar *aa; 502 FILE *file; 503 504 PetscFunctionBegin; 505 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 506 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 507 col_lens[0] = MAT_FILE_COOKIE; 508 509 col_lens[1] = A->m; 510 col_lens[2] = A->n; 511 col_lens[3] = a->nz*bs2; 512 513 /* store lengths of each row and write (including header) to file */ 514 count = 0; 515 for (i=0; i<a->mbs; i++) { 516 for (j=0; j<bs; j++) { 517 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 518 } 519 } 520 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 521 ierr = PetscFree(col_lens);CHKERRQ(ierr); 522 523 /* store column indices (zero start index) */ 524 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 525 count = 0; 526 for (i=0; i<a->mbs; i++) { 527 for (j=0; j<bs; j++) { 528 for (k=a->i[i]; k<a->i[i+1]; k++) { 529 for (l=0; l<bs; l++) { 530 jj[count++] = bs*a->j[k] + l; 531 } 532 } 533 } 534 } 535 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 536 ierr = PetscFree(jj);CHKERRQ(ierr); 537 538 /* store nonzero values */ 539 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 540 count = 0; 541 for (i=0; i<a->mbs; i++) { 542 for (j=0; j<bs; j++) { 543 for (k=a->i[i]; k<a->i[i+1]; k++) { 544 for (l=0; l<bs; l++) { 545 aa[count++] = a->a[bs2*k + l*bs + j]; 546 } 547 } 548 } 549 } 550 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 551 ierr = PetscFree(aa);CHKERRQ(ierr); 552 553 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 554 if (file) { 555 fprintf(file,"-matload_block_size %d\n",a->bs); 556 } 557 PetscFunctionReturn(0); 558 } 559 560 extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer); 561 562 #undef __FUNCT__ 563 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 564 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 565 { 566 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 567 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 568 PetscViewerFormat format; 569 570 PetscFunctionBegin; 571 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 572 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) { 573 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 574 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 575 Mat aij; 576 ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr); 577 ierr = MatView(aij,viewer);CHKERRQ(ierr); 578 ierr = MatDestroy(aij);CHKERRQ(ierr); 579 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 580 #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX) 581 ierr = MatMPIBAIJFactorInfo_DSCPACK(A,viewer);CHKERRQ(ierr); 582 #endif 583 PetscFunctionReturn(0); 584 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 585 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 586 for (i=0; i<a->mbs; i++) { 587 for (j=0; j<bs; j++) { 588 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 589 for (k=a->i[i]; k<a->i[i+1]; k++) { 590 for (l=0; l<bs; l++) { 591 #if defined(PETSC_USE_COMPLEX) 592 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 593 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l, 594 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 595 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 596 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l, 597 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 598 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 599 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 600 } 601 #else 602 if (a->a[bs2*k + l*bs + j] != 0.0) { 603 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 604 } 605 #endif 606 } 607 } 608 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 609 } 610 } 611 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 612 } else { 613 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 614 for (i=0; i<a->mbs; i++) { 615 for (j=0; j<bs; j++) { 616 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 617 for (k=a->i[i]; k<a->i[i+1]; k++) { 618 for (l=0; l<bs; l++) { 619 #if defined(PETSC_USE_COMPLEX) 620 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 621 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 622 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 623 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 624 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 625 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 626 } else { 627 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 628 } 629 #else 630 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 631 #endif 632 } 633 } 634 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 635 } 636 } 637 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 638 } 639 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 640 PetscFunctionReturn(0); 641 } 642 643 #undef __FUNCT__ 644 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 645 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 646 { 647 Mat A = (Mat) Aa; 648 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 649 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 650 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 651 MatScalar *aa; 652 PetscViewer viewer; 653 654 PetscFunctionBegin; 655 656 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 657 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 658 659 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 660 661 /* loop over matrix elements drawing boxes */ 662 color = PETSC_DRAW_BLUE; 663 for (i=0,row=0; i<mbs; i++,row+=bs) { 664 for (j=a->i[i]; j<a->i[i+1]; j++) { 665 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 666 x_l = a->j[j]*bs; x_r = x_l + 1.0; 667 aa = a->a + j*bs2; 668 for (k=0; k<bs; k++) { 669 for (l=0; l<bs; l++) { 670 if (PetscRealPart(*aa++) >= 0.) continue; 671 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 672 } 673 } 674 } 675 } 676 color = PETSC_DRAW_CYAN; 677 for (i=0,row=0; i<mbs; i++,row+=bs) { 678 for (j=a->i[i]; j<a->i[i+1]; j++) { 679 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 680 x_l = a->j[j]*bs; x_r = x_l + 1.0; 681 aa = a->a + j*bs2; 682 for (k=0; k<bs; k++) { 683 for (l=0; l<bs; l++) { 684 if (PetscRealPart(*aa++) != 0.) continue; 685 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 686 } 687 } 688 } 689 } 690 691 color = PETSC_DRAW_RED; 692 for (i=0,row=0; i<mbs; i++,row+=bs) { 693 for (j=a->i[i]; j<a->i[i+1]; j++) { 694 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 695 x_l = a->j[j]*bs; x_r = x_l + 1.0; 696 aa = a->a + j*bs2; 697 for (k=0; k<bs; k++) { 698 for (l=0; l<bs; l++) { 699 if (PetscRealPart(*aa++) <= 0.) continue; 700 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 701 } 702 } 703 } 704 } 705 PetscFunctionReturn(0); 706 } 707 708 #undef __FUNCT__ 709 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 710 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 711 { 712 int ierr; 713 PetscReal xl,yl,xr,yr,w,h; 714 PetscDraw draw; 715 PetscTruth isnull; 716 717 PetscFunctionBegin; 718 719 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 720 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 721 722 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 723 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 724 xr += w; yr += h; xl = -w; yl = -h; 725 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 726 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 727 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 728 PetscFunctionReturn(0); 729 } 730 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatView_SeqBAIJ" 733 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 734 { 735 int ierr; 736 PetscTruth issocket,isascii,isbinary,isdraw; 737 738 PetscFunctionBegin; 739 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 740 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 741 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 742 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 743 if (issocket) { 744 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 745 } else if (isascii){ 746 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 747 } else if (isbinary) { 748 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 749 } else if (isdraw) { 750 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 751 } else { 752 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 753 } 754 PetscFunctionReturn(0); 755 } 756 757 758 #undef __FUNCT__ 759 #define __FUNCT__ "MatGetValues_SeqBAIJ" 760 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 761 { 762 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 763 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 764 int *ai = a->i,*ailen = a->ilen; 765 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 766 MatScalar *ap,*aa = a->a,zero = 0.0; 767 768 PetscFunctionBegin; 769 for (k=0; k<m; k++) { /* loop over rows */ 770 row = im[k]; brow = row/bs; 771 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 772 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 773 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 774 nrow = ailen[brow]; 775 for (l=0; l<n; l++) { /* loop over columns */ 776 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 777 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 778 col = in[l] ; 779 bcol = col/bs; 780 cidx = col%bs; 781 ridx = row%bs; 782 high = nrow; 783 low = 0; /* assume unsorted */ 784 while (high-low > 5) { 785 t = (low+high)/2; 786 if (rp[t] > bcol) high = t; 787 else low = t; 788 } 789 for (i=low; i<high; i++) { 790 if (rp[i] > bcol) break; 791 if (rp[i] == bcol) { 792 *v++ = ap[bs2*i+bs*cidx+ridx]; 793 goto finished; 794 } 795 } 796 *v++ = zero; 797 finished:; 798 } 799 } 800 PetscFunctionReturn(0); 801 } 802 803 #if defined(PETSC_USE_MAT_SINGLE) 804 #undef __FUNCT__ 805 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 806 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 807 { 808 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 809 int ierr,i,N = m*n*b->bs2; 810 MatScalar *vsingle; 811 812 PetscFunctionBegin; 813 if (N > b->setvalueslen) { 814 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 815 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 816 b->setvalueslen = N; 817 } 818 vsingle = b->setvaluescopy; 819 for (i=0; i<N; i++) { 820 vsingle[i] = v[i]; 821 } 822 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 823 PetscFunctionReturn(0); 824 } 825 #endif 826 827 828 #undef __FUNCT__ 829 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 830 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is) 831 { 832 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 833 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 834 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 835 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 836 PetscTruth roworiented=a->roworiented; 837 MatScalar *value = v,*ap,*aa = a->a,*bap; 838 839 PetscFunctionBegin; 840 if (roworiented) { 841 stepval = (n-1)*bs; 842 } else { 843 stepval = (m-1)*bs; 844 } 845 for (k=0; k<m; k++) { /* loop over added rows */ 846 row = im[k]; 847 if (row < 0) continue; 848 #if defined(PETSC_USE_BOPT_g) 849 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 850 #endif 851 rp = aj + ai[row]; 852 ap = aa + bs2*ai[row]; 853 rmax = imax[row]; 854 nrow = ailen[row]; 855 low = 0; 856 for (l=0; l<n; l++) { /* loop over added columns */ 857 if (in[l] < 0) continue; 858 #if defined(PETSC_USE_BOPT_g) 859 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 860 #endif 861 col = in[l]; 862 if (roworiented) { 863 value = v + k*(stepval+bs)*bs + l*bs; 864 } else { 865 value = v + l*(stepval+bs)*bs + k*bs; 866 } 867 if (!sorted) low = 0; high = nrow; 868 while (high-low > 7) { 869 t = (low+high)/2; 870 if (rp[t] > col) high = t; 871 else low = t; 872 } 873 for (i=low; i<high; i++) { 874 if (rp[i] > col) break; 875 if (rp[i] == col) { 876 bap = ap + bs2*i; 877 if (roworiented) { 878 if (is == ADD_VALUES) { 879 for (ii=0; ii<bs; ii++,value+=stepval) { 880 for (jj=ii; jj<bs2; jj+=bs) { 881 bap[jj] += *value++; 882 } 883 } 884 } else { 885 for (ii=0; ii<bs; ii++,value+=stepval) { 886 for (jj=ii; jj<bs2; jj+=bs) { 887 bap[jj] = *value++; 888 } 889 } 890 } 891 } else { 892 if (is == ADD_VALUES) { 893 for (ii=0; ii<bs; ii++,value+=stepval) { 894 for (jj=0; jj<bs; jj++) { 895 *bap++ += *value++; 896 } 897 } 898 } else { 899 for (ii=0; ii<bs; ii++,value+=stepval) { 900 for (jj=0; jj<bs; jj++) { 901 *bap++ = *value++; 902 } 903 } 904 } 905 } 906 goto noinsert2; 907 } 908 } 909 if (nonew == 1) goto noinsert2; 910 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 911 if (nrow >= rmax) { 912 /* there is no extra room in row, therefore enlarge */ 913 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 914 MatScalar *new_a; 915 916 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 917 918 /* malloc new storage space */ 919 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 920 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 921 new_j = (int*)(new_a + bs2*new_nz); 922 new_i = new_j + new_nz; 923 924 /* copy over old data into new slots */ 925 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 926 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 927 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 928 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 929 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 930 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 931 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 932 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 933 /* free up old matrix storage */ 934 ierr = PetscFree(a->a);CHKERRQ(ierr); 935 if (!a->singlemalloc) { 936 ierr = PetscFree(a->i);CHKERRQ(ierr); 937 ierr = PetscFree(a->j);CHKERRQ(ierr); 938 } 939 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 940 a->singlemalloc = PETSC_TRUE; 941 942 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 943 rmax = imax[row] = imax[row] + CHUNKSIZE; 944 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 945 a->maxnz += bs2*CHUNKSIZE; 946 a->reallocs++; 947 a->nz++; 948 } 949 N = nrow++ - 1; 950 /* shift up all the later entries in this row */ 951 for (ii=N; ii>=i; ii--) { 952 rp[ii+1] = rp[ii]; 953 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 954 } 955 if (N >= i) { 956 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 957 } 958 rp[i] = col; 959 bap = ap + bs2*i; 960 if (roworiented) { 961 for (ii=0; ii<bs; ii++,value+=stepval) { 962 for (jj=ii; jj<bs2; jj+=bs) { 963 bap[jj] = *value++; 964 } 965 } 966 } else { 967 for (ii=0; ii<bs; ii++,value+=stepval) { 968 for (jj=0; jj<bs; jj++) { 969 *bap++ = *value++; 970 } 971 } 972 } 973 noinsert2:; 974 low = i; 975 } 976 ailen[row] = nrow; 977 } 978 PetscFunctionReturn(0); 979 } 980 981 #undef __FUNCT__ 982 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 983 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 984 { 985 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 986 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 987 int m = A->m,*ip,N,*ailen = a->ilen; 988 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 989 MatScalar *aa = a->a,*ap; 990 #if defined(PETSC_HAVE_DSCPACK) 991 PetscTruth flag; 992 #endif 993 994 PetscFunctionBegin; 995 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 996 997 if (m) rmax = ailen[0]; 998 for (i=1; i<mbs; i++) { 999 /* move each row back by the amount of empty slots (fshift) before it*/ 1000 fshift += imax[i-1] - ailen[i-1]; 1001 rmax = PetscMax(rmax,ailen[i]); 1002 if (fshift) { 1003 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1004 N = ailen[i]; 1005 for (j=0; j<N; j++) { 1006 ip[j-fshift] = ip[j]; 1007 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1008 } 1009 } 1010 ai[i] = ai[i-1] + ailen[i-1]; 1011 } 1012 if (mbs) { 1013 fshift += imax[mbs-1] - ailen[mbs-1]; 1014 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1015 } 1016 /* reset ilen and imax for each row */ 1017 for (i=0; i<mbs; i++) { 1018 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1019 } 1020 a->nz = ai[mbs]; 1021 1022 /* diagonals may have moved, so kill the diagonal pointers */ 1023 if (fshift && a->diag) { 1024 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1025 PetscLogObjectMemory(A,-(mbs+1)*sizeof(int)); 1026 a->diag = 0; 1027 } 1028 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 1029 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 1030 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 1031 a->reallocs = 0; 1032 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1033 1034 #if defined(PETSC_HAVE_DSCPACK) 1035 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_baij_dscpack",&flag);CHKERRQ(ierr); 1036 if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(A);CHKERRQ(ierr); } 1037 #endif 1038 1039 PetscFunctionReturn(0); 1040 } 1041 1042 1043 1044 /* 1045 This function returns an array of flags which indicate the locations of contiguous 1046 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1047 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1048 Assume: sizes should be long enough to hold all the values. 1049 */ 1050 #undef __FUNCT__ 1051 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1052 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 1053 { 1054 int i,j,k,row; 1055 PetscTruth flg; 1056 1057 PetscFunctionBegin; 1058 for (i=0,j=0; i<n; j++) { 1059 row = idx[i]; 1060 if (row%bs!=0) { /* Not the begining of a block */ 1061 sizes[j] = 1; 1062 i++; 1063 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1064 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1065 i++; 1066 } else { /* Begining of the block, so check if the complete block exists */ 1067 flg = PETSC_TRUE; 1068 for (k=1; k<bs; k++) { 1069 if (row+k != idx[i+k]) { /* break in the block */ 1070 flg = PETSC_FALSE; 1071 break; 1072 } 1073 } 1074 if (flg == PETSC_TRUE) { /* No break in the bs */ 1075 sizes[j] = bs; 1076 i+= bs; 1077 } else { 1078 sizes[j] = 1; 1079 i++; 1080 } 1081 } 1082 } 1083 *bs_max = j; 1084 PetscFunctionReturn(0); 1085 } 1086 1087 #undef __FUNCT__ 1088 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1089 int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag) 1090 { 1091 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1092 int ierr,i,j,k,count,is_n,*is_idx,*rows; 1093 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 1094 PetscScalar zero = 0.0; 1095 MatScalar *aa; 1096 1097 PetscFunctionBegin; 1098 /* Make a copy of the IS and sort it */ 1099 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1100 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1101 1102 /* allocate memory for rows,sizes */ 1103 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 1104 sizes = rows + is_n; 1105 1106 /* copy IS values to rows, and sort them */ 1107 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1108 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1109 if (baij->keepzeroedrows) { 1110 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1111 bs_max = is_n; 1112 } else { 1113 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1114 } 1115 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1116 1117 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1118 row = rows[j]; 1119 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 1120 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1121 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1122 if (sizes[i] == bs && !baij->keepzeroedrows) { 1123 if (diag) { 1124 if (baij->ilen[row/bs] > 0) { 1125 baij->ilen[row/bs] = 1; 1126 baij->j[baij->i[row/bs]] = row/bs; 1127 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1128 } 1129 /* Now insert all the diagonal values for this bs */ 1130 for (k=0; k<bs; k++) { 1131 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1132 } 1133 } else { /* (!diag) */ 1134 baij->ilen[row/bs] = 0; 1135 } /* end (!diag) */ 1136 } else { /* (sizes[i] != bs) */ 1137 #if defined (PETSC_USE_DEBUG) 1138 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 1139 #endif 1140 for (k=0; k<count; k++) { 1141 aa[0] = zero; 1142 aa += bs; 1143 } 1144 if (diag) { 1145 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1146 } 1147 } 1148 } 1149 1150 ierr = PetscFree(rows);CHKERRQ(ierr); 1151 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1152 PetscFunctionReturn(0); 1153 } 1154 1155 #undef __FUNCT__ 1156 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1157 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 1158 { 1159 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1160 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1161 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1162 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1163 int ridx,cidx,bs2=a->bs2,ierr; 1164 PetscTruth roworiented=a->roworiented; 1165 MatScalar *ap,value,*aa=a->a,*bap; 1166 1167 PetscFunctionBegin; 1168 for (k=0; k<m; k++) { /* loop over added rows */ 1169 row = im[k]; brow = row/bs; 1170 if (row < 0) continue; 1171 #if defined(PETSC_USE_BOPT_g) 1172 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 1173 #endif 1174 rp = aj + ai[brow]; 1175 ap = aa + bs2*ai[brow]; 1176 rmax = imax[brow]; 1177 nrow = ailen[brow]; 1178 low = 0; 1179 for (l=0; l<n; l++) { /* loop over added columns */ 1180 if (in[l] < 0) continue; 1181 #if defined(PETSC_USE_BOPT_g) 1182 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n); 1183 #endif 1184 col = in[l]; bcol = col/bs; 1185 ridx = row % bs; cidx = col % bs; 1186 if (roworiented) { 1187 value = v[l + k*n]; 1188 } else { 1189 value = v[k + l*m]; 1190 } 1191 if (!sorted) low = 0; high = nrow; 1192 while (high-low > 7) { 1193 t = (low+high)/2; 1194 if (rp[t] > bcol) high = t; 1195 else low = t; 1196 } 1197 for (i=low; i<high; i++) { 1198 if (rp[i] > bcol) break; 1199 if (rp[i] == bcol) { 1200 bap = ap + bs2*i + bs*cidx + ridx; 1201 if (is == ADD_VALUES) *bap += value; 1202 else *bap = value; 1203 goto noinsert1; 1204 } 1205 } 1206 if (nonew == 1) goto noinsert1; 1207 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1208 if (nrow >= rmax) { 1209 /* there is no extra room in row, therefore enlarge */ 1210 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1211 MatScalar *new_a; 1212 1213 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1214 1215 /* Malloc new storage space */ 1216 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1217 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1218 new_j = (int*)(new_a + bs2*new_nz); 1219 new_i = new_j + new_nz; 1220 1221 /* copy over old data into new slots */ 1222 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1223 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1224 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1225 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1226 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1227 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1228 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1229 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1230 /* free up old matrix storage */ 1231 ierr = PetscFree(a->a);CHKERRQ(ierr); 1232 if (!a->singlemalloc) { 1233 ierr = PetscFree(a->i);CHKERRQ(ierr); 1234 ierr = PetscFree(a->j);CHKERRQ(ierr); 1235 } 1236 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1237 a->singlemalloc = PETSC_TRUE; 1238 1239 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1240 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1241 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1242 a->maxnz += bs2*CHUNKSIZE; 1243 a->reallocs++; 1244 a->nz++; 1245 } 1246 N = nrow++ - 1; 1247 /* shift up all the later entries in this row */ 1248 for (ii=N; ii>=i; ii--) { 1249 rp[ii+1] = rp[ii]; 1250 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1251 } 1252 if (N>=i) { 1253 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1254 } 1255 rp[i] = bcol; 1256 ap[bs2*i + bs*cidx + ridx] = value; 1257 noinsert1:; 1258 low = i; 1259 } 1260 ailen[brow] = nrow; 1261 } 1262 PetscFunctionReturn(0); 1263 } 1264 1265 1266 #undef __FUNCT__ 1267 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1268 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1269 { 1270 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1271 Mat outA; 1272 int ierr; 1273 PetscTruth row_identity,col_identity; 1274 1275 PetscFunctionBegin; 1276 if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1277 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1278 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1279 if (!row_identity || !col_identity) { 1280 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1281 } 1282 1283 outA = inA; 1284 inA->factor = FACTOR_LU; 1285 1286 if (!a->diag) { 1287 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1288 } 1289 1290 a->row = row; 1291 a->col = col; 1292 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1293 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1294 1295 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1296 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1297 PetscLogObjectParent(inA,a->icol); 1298 1299 /* 1300 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1301 for ILU(0) factorization with natural ordering 1302 */ 1303 if (a->bs < 8) { 1304 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1305 } else { 1306 if (!a->solve_work) { 1307 ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1308 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1309 } 1310 } 1311 1312 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1313 1314 PetscFunctionReturn(0); 1315 } 1316 #undef __FUNCT__ 1317 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1318 int MatPrintHelp_SeqBAIJ(Mat A) 1319 { 1320 static PetscTruth called = PETSC_FALSE; 1321 MPI_Comm comm = A->comm; 1322 int ierr; 1323 1324 PetscFunctionBegin; 1325 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1326 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1327 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1328 PetscFunctionReturn(0); 1329 } 1330 1331 EXTERN_C_BEGIN 1332 #undef __FUNCT__ 1333 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1334 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1335 { 1336 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1337 int i,nz,nbs; 1338 1339 PetscFunctionBegin; 1340 nz = baij->maxnz/baij->bs2; 1341 nbs = baij->nbs; 1342 for (i=0; i<nz; i++) { 1343 baij->j[i] = indices[i]; 1344 } 1345 baij->nz = nz; 1346 for (i=0; i<nbs; i++) { 1347 baij->ilen[i] = baij->imax[i]; 1348 } 1349 1350 PetscFunctionReturn(0); 1351 } 1352 EXTERN_C_END 1353 1354 #undef __FUNCT__ 1355 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1356 /*@ 1357 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1358 in the matrix. 1359 1360 Input Parameters: 1361 + mat - the SeqBAIJ matrix 1362 - indices - the column indices 1363 1364 Level: advanced 1365 1366 Notes: 1367 This can be called if you have precomputed the nonzero structure of the 1368 matrix and want to provide it to the matrix object to improve the performance 1369 of the MatSetValues() operation. 1370 1371 You MUST have set the correct numbers of nonzeros per row in the call to 1372 MatCreateSeqBAIJ(). 1373 1374 MUST be called before any calls to MatSetValues(); 1375 1376 @*/ 1377 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1378 { 1379 int ierr,(*f)(Mat,int *); 1380 1381 PetscFunctionBegin; 1382 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1383 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1384 if (f) { 1385 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1386 } else { 1387 SETERRQ(1,"Wrong type of matrix to set column indices"); 1388 } 1389 PetscFunctionReturn(0); 1390 } 1391 1392 #undef __FUNCT__ 1393 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1394 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1395 { 1396 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1397 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1398 PetscReal atmp; 1399 PetscScalar *x,zero = 0.0; 1400 MatScalar *aa; 1401 int ncols,brow,krow,kcol; 1402 1403 PetscFunctionBegin; 1404 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1405 bs = a->bs; 1406 aa = a->a; 1407 ai = a->i; 1408 aj = a->j; 1409 mbs = a->mbs; 1410 1411 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1412 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1413 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1414 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1415 for (i=0; i<mbs; i++) { 1416 ncols = ai[1] - ai[0]; ai++; 1417 brow = bs*i; 1418 for (j=0; j<ncols; j++){ 1419 /* bcol = bs*(*aj); */ 1420 for (kcol=0; kcol<bs; kcol++){ 1421 for (krow=0; krow<bs; krow++){ 1422 atmp = PetscAbsScalar(*aa); aa++; 1423 row = brow + krow; /* row index */ 1424 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1425 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1426 } 1427 } 1428 aj++; 1429 } 1430 } 1431 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1432 PetscFunctionReturn(0); 1433 } 1434 1435 #undef __FUNCT__ 1436 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1437 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1438 { 1439 int ierr; 1440 1441 PetscFunctionBegin; 1442 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1443 PetscFunctionReturn(0); 1444 } 1445 1446 #undef __FUNCT__ 1447 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1448 int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array) 1449 { 1450 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1451 PetscFunctionBegin; 1452 *array = a->a; 1453 PetscFunctionReturn(0); 1454 } 1455 1456 #undef __FUNCT__ 1457 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1458 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array) 1459 { 1460 PetscFunctionBegin; 1461 PetscFunctionReturn(0); 1462 } 1463 1464 /* -------------------------------------------------------------------*/ 1465 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1466 MatGetRow_SeqBAIJ, 1467 MatRestoreRow_SeqBAIJ, 1468 MatMult_SeqBAIJ_N, 1469 MatMultAdd_SeqBAIJ_N, 1470 MatMultTranspose_SeqBAIJ, 1471 MatMultTransposeAdd_SeqBAIJ, 1472 MatSolve_SeqBAIJ_N, 1473 0, 1474 0, 1475 0, 1476 MatLUFactor_SeqBAIJ, 1477 0, 1478 0, 1479 MatTranspose_SeqBAIJ, 1480 MatGetInfo_SeqBAIJ, 1481 MatEqual_SeqBAIJ, 1482 MatGetDiagonal_SeqBAIJ, 1483 MatDiagonalScale_SeqBAIJ, 1484 MatNorm_SeqBAIJ, 1485 0, 1486 MatAssemblyEnd_SeqBAIJ, 1487 0, 1488 MatSetOption_SeqBAIJ, 1489 MatZeroEntries_SeqBAIJ, 1490 MatZeroRows_SeqBAIJ, 1491 MatLUFactorSymbolic_SeqBAIJ, 1492 MatLUFactorNumeric_SeqBAIJ_N, 1493 0, 1494 0, 1495 MatSetUpPreallocation_SeqBAIJ, 1496 MatILUFactorSymbolic_SeqBAIJ, 1497 0, 1498 MatGetArray_SeqBAIJ, 1499 MatRestoreArray_SeqBAIJ, 1500 MatDuplicate_SeqBAIJ, 1501 0, 1502 0, 1503 MatILUFactor_SeqBAIJ, 1504 0, 1505 0, 1506 MatGetSubMatrices_SeqBAIJ, 1507 MatIncreaseOverlap_SeqBAIJ, 1508 MatGetValues_SeqBAIJ, 1509 0, 1510 MatPrintHelp_SeqBAIJ, 1511 MatScale_SeqBAIJ, 1512 0, 1513 0, 1514 0, 1515 MatGetBlockSize_SeqBAIJ, 1516 MatGetRowIJ_SeqBAIJ, 1517 MatRestoreRowIJ_SeqBAIJ, 1518 0, 1519 0, 1520 0, 1521 0, 1522 0, 1523 0, 1524 MatSetValuesBlocked_SeqBAIJ, 1525 MatGetSubMatrix_SeqBAIJ, 1526 MatDestroy_SeqBAIJ, 1527 MatView_SeqBAIJ, 1528 MatGetPetscMaps_Petsc, 1529 0, 1530 0, 1531 0, 1532 0, 1533 0, 1534 0, 1535 MatGetRowMax_SeqBAIJ, 1536 MatConvert_Basic}; 1537 1538 EXTERN_C_BEGIN 1539 #undef __FUNCT__ 1540 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 1541 int MatStoreValues_SeqBAIJ(Mat mat) 1542 { 1543 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1544 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1545 int ierr; 1546 1547 PetscFunctionBegin; 1548 if (aij->nonew != 1) { 1549 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1550 } 1551 1552 /* allocate space for values if not already there */ 1553 if (!aij->saved_values) { 1554 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1555 } 1556 1557 /* copy values over */ 1558 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1559 PetscFunctionReturn(0); 1560 } 1561 EXTERN_C_END 1562 1563 EXTERN_C_BEGIN 1564 #undef __FUNCT__ 1565 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 1566 int MatRetrieveValues_SeqBAIJ(Mat mat) 1567 { 1568 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1569 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1570 1571 PetscFunctionBegin; 1572 if (aij->nonew != 1) { 1573 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1574 } 1575 if (!aij->saved_values) { 1576 SETERRQ(1,"Must call MatStoreValues(A);first"); 1577 } 1578 1579 /* copy values over */ 1580 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1581 PetscFunctionReturn(0); 1582 } 1583 EXTERN_C_END 1584 1585 EXTERN_C_BEGIN 1586 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1587 EXTERN_C_END 1588 1589 EXTERN_C_BEGIN 1590 #undef __FUNCT__ 1591 #define __FUNCT__ "MatCreate_SeqBAIJ" 1592 int MatCreate_SeqBAIJ(Mat B) 1593 { 1594 int ierr,size; 1595 Mat_SeqBAIJ *b; 1596 1597 PetscFunctionBegin; 1598 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1599 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1600 1601 B->m = B->M = PetscMax(B->m,B->M); 1602 B->n = B->N = PetscMax(B->n,B->N); 1603 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1604 B->data = (void*)b; 1605 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1606 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1607 B->factor = 0; 1608 B->lupivotthreshold = 1.0; 1609 B->mapping = 0; 1610 b->row = 0; 1611 b->col = 0; 1612 b->icol = 0; 1613 b->reallocs = 0; 1614 b->saved_values = 0; 1615 #if defined(PETSC_USE_MAT_SINGLE) 1616 b->setvalueslen = 0; 1617 b->setvaluescopy = PETSC_NULL; 1618 #endif 1619 b->single_precision_solves = PETSC_FALSE; 1620 1621 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 1622 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1623 1624 b->sorted = PETSC_FALSE; 1625 b->roworiented = PETSC_TRUE; 1626 b->nonew = 0; 1627 b->diag = 0; 1628 b->solve_work = 0; 1629 b->mult_work = 0; 1630 B->spptr = 0; 1631 B->info.nz_unneeded = (PetscReal)b->maxnz; 1632 b->keepzeroedrows = PETSC_FALSE; 1633 1634 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1635 "MatStoreValues_SeqBAIJ", 1636 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1637 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1638 "MatRetrieveValues_SeqBAIJ", 1639 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1640 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1641 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1642 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1643 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C", 1644 "MatConvert_SeqBAIJ_SeqAIJ", 1645 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 1646 PetscFunctionReturn(0); 1647 } 1648 EXTERN_C_END 1649 1650 #undef __FUNCT__ 1651 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 1652 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1653 { 1654 Mat C; 1655 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1656 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1657 1658 PetscFunctionBegin; 1659 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1660 1661 *B = 0; 1662 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1663 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1664 c = (Mat_SeqBAIJ*)C->data; 1665 1666 c->bs = a->bs; 1667 c->bs2 = a->bs2; 1668 c->mbs = a->mbs; 1669 c->nbs = a->nbs; 1670 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1671 1672 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1673 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1674 for (i=0; i<mbs; i++) { 1675 c->imax[i] = a->imax[i]; 1676 c->ilen[i] = a->ilen[i]; 1677 } 1678 1679 /* allocate the matrix space */ 1680 c->singlemalloc = PETSC_TRUE; 1681 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1682 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1683 c->j = (int*)(c->a + nz*bs2); 1684 c->i = c->j + nz; 1685 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1686 if (mbs > 0) { 1687 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1688 if (cpvalues == MAT_COPY_VALUES) { 1689 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1690 } else { 1691 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1692 } 1693 } 1694 1695 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1696 c->sorted = a->sorted; 1697 c->roworiented = a->roworiented; 1698 c->nonew = a->nonew; 1699 1700 if (a->diag) { 1701 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1702 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1703 for (i=0; i<mbs; i++) { 1704 c->diag[i] = a->diag[i]; 1705 } 1706 } else c->diag = 0; 1707 c->nz = a->nz; 1708 c->maxnz = a->maxnz; 1709 c->solve_work = 0; 1710 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1711 c->mult_work = 0; 1712 C->preallocated = PETSC_TRUE; 1713 C->assembled = PETSC_TRUE; 1714 *B = C; 1715 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1716 PetscFunctionReturn(0); 1717 } 1718 1719 EXTERN_C_BEGIN 1720 #undef __FUNCT__ 1721 #define __FUNCT__ "MatLoad_SeqBAIJ" 1722 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 1723 { 1724 Mat_SeqBAIJ *a; 1725 Mat B; 1726 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1727 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1728 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1729 int *masked,nmask,tmp,bs2,ishift; 1730 PetscScalar *aa; 1731 MPI_Comm comm = ((PetscObject)viewer)->comm; 1732 1733 PetscFunctionBegin; 1734 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1735 bs2 = bs*bs; 1736 1737 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1738 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1739 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1740 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1741 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1742 M = header[1]; N = header[2]; nz = header[3]; 1743 1744 if (header[3] < 0) { 1745 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1746 } 1747 1748 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1749 1750 /* 1751 This code adds extra rows to make sure the number of rows is 1752 divisible by the blocksize 1753 */ 1754 mbs = M/bs; 1755 extra_rows = bs - M + bs*(mbs); 1756 if (extra_rows == bs) extra_rows = 0; 1757 else mbs++; 1758 if (extra_rows) { 1759 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1760 } 1761 1762 /* read in row lengths */ 1763 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1764 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1765 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1766 1767 /* read in column indices */ 1768 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1769 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1770 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1771 1772 /* loop over row lengths determining block row lengths */ 1773 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1774 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1775 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1776 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1777 masked = mask + mbs; 1778 rowcount = 0; nzcount = 0; 1779 for (i=0; i<mbs; i++) { 1780 nmask = 0; 1781 for (j=0; j<bs; j++) { 1782 kmax = rowlengths[rowcount]; 1783 for (k=0; k<kmax; k++) { 1784 tmp = jj[nzcount++]/bs; 1785 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1786 } 1787 rowcount++; 1788 } 1789 browlengths[i] += nmask; 1790 /* zero out the mask elements we set */ 1791 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1792 } 1793 1794 /* create our matrix */ 1795 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1796 B = *A; 1797 a = (Mat_SeqBAIJ*)B->data; 1798 1799 /* set matrix "i" values */ 1800 a->i[0] = 0; 1801 for (i=1; i<= mbs; i++) { 1802 a->i[i] = a->i[i-1] + browlengths[i-1]; 1803 a->ilen[i-1] = browlengths[i-1]; 1804 } 1805 a->nz = 0; 1806 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1807 1808 /* read in nonzero values */ 1809 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1810 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1811 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1812 1813 /* set "a" and "j" values into matrix */ 1814 nzcount = 0; jcount = 0; 1815 for (i=0; i<mbs; i++) { 1816 nzcountb = nzcount; 1817 nmask = 0; 1818 for (j=0; j<bs; j++) { 1819 kmax = rowlengths[i*bs+j]; 1820 for (k=0; k<kmax; k++) { 1821 tmp = jj[nzcount++]/bs; 1822 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1823 } 1824 } 1825 /* sort the masked values */ 1826 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1827 1828 /* set "j" values into matrix */ 1829 maskcount = 1; 1830 for (j=0; j<nmask; j++) { 1831 a->j[jcount++] = masked[j]; 1832 mask[masked[j]] = maskcount++; 1833 } 1834 /* set "a" values into matrix */ 1835 ishift = bs2*a->i[i]; 1836 for (j=0; j<bs; j++) { 1837 kmax = rowlengths[i*bs+j]; 1838 for (k=0; k<kmax; k++) { 1839 tmp = jj[nzcountb]/bs ; 1840 block = mask[tmp] - 1; 1841 point = jj[nzcountb] - bs*tmp; 1842 idx = ishift + bs2*block + j + bs*point; 1843 a->a[idx] = (MatScalar)aa[nzcountb++]; 1844 } 1845 } 1846 /* zero out the mask elements we set */ 1847 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1848 } 1849 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1850 1851 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1852 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1853 ierr = PetscFree(aa);CHKERRQ(ierr); 1854 ierr = PetscFree(jj);CHKERRQ(ierr); 1855 ierr = PetscFree(mask);CHKERRQ(ierr); 1856 1857 B->assembled = PETSC_TRUE; 1858 1859 ierr = MatView_Private(B);CHKERRQ(ierr); 1860 PetscFunctionReturn(0); 1861 } 1862 EXTERN_C_END 1863 1864 #undef __FUNCT__ 1865 #define __FUNCT__ "MatCreateSeqBAIJ" 1866 /*@C 1867 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1868 compressed row) format. For good matrix assembly performance the 1869 user should preallocate the matrix storage by setting the parameter nz 1870 (or the array nnz). By setting these parameters accurately, performance 1871 during matrix assembly can be increased by more than a factor of 50. 1872 1873 Collective on MPI_Comm 1874 1875 Input Parameters: 1876 + comm - MPI communicator, set to PETSC_COMM_SELF 1877 . bs - size of block 1878 . m - number of rows 1879 . n - number of columns 1880 . nz - number of nonzero blocks per block row (same for all rows) 1881 - nnz - array containing the number of nonzero blocks in the various block rows 1882 (possibly different for each block row) or PETSC_NULL 1883 1884 Output Parameter: 1885 . A - the matrix 1886 1887 Options Database Keys: 1888 . -mat_no_unroll - uses code that does not unroll the loops in the 1889 block calculations (much slower) 1890 . -mat_block_size - size of the blocks to use 1891 1892 Level: intermediate 1893 1894 Notes: 1895 A nonzero block is any block that as 1 or more nonzeros in it 1896 1897 The block AIJ format is fully compatible with standard Fortran 77 1898 storage. That is, the stored row and column indices can begin at 1899 either one (as in Fortran) or zero. See the users' manual for details. 1900 1901 Specify the preallocated storage with either nz or nnz (not both). 1902 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1903 allocation. For additional details, see the users manual chapter on 1904 matrices. 1905 1906 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1907 @*/ 1908 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1909 { 1910 int ierr; 1911 1912 PetscFunctionBegin; 1913 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1914 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 1915 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1916 PetscFunctionReturn(0); 1917 } 1918 1919 #undef __FUNCT__ 1920 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 1921 /*@C 1922 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 1923 per row in the matrix. For good matrix assembly performance the 1924 user should preallocate the matrix storage by setting the parameter nz 1925 (or the array nnz). By setting these parameters accurately, performance 1926 during matrix assembly can be increased by more than a factor of 50. 1927 1928 Collective on MPI_Comm 1929 1930 Input Parameters: 1931 + A - the matrix 1932 . bs - size of block 1933 . nz - number of block nonzeros per block row (same for all rows) 1934 - nnz - array containing the number of block nonzeros in the various block rows 1935 (possibly different for each block row) or PETSC_NULL 1936 1937 Options Database Keys: 1938 . -mat_no_unroll - uses code that does not unroll the loops in the 1939 block calculations (much slower) 1940 . -mat_block_size - size of the blocks to use 1941 1942 Level: intermediate 1943 1944 Notes: 1945 The block AIJ format is fully compatible with standard Fortran 77 1946 storage. That is, the stored row and column indices can begin at 1947 either one (as in Fortran) or zero. See the users' manual for details. 1948 1949 Specify the preallocated storage with either nz or nnz (not both). 1950 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1951 allocation. For additional details, see the users manual chapter on 1952 matrices. 1953 1954 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1955 @*/ 1956 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1957 { 1958 Mat_SeqBAIJ *b; 1959 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1960 PetscTruth flg; 1961 1962 PetscFunctionBegin; 1963 ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr); 1964 if (!flg) PetscFunctionReturn(0); 1965 1966 B->preallocated = PETSC_TRUE; 1967 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1968 if (nnz && newbs != bs) { 1969 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1970 } 1971 bs = newbs; 1972 1973 mbs = B->m/bs; 1974 nbs = B->n/bs; 1975 bs2 = bs*bs; 1976 1977 if (mbs*bs!=B->m || nbs*bs!=B->n) { 1978 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1979 } 1980 1981 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1982 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1983 if (nnz) { 1984 for (i=0; i<mbs; i++) { 1985 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1986 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1987 } 1988 } 1989 1990 b = (Mat_SeqBAIJ*)B->data; 1991 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1992 B->ops->solve = MatSolve_SeqBAIJ_Update; 1993 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 1994 if (!flg) { 1995 switch (bs) { 1996 case 1: 1997 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1998 B->ops->mult = MatMult_SeqBAIJ_1; 1999 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2000 break; 2001 case 2: 2002 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2003 B->ops->mult = MatMult_SeqBAIJ_2; 2004 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2005 break; 2006 case 3: 2007 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2008 B->ops->mult = MatMult_SeqBAIJ_3; 2009 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2010 break; 2011 case 4: 2012 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2013 B->ops->mult = MatMult_SeqBAIJ_4; 2014 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2015 break; 2016 case 5: 2017 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2018 B->ops->mult = MatMult_SeqBAIJ_5; 2019 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2020 break; 2021 case 6: 2022 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2023 B->ops->mult = MatMult_SeqBAIJ_6; 2024 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2025 break; 2026 case 7: 2027 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2028 B->ops->mult = MatMult_SeqBAIJ_7; 2029 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2030 break; 2031 default: 2032 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2033 B->ops->mult = MatMult_SeqBAIJ_N; 2034 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2035 break; 2036 } 2037 } 2038 b->bs = bs; 2039 b->mbs = mbs; 2040 b->nbs = nbs; 2041 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 2042 if (!nnz) { 2043 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2044 else if (nz <= 0) nz = 1; 2045 for (i=0; i<mbs; i++) b->imax[i] = nz; 2046 nz = nz*mbs; 2047 } else { 2048 nz = 0; 2049 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2050 } 2051 2052 /* allocate the matrix space */ 2053 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 2054 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 2055 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2056 b->j = (int*)(b->a + nz*bs2); 2057 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 2058 b->i = b->j + nz; 2059 b->singlemalloc = PETSC_TRUE; 2060 2061 b->i[0] = 0; 2062 for (i=1; i<mbs+1; i++) { 2063 b->i[i] = b->i[i-1] + b->imax[i-1]; 2064 } 2065 2066 /* b->ilen will count nonzeros in each block row so far. */ 2067 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 2068 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2069 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2070 2071 b->bs = bs; 2072 b->bs2 = bs2; 2073 b->mbs = mbs; 2074 b->nz = 0; 2075 b->maxnz = nz*bs2; 2076 B->info.nz_unneeded = (PetscReal)b->maxnz; 2077 PetscFunctionReturn(0); 2078 } 2079 2080