1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "petscsys.h" /*I "petscmat.h" I*/ 11 12 /* 13 Special version for Fun3d sequential benchmark 14 */ 15 #if defined(PETSC_HAVE_FORTRAN_CAPS) 16 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 17 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 18 #define matsetvaluesblocked4_ matsetvaluesblocked4 19 #endif 20 21 #undef __FUNCT__ 22 #define __FUNCT__ "matsetvaluesblocked4_" 23 void matsetvaluesblocked4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 24 { 25 Mat A = *AA; 26 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 27 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 28 int *ai=a->i,*ailen=a->ilen; 29 int *aj=a->j,stepval; 30 PetscScalar *value = v; 31 MatScalar *ap,*aa = a->a,*bap; 32 33 PetscFunctionBegin; 34 stepval = (n-1)*4; 35 for (k=0; k<m; k++) { /* loop over added rows */ 36 row = im[k]; 37 rp = aj + ai[row]; 38 ap = aa + 16*ai[row]; 39 nrow = ailen[row]; 40 low = 0; 41 for (l=0; l<n; l++) { /* loop over added columns */ 42 col = in[l]; 43 value = v + k*(stepval+4)*4 + l*4; 44 low = 0; high = nrow; 45 while (high-low > 7) { 46 t = (low+high)/2; 47 if (rp[t] > col) high = t; 48 else low = t; 49 } 50 for (i=low; i<high; i++) { 51 if (rp[i] > col) break; 52 if (rp[i] == col) { 53 bap = ap + 16*i; 54 for (ii=0; ii<4; ii++,value+=stepval) { 55 for (jj=ii; jj<16; jj+=4) { 56 bap[jj] += *value++; 57 } 58 } 59 goto noinsert2; 60 } 61 } 62 N = nrow++ - 1; 63 /* shift up all the later entries in this row */ 64 for (ii=N; ii>=i; ii--) { 65 rp[ii+1] = rp[ii]; 66 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 67 } 68 if (N >= i) { 69 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 70 } 71 rp[i] = col; 72 bap = ap + 16*i; 73 for (ii=0; ii<4; ii++,value+=stepval) { 74 for (jj=ii; jj<16; jj+=4) { 75 bap[jj] = *value++; 76 } 77 } 78 noinsert2:; 79 low = i; 80 } 81 ailen[row] = nrow; 82 } 83 } 84 85 #if defined(PETSC_HAVE_FORTRAN_CAPS) 86 #define matsetvalues4_ MATSETVALUES4 87 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 88 #define matsetvalues4_ matsetvalues4 89 #endif 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "MatSetValues4_" 93 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 94 { 95 Mat A = *AA; 96 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 97 int *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 98 int *ai=a->i,*ailen=a->ilen; 99 int *aj=a->j,brow,bcol; 100 int ridx,cidx; 101 MatScalar *ap,value,*aa=a->a,*bap; 102 103 PetscFunctionBegin; 104 for (k=0; k<m; k++) { /* loop over added rows */ 105 row = im[k]; brow = row/4; 106 rp = aj + ai[brow]; 107 ap = aa + 16*ai[brow]; 108 nrow = ailen[brow]; 109 low = 0; 110 for (l=0; l<n; l++) { /* loop over added columns */ 111 col = in[l]; bcol = col/4; 112 ridx = row % 4; cidx = col % 4; 113 value = v[l + k*n]; 114 low = 0; high = nrow; 115 while (high-low > 7) { 116 t = (low+high)/2; 117 if (rp[t] > bcol) high = t; 118 else low = t; 119 } 120 for (i=low; i<high; i++) { 121 if (rp[i] > bcol) break; 122 if (rp[i] == bcol) { 123 bap = ap + 16*i + 4*cidx + ridx; 124 *bap += value; 125 goto noinsert1; 126 } 127 } 128 N = nrow++ - 1; 129 /* shift up all the later entries in this row */ 130 for (ii=N; ii>=i; ii--) { 131 rp[ii+1] = rp[ii]; 132 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 133 } 134 if (N>=i) { 135 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 136 } 137 rp[i] = bcol; 138 ap[16*i + 4*cidx + ridx] = value; 139 noinsert1:; 140 low = i; 141 } 142 ailen[brow] = nrow; 143 } 144 } 145 146 /* UGLY, ugly, ugly 147 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 148 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 149 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 150 converts the entries into single precision and then calls ..._MatScalar() to put them 151 into the single precision data structures. 152 */ 153 #if defined(PETSC_USE_MAT_SINGLE) 154 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 155 #else 156 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 157 #endif 158 #if defined(PETSC_HAVE_DSCPACK) 159 EXTERN int MatUseDSCPACK_MPIBAIJ(Mat); 160 #endif 161 162 #define CHUNKSIZE 10 163 164 /* 165 Checks for missing diagonals 166 */ 167 #undef __FUNCT__ 168 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 169 int MatMissingDiagonal_SeqBAIJ(Mat A) 170 { 171 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 172 int *diag,*jj = a->j,i,ierr; 173 174 PetscFunctionBegin; 175 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 176 diag = a->diag; 177 for (i=0; i<a->mbs; i++) { 178 if (jj[diag[i]] != i) { 179 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 180 } 181 } 182 PetscFunctionReturn(0); 183 } 184 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 187 int MatMarkDiagonal_SeqBAIJ(Mat A) 188 { 189 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 190 int i,j,*diag,m = a->mbs,ierr; 191 192 PetscFunctionBegin; 193 if (a->diag) PetscFunctionReturn(0); 194 195 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 196 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 197 for (i=0; i<m; i++) { 198 diag[i] = a->i[i+1]; 199 for (j=a->i[i]; j<a->i[i+1]; j++) { 200 if (a->j[j] == i) { 201 diag[i] = j; 202 break; 203 } 204 } 205 } 206 a->diag = diag; 207 PetscFunctionReturn(0); 208 } 209 210 211 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 212 213 #undef __FUNCT__ 214 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 215 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 216 { 217 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 218 int ierr,n = a->mbs,i; 219 220 PetscFunctionBegin; 221 *nn = n; 222 if (!ia) PetscFunctionReturn(0); 223 if (symmetric) { 224 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 225 } else if (oshift == 1) { 226 /* temporarily add 1 to i and j indices */ 227 int nz = a->i[n]; 228 for (i=0; i<nz; i++) a->j[i]++; 229 for (i=0; i<n+1; i++) a->i[i]++; 230 *ia = a->i; *ja = a->j; 231 } else { 232 *ia = a->i; *ja = a->j; 233 } 234 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 240 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 241 { 242 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 243 int i,n = a->mbs,ierr; 244 245 PetscFunctionBegin; 246 if (!ia) PetscFunctionReturn(0); 247 if (symmetric) { 248 ierr = PetscFree(*ia);CHKERRQ(ierr); 249 ierr = PetscFree(*ja);CHKERRQ(ierr); 250 } else if (oshift == 1) { 251 int nz = a->i[n]-1; 252 for (i=0; i<nz; i++) a->j[i]--; 253 for (i=0; i<n+1; i++) a->i[i]--; 254 } 255 PetscFunctionReturn(0); 256 } 257 258 #undef __FUNCT__ 259 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 260 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 261 { 262 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 263 264 PetscFunctionBegin; 265 *bs = baij->bs; 266 PetscFunctionReturn(0); 267 } 268 269 270 #undef __FUNCT__ 271 #define __FUNCT__ "MatDestroy_SeqBAIJ" 272 int MatDestroy_SeqBAIJ(Mat A) 273 { 274 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 275 int ierr; 276 277 PetscFunctionBegin; 278 #if defined(PETSC_USE_LOG) 279 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 280 #endif 281 ierr = PetscFree(a->a);CHKERRQ(ierr); 282 if (!a->singlemalloc) { 283 ierr = PetscFree(a->i);CHKERRQ(ierr); 284 ierr = PetscFree(a->j);CHKERRQ(ierr); 285 } 286 if (a->row) { 287 ierr = ISDestroy(a->row);CHKERRQ(ierr); 288 } 289 if (a->col) { 290 ierr = ISDestroy(a->col);CHKERRQ(ierr); 291 } 292 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 293 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 294 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 295 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 296 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 297 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 298 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 299 #if defined(PETSC_USE_MAT_SINGLE) 300 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 301 #endif 302 ierr = PetscFree(a);CHKERRQ(ierr); 303 PetscFunctionReturn(0); 304 } 305 306 #undef __FUNCT__ 307 #define __FUNCT__ "MatSetOption_SeqBAIJ" 308 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 309 { 310 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 311 312 PetscFunctionBegin; 313 switch (op) { 314 case MAT_ROW_ORIENTED: 315 a->roworiented = PETSC_TRUE; 316 break; 317 case MAT_COLUMN_ORIENTED: 318 a->roworiented = PETSC_FALSE; 319 break; 320 case MAT_COLUMNS_SORTED: 321 a->sorted = PETSC_TRUE; 322 break; 323 case MAT_COLUMNS_UNSORTED: 324 a->sorted = PETSC_FALSE; 325 break; 326 case MAT_KEEP_ZEROED_ROWS: 327 a->keepzeroedrows = PETSC_TRUE; 328 break; 329 case MAT_NO_NEW_NONZERO_LOCATIONS: 330 a->nonew = 1; 331 break; 332 case MAT_NEW_NONZERO_LOCATION_ERR: 333 a->nonew = -1; 334 break; 335 case MAT_NEW_NONZERO_ALLOCATION_ERR: 336 a->nonew = -2; 337 break; 338 case MAT_YES_NEW_NONZERO_LOCATIONS: 339 a->nonew = 0; 340 break; 341 case MAT_ROWS_SORTED: 342 case MAT_ROWS_UNSORTED: 343 case MAT_YES_NEW_DIAGONALS: 344 case MAT_IGNORE_OFF_PROC_ENTRIES: 345 case MAT_USE_HASH_TABLE: 346 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 347 break; 348 case MAT_NO_NEW_DIAGONALS: 349 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 350 case MAT_USE_SINGLE_PRECISION_SOLVES: 351 if (a->bs==4) { 352 a->single_precision_solves = PETSC_TRUE; 353 A->ops->solve = MatSolve_SeqBAIJ_Update; 354 A->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 355 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES set\n"); 356 } else { 357 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 358 } 359 break; 360 default: 361 SETERRQ(PETSC_ERR_SUP,"unknown option"); 362 } 363 PetscFunctionReturn(0); 364 } 365 366 #undef __FUNCT__ 367 #define __FUNCT__ "MatGetRow_SeqBAIJ" 368 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 369 { 370 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 371 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 372 MatScalar *aa,*aa_i; 373 PetscScalar *v_i; 374 375 PetscFunctionBegin; 376 bs = a->bs; 377 ai = a->i; 378 aj = a->j; 379 aa = a->a; 380 bs2 = a->bs2; 381 382 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 383 384 bn = row/bs; /* Block number */ 385 bp = row % bs; /* Block Position */ 386 M = ai[bn+1] - ai[bn]; 387 *nz = bs*M; 388 389 if (v) { 390 *v = 0; 391 if (*nz) { 392 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 393 for (i=0; i<M; i++) { /* for each block in the block row */ 394 v_i = *v + i*bs; 395 aa_i = aa + bs2*(ai[bn] + i); 396 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 397 } 398 } 399 } 400 401 if (idx) { 402 *idx = 0; 403 if (*nz) { 404 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 405 for (i=0; i<M; i++) { /* for each block in the block row */ 406 idx_i = *idx + i*bs; 407 itmp = bs*aj[ai[bn] + i]; 408 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 409 } 410 } 411 } 412 PetscFunctionReturn(0); 413 } 414 415 #undef __FUNCT__ 416 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 417 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 418 { 419 int ierr; 420 421 PetscFunctionBegin; 422 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 423 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 424 PetscFunctionReturn(0); 425 } 426 427 #undef __FUNCT__ 428 #define __FUNCT__ "MatTranspose_SeqBAIJ" 429 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 430 { 431 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 432 Mat C; 433 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 434 int *rows,*cols,bs2=a->bs2; 435 PetscScalar *array; 436 437 PetscFunctionBegin; 438 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 439 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 440 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 441 442 #if defined(PETSC_USE_MAT_SINGLE) 443 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 444 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 445 #else 446 array = a->a; 447 #endif 448 449 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 450 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 451 ierr = PetscFree(col);CHKERRQ(ierr); 452 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 453 cols = rows + bs; 454 for (i=0; i<mbs; i++) { 455 cols[0] = i*bs; 456 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 457 len = ai[i+1] - ai[i]; 458 for (j=0; j<len; j++) { 459 rows[0] = (*aj++)*bs; 460 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 461 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 462 array += bs2; 463 } 464 } 465 ierr = PetscFree(rows);CHKERRQ(ierr); 466 #if defined(PETSC_USE_MAT_SINGLE) 467 ierr = PetscFree(array); 468 #endif 469 470 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 471 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 472 473 if (B) { 474 *B = C; 475 } else { 476 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 477 } 478 PetscFunctionReturn(0); 479 } 480 481 #undef __FUNCT__ 482 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 483 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 484 { 485 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 486 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 487 PetscScalar *aa; 488 FILE *file; 489 490 PetscFunctionBegin; 491 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 492 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 493 col_lens[0] = MAT_FILE_COOKIE; 494 495 col_lens[1] = A->m; 496 col_lens[2] = A->n; 497 col_lens[3] = a->nz*bs2; 498 499 /* store lengths of each row and write (including header) to file */ 500 count = 0; 501 for (i=0; i<a->mbs; i++) { 502 for (j=0; j<bs; j++) { 503 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 504 } 505 } 506 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 507 ierr = PetscFree(col_lens);CHKERRQ(ierr); 508 509 /* store column indices (zero start index) */ 510 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 511 count = 0; 512 for (i=0; i<a->mbs; i++) { 513 for (j=0; j<bs; j++) { 514 for (k=a->i[i]; k<a->i[i+1]; k++) { 515 for (l=0; l<bs; l++) { 516 jj[count++] = bs*a->j[k] + l; 517 } 518 } 519 } 520 } 521 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 522 ierr = PetscFree(jj);CHKERRQ(ierr); 523 524 /* store nonzero values */ 525 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 526 count = 0; 527 for (i=0; i<a->mbs; i++) { 528 for (j=0; j<bs; j++) { 529 for (k=a->i[i]; k<a->i[i+1]; k++) { 530 for (l=0; l<bs; l++) { 531 aa[count++] = a->a[bs2*k + l*bs + j]; 532 } 533 } 534 } 535 } 536 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 537 ierr = PetscFree(aa);CHKERRQ(ierr); 538 539 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 540 if (file) { 541 fprintf(file,"-matload_block_size %d\n",a->bs); 542 } 543 PetscFunctionReturn(0); 544 } 545 546 extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer); 547 548 #undef __FUNCT__ 549 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 550 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 551 { 552 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 553 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 554 PetscViewerFormat format; 555 556 PetscFunctionBegin; 557 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 558 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 559 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 560 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 561 Mat aij; 562 ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr); 563 ierr = MatView(aij,viewer);CHKERRQ(ierr); 564 ierr = MatDestroy(aij);CHKERRQ(ierr); 565 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 566 #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX) 567 ierr = MatMPIBAIJFactorInfo_DSCPACK(A,viewer);CHKERRQ(ierr); 568 #endif 569 PetscFunctionReturn(0); 570 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 571 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 572 for (i=0; i<a->mbs; i++) { 573 for (j=0; j<bs; j++) { 574 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 575 for (k=a->i[i]; k<a->i[i+1]; k++) { 576 for (l=0; l<bs; l++) { 577 #if defined(PETSC_USE_COMPLEX) 578 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 579 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l, 580 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 581 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 582 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l, 583 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 584 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 585 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 586 } 587 #else 588 if (a->a[bs2*k + l*bs + j] != 0.0) { 589 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 590 } 591 #endif 592 } 593 } 594 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 595 } 596 } 597 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 598 } else { 599 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 600 for (i=0; i<a->mbs; i++) { 601 for (j=0; j<bs; j++) { 602 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 603 for (k=a->i[i]; k<a->i[i+1]; k++) { 604 for (l=0; l<bs; l++) { 605 #if defined(PETSC_USE_COMPLEX) 606 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 607 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 608 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 609 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 610 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 611 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 612 } else { 613 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 614 } 615 #else 616 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 617 #endif 618 } 619 } 620 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 621 } 622 } 623 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 624 } 625 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 626 PetscFunctionReturn(0); 627 } 628 629 #undef __FUNCT__ 630 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 631 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 632 { 633 Mat A = (Mat) Aa; 634 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 635 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 636 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 637 MatScalar *aa; 638 PetscViewer viewer; 639 640 PetscFunctionBegin; 641 642 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 643 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 644 645 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 646 647 /* loop over matrix elements drawing boxes */ 648 color = PETSC_DRAW_BLUE; 649 for (i=0,row=0; i<mbs; i++,row+=bs) { 650 for (j=a->i[i]; j<a->i[i+1]; j++) { 651 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 652 x_l = a->j[j]*bs; x_r = x_l + 1.0; 653 aa = a->a + j*bs2; 654 for (k=0; k<bs; k++) { 655 for (l=0; l<bs; l++) { 656 if (PetscRealPart(*aa++) >= 0.) continue; 657 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 658 } 659 } 660 } 661 } 662 color = PETSC_DRAW_CYAN; 663 for (i=0,row=0; i<mbs; i++,row+=bs) { 664 for (j=a->i[i]; j<a->i[i+1]; j++) { 665 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 666 x_l = a->j[j]*bs; x_r = x_l + 1.0; 667 aa = a->a + j*bs2; 668 for (k=0; k<bs; k++) { 669 for (l=0; l<bs; l++) { 670 if (PetscRealPart(*aa++) != 0.) continue; 671 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 672 } 673 } 674 } 675 } 676 677 color = PETSC_DRAW_RED; 678 for (i=0,row=0; i<mbs; i++,row+=bs) { 679 for (j=a->i[i]; j<a->i[i+1]; j++) { 680 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 681 x_l = a->j[j]*bs; x_r = x_l + 1.0; 682 aa = a->a + j*bs2; 683 for (k=0; k<bs; k++) { 684 for (l=0; l<bs; l++) { 685 if (PetscRealPart(*aa++) <= 0.) continue; 686 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 687 } 688 } 689 } 690 } 691 PetscFunctionReturn(0); 692 } 693 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 696 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 697 { 698 int ierr; 699 PetscReal xl,yl,xr,yr,w,h; 700 PetscDraw draw; 701 PetscTruth isnull; 702 703 PetscFunctionBegin; 704 705 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 706 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 707 708 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 709 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 710 xr += w; yr += h; xl = -w; yl = -h; 711 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 712 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 713 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 714 PetscFunctionReturn(0); 715 } 716 717 #undef __FUNCT__ 718 #define __FUNCT__ "MatView_SeqBAIJ" 719 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 720 { 721 int ierr; 722 PetscTruth issocket,isascii,isbinary,isdraw; 723 724 PetscFunctionBegin; 725 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 726 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 727 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 728 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 729 if (issocket) { 730 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 731 } else if (isascii){ 732 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 733 } else if (isbinary) { 734 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 735 } else if (isdraw) { 736 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 737 } else { 738 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 739 } 740 PetscFunctionReturn(0); 741 } 742 743 744 #undef __FUNCT__ 745 #define __FUNCT__ "MatGetValues_SeqBAIJ" 746 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 747 { 748 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 749 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 750 int *ai = a->i,*ailen = a->ilen; 751 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 752 MatScalar *ap,*aa = a->a,zero = 0.0; 753 754 PetscFunctionBegin; 755 for (k=0; k<m; k++) { /* loop over rows */ 756 row = im[k]; brow = row/bs; 757 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 758 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 759 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 760 nrow = ailen[brow]; 761 for (l=0; l<n; l++) { /* loop over columns */ 762 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 763 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 764 col = in[l] ; 765 bcol = col/bs; 766 cidx = col%bs; 767 ridx = row%bs; 768 high = nrow; 769 low = 0; /* assume unsorted */ 770 while (high-low > 5) { 771 t = (low+high)/2; 772 if (rp[t] > bcol) high = t; 773 else low = t; 774 } 775 for (i=low; i<high; i++) { 776 if (rp[i] > bcol) break; 777 if (rp[i] == bcol) { 778 *v++ = ap[bs2*i+bs*cidx+ridx]; 779 goto finished; 780 } 781 } 782 *v++ = zero; 783 finished:; 784 } 785 } 786 PetscFunctionReturn(0); 787 } 788 789 #if defined(PETSC_USE_MAT_SINGLE) 790 #undef __FUNCT__ 791 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 792 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 793 { 794 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 795 int ierr,i,N = m*n*b->bs2; 796 MatScalar *vsingle; 797 798 PetscFunctionBegin; 799 if (N > b->setvalueslen) { 800 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 801 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 802 b->setvalueslen = N; 803 } 804 vsingle = b->setvaluescopy; 805 for (i=0; i<N; i++) { 806 vsingle[i] = v[i]; 807 } 808 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 809 PetscFunctionReturn(0); 810 } 811 #endif 812 813 814 #undef __FUNCT__ 815 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 816 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is) 817 { 818 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 819 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 820 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 821 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 822 PetscTruth roworiented=a->roworiented; 823 MatScalar *value = v,*ap,*aa = a->a,*bap; 824 825 PetscFunctionBegin; 826 if (roworiented) { 827 stepval = (n-1)*bs; 828 } else { 829 stepval = (m-1)*bs; 830 } 831 for (k=0; k<m; k++) { /* loop over added rows */ 832 row = im[k]; 833 if (row < 0) continue; 834 #if defined(PETSC_USE_BOPT_g) 835 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 836 #endif 837 rp = aj + ai[row]; 838 ap = aa + bs2*ai[row]; 839 rmax = imax[row]; 840 nrow = ailen[row]; 841 low = 0; 842 for (l=0; l<n; l++) { /* loop over added columns */ 843 if (in[l] < 0) continue; 844 #if defined(PETSC_USE_BOPT_g) 845 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 846 #endif 847 col = in[l]; 848 if (roworiented) { 849 value = v + k*(stepval+bs)*bs + l*bs; 850 } else { 851 value = v + l*(stepval+bs)*bs + k*bs; 852 } 853 if (!sorted) low = 0; high = nrow; 854 while (high-low > 7) { 855 t = (low+high)/2; 856 if (rp[t] > col) high = t; 857 else low = t; 858 } 859 for (i=low; i<high; i++) { 860 if (rp[i] > col) break; 861 if (rp[i] == col) { 862 bap = ap + bs2*i; 863 if (roworiented) { 864 if (is == ADD_VALUES) { 865 for (ii=0; ii<bs; ii++,value+=stepval) { 866 for (jj=ii; jj<bs2; jj+=bs) { 867 bap[jj] += *value++; 868 } 869 } 870 } else { 871 for (ii=0; ii<bs; ii++,value+=stepval) { 872 for (jj=ii; jj<bs2; jj+=bs) { 873 bap[jj] = *value++; 874 } 875 } 876 } 877 } else { 878 if (is == ADD_VALUES) { 879 for (ii=0; ii<bs; ii++,value+=stepval) { 880 for (jj=0; jj<bs; jj++) { 881 *bap++ += *value++; 882 } 883 } 884 } else { 885 for (ii=0; ii<bs; ii++,value+=stepval) { 886 for (jj=0; jj<bs; jj++) { 887 *bap++ = *value++; 888 } 889 } 890 } 891 } 892 goto noinsert2; 893 } 894 } 895 if (nonew == 1) goto noinsert2; 896 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 897 if (nrow >= rmax) { 898 /* there is no extra room in row, therefore enlarge */ 899 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 900 MatScalar *new_a; 901 902 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 903 904 /* malloc new storage space */ 905 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 906 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 907 new_j = (int*)(new_a + bs2*new_nz); 908 new_i = new_j + new_nz; 909 910 /* copy over old data into new slots */ 911 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 912 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 913 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 914 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 915 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 916 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 917 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 918 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 919 /* free up old matrix storage */ 920 ierr = PetscFree(a->a);CHKERRQ(ierr); 921 if (!a->singlemalloc) { 922 ierr = PetscFree(a->i);CHKERRQ(ierr); 923 ierr = PetscFree(a->j);CHKERRQ(ierr); 924 } 925 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 926 a->singlemalloc = PETSC_TRUE; 927 928 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 929 rmax = imax[row] = imax[row] + CHUNKSIZE; 930 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 931 a->maxnz += bs2*CHUNKSIZE; 932 a->reallocs++; 933 a->nz++; 934 } 935 N = nrow++ - 1; 936 /* shift up all the later entries in this row */ 937 for (ii=N; ii>=i; ii--) { 938 rp[ii+1] = rp[ii]; 939 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 940 } 941 if (N >= i) { 942 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 943 } 944 rp[i] = col; 945 bap = ap + bs2*i; 946 if (roworiented) { 947 for (ii=0; ii<bs; ii++,value+=stepval) { 948 for (jj=ii; jj<bs2; jj+=bs) { 949 bap[jj] = *value++; 950 } 951 } 952 } else { 953 for (ii=0; ii<bs; ii++,value+=stepval) { 954 for (jj=0; jj<bs; jj++) { 955 *bap++ = *value++; 956 } 957 } 958 } 959 noinsert2:; 960 low = i; 961 } 962 ailen[row] = nrow; 963 } 964 PetscFunctionReturn(0); 965 } 966 967 #undef __FUNCT__ 968 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 969 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 970 { 971 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 972 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 973 int m = A->m,*ip,N,*ailen = a->ilen; 974 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 975 MatScalar *aa = a->a,*ap; 976 #if defined(PETSC_HAVE_DSCPACK) 977 PetscTruth flag; 978 #endif 979 980 PetscFunctionBegin; 981 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 982 983 if (m) rmax = ailen[0]; 984 for (i=1; i<mbs; i++) { 985 /* move each row back by the amount of empty slots (fshift) before it*/ 986 fshift += imax[i-1] - ailen[i-1]; 987 rmax = PetscMax(rmax,ailen[i]); 988 if (fshift) { 989 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 990 N = ailen[i]; 991 for (j=0; j<N; j++) { 992 ip[j-fshift] = ip[j]; 993 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 994 } 995 } 996 ai[i] = ai[i-1] + ailen[i-1]; 997 } 998 if (mbs) { 999 fshift += imax[mbs-1] - ailen[mbs-1]; 1000 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1001 } 1002 /* reset ilen and imax for each row */ 1003 for (i=0; i<mbs; i++) { 1004 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1005 } 1006 a->nz = ai[mbs]; 1007 1008 /* diagonals may have moved, so kill the diagonal pointers */ 1009 if (fshift && a->diag) { 1010 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1011 PetscLogObjectMemory(A,-(mbs+1)*sizeof(int)); 1012 a->diag = 0; 1013 } 1014 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 1015 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 1016 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 1017 a->reallocs = 0; 1018 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1019 1020 #if defined(PETSC_HAVE_DSCPACK) 1021 ierr = PetscOptionsHasName(A->prefix,"-mat_baij_dscpack",&flag);CHKERRQ(ierr); 1022 if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(A);CHKERRQ(ierr); } 1023 #endif 1024 1025 PetscFunctionReturn(0); 1026 } 1027 1028 1029 1030 /* 1031 This function returns an array of flags which indicate the locations of contiguous 1032 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1033 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1034 Assume: sizes should be long enough to hold all the values. 1035 */ 1036 #undef __FUNCT__ 1037 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1038 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 1039 { 1040 int i,j,k,row; 1041 PetscTruth flg; 1042 1043 PetscFunctionBegin; 1044 for (i=0,j=0; i<n; j++) { 1045 row = idx[i]; 1046 if (row%bs!=0) { /* Not the begining of a block */ 1047 sizes[j] = 1; 1048 i++; 1049 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1050 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1051 i++; 1052 } else { /* Begining of the block, so check if the complete block exists */ 1053 flg = PETSC_TRUE; 1054 for (k=1; k<bs; k++) { 1055 if (row+k != idx[i+k]) { /* break in the block */ 1056 flg = PETSC_FALSE; 1057 break; 1058 } 1059 } 1060 if (flg == PETSC_TRUE) { /* No break in the bs */ 1061 sizes[j] = bs; 1062 i+= bs; 1063 } else { 1064 sizes[j] = 1; 1065 i++; 1066 } 1067 } 1068 } 1069 *bs_max = j; 1070 PetscFunctionReturn(0); 1071 } 1072 1073 #undef __FUNCT__ 1074 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1075 int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag) 1076 { 1077 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1078 int ierr,i,j,k,count,is_n,*is_idx,*rows; 1079 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 1080 PetscScalar zero = 0.0; 1081 MatScalar *aa; 1082 1083 PetscFunctionBegin; 1084 /* Make a copy of the IS and sort it */ 1085 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1086 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1087 1088 /* allocate memory for rows,sizes */ 1089 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 1090 sizes = rows + is_n; 1091 1092 /* copy IS values to rows, and sort them */ 1093 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1094 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1095 if (baij->keepzeroedrows) { 1096 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1097 bs_max = is_n; 1098 } else { 1099 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1100 } 1101 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1102 1103 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1104 row = rows[j]; 1105 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 1106 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1107 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1108 if (sizes[i] == bs && !baij->keepzeroedrows) { 1109 if (diag) { 1110 if (baij->ilen[row/bs] > 0) { 1111 baij->ilen[row/bs] = 1; 1112 baij->j[baij->i[row/bs]] = row/bs; 1113 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1114 } 1115 /* Now insert all the diagonal values for this bs */ 1116 for (k=0; k<bs; k++) { 1117 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1118 } 1119 } else { /* (!diag) */ 1120 baij->ilen[row/bs] = 0; 1121 } /* end (!diag) */ 1122 } else { /* (sizes[i] != bs) */ 1123 #if defined (PETSC_USE_DEBUG) 1124 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 1125 #endif 1126 for (k=0; k<count; k++) { 1127 aa[0] = zero; 1128 aa += bs; 1129 } 1130 if (diag) { 1131 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1132 } 1133 } 1134 } 1135 1136 ierr = PetscFree(rows);CHKERRQ(ierr); 1137 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1138 PetscFunctionReturn(0); 1139 } 1140 1141 #undef __FUNCT__ 1142 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1143 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 1144 { 1145 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1146 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1147 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1148 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1149 int ridx,cidx,bs2=a->bs2,ierr; 1150 PetscTruth roworiented=a->roworiented; 1151 MatScalar *ap,value,*aa=a->a,*bap; 1152 1153 PetscFunctionBegin; 1154 for (k=0; k<m; k++) { /* loop over added rows */ 1155 row = im[k]; brow = row/bs; 1156 if (row < 0) continue; 1157 #if defined(PETSC_USE_BOPT_g) 1158 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 1159 #endif 1160 rp = aj + ai[brow]; 1161 ap = aa + bs2*ai[brow]; 1162 rmax = imax[brow]; 1163 nrow = ailen[brow]; 1164 low = 0; 1165 for (l=0; l<n; l++) { /* loop over added columns */ 1166 if (in[l] < 0) continue; 1167 #if defined(PETSC_USE_BOPT_g) 1168 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n); 1169 #endif 1170 col = in[l]; bcol = col/bs; 1171 ridx = row % bs; cidx = col % bs; 1172 if (roworiented) { 1173 value = v[l + k*n]; 1174 } else { 1175 value = v[k + l*m]; 1176 } 1177 if (!sorted) low = 0; high = nrow; 1178 while (high-low > 7) { 1179 t = (low+high)/2; 1180 if (rp[t] > bcol) high = t; 1181 else low = t; 1182 } 1183 for (i=low; i<high; i++) { 1184 if (rp[i] > bcol) break; 1185 if (rp[i] == bcol) { 1186 bap = ap + bs2*i + bs*cidx + ridx; 1187 if (is == ADD_VALUES) *bap += value; 1188 else *bap = value; 1189 goto noinsert1; 1190 } 1191 } 1192 if (nonew == 1) goto noinsert1; 1193 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1194 if (nrow >= rmax) { 1195 /* there is no extra room in row, therefore enlarge */ 1196 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1197 MatScalar *new_a; 1198 1199 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1200 1201 /* Malloc new storage space */ 1202 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1203 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1204 new_j = (int*)(new_a + bs2*new_nz); 1205 new_i = new_j + new_nz; 1206 1207 /* copy over old data into new slots */ 1208 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1209 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1210 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1211 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1212 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1213 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1214 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1215 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1216 /* free up old matrix storage */ 1217 ierr = PetscFree(a->a);CHKERRQ(ierr); 1218 if (!a->singlemalloc) { 1219 ierr = PetscFree(a->i);CHKERRQ(ierr); 1220 ierr = PetscFree(a->j);CHKERRQ(ierr); 1221 } 1222 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1223 a->singlemalloc = PETSC_TRUE; 1224 1225 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1226 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1227 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1228 a->maxnz += bs2*CHUNKSIZE; 1229 a->reallocs++; 1230 a->nz++; 1231 } 1232 N = nrow++ - 1; 1233 /* shift up all the later entries in this row */ 1234 for (ii=N; ii>=i; ii--) { 1235 rp[ii+1] = rp[ii]; 1236 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1237 } 1238 if (N>=i) { 1239 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1240 } 1241 rp[i] = bcol; 1242 ap[bs2*i + bs*cidx + ridx] = value; 1243 noinsert1:; 1244 low = i; 1245 } 1246 ailen[brow] = nrow; 1247 } 1248 PetscFunctionReturn(0); 1249 } 1250 1251 1252 #undef __FUNCT__ 1253 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1254 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1255 { 1256 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1257 Mat outA; 1258 int ierr; 1259 PetscTruth row_identity,col_identity; 1260 1261 PetscFunctionBegin; 1262 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1263 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1264 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1265 if (!row_identity || !col_identity) { 1266 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1267 } 1268 1269 outA = inA; 1270 inA->factor = FACTOR_LU; 1271 1272 if (!a->diag) { 1273 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1274 } 1275 1276 a->row = row; 1277 a->col = col; 1278 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1279 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1280 1281 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1282 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1283 PetscLogObjectParent(inA,a->icol); 1284 1285 /* 1286 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1287 for ILU(0) factorization with natural ordering 1288 */ 1289 if (a->bs < 8) { 1290 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1291 } else { 1292 if (!a->solve_work) { 1293 ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1294 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1295 } 1296 } 1297 1298 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1299 1300 PetscFunctionReturn(0); 1301 } 1302 #undef __FUNCT__ 1303 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1304 int MatPrintHelp_SeqBAIJ(Mat A) 1305 { 1306 static PetscTruth called = PETSC_FALSE; 1307 MPI_Comm comm = A->comm; 1308 int ierr; 1309 1310 PetscFunctionBegin; 1311 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1312 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1313 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1314 PetscFunctionReturn(0); 1315 } 1316 1317 EXTERN_C_BEGIN 1318 #undef __FUNCT__ 1319 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1320 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1321 { 1322 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1323 int i,nz,nbs; 1324 1325 PetscFunctionBegin; 1326 nz = baij->maxnz/baij->bs2; 1327 nbs = baij->nbs; 1328 for (i=0; i<nz; i++) { 1329 baij->j[i] = indices[i]; 1330 } 1331 baij->nz = nz; 1332 for (i=0; i<nbs; i++) { 1333 baij->ilen[i] = baij->imax[i]; 1334 } 1335 1336 PetscFunctionReturn(0); 1337 } 1338 EXTERN_C_END 1339 1340 #undef __FUNCT__ 1341 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1342 /*@ 1343 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1344 in the matrix. 1345 1346 Input Parameters: 1347 + mat - the SeqBAIJ matrix 1348 - indices - the column indices 1349 1350 Level: advanced 1351 1352 Notes: 1353 This can be called if you have precomputed the nonzero structure of the 1354 matrix and want to provide it to the matrix object to improve the performance 1355 of the MatSetValues() operation. 1356 1357 You MUST have set the correct numbers of nonzeros per row in the call to 1358 MatCreateSeqBAIJ(). 1359 1360 MUST be called before any calls to MatSetValues(); 1361 1362 @*/ 1363 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1364 { 1365 int ierr,(*f)(Mat,int *); 1366 1367 PetscFunctionBegin; 1368 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1369 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1370 if (f) { 1371 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1372 } else { 1373 SETERRQ(1,"Wrong type of matrix to set column indices"); 1374 } 1375 PetscFunctionReturn(0); 1376 } 1377 1378 #undef __FUNCT__ 1379 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1380 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1381 { 1382 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1383 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1384 PetscReal atmp; 1385 PetscScalar *x,zero = 0.0; 1386 MatScalar *aa; 1387 int ncols,brow,krow,kcol; 1388 1389 PetscFunctionBegin; 1390 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1391 bs = a->bs; 1392 aa = a->a; 1393 ai = a->i; 1394 aj = a->j; 1395 mbs = a->mbs; 1396 1397 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1398 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1399 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1400 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1401 for (i=0; i<mbs; i++) { 1402 ncols = ai[1] - ai[0]; ai++; 1403 brow = bs*i; 1404 for (j=0; j<ncols; j++){ 1405 /* bcol = bs*(*aj); */ 1406 for (kcol=0; kcol<bs; kcol++){ 1407 for (krow=0; krow<bs; krow++){ 1408 atmp = PetscAbsScalar(*aa); aa++; 1409 row = brow + krow; /* row index */ 1410 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1411 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1412 } 1413 } 1414 aj++; 1415 } 1416 } 1417 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1418 PetscFunctionReturn(0); 1419 } 1420 1421 #undef __FUNCT__ 1422 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1423 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1424 { 1425 int ierr; 1426 1427 PetscFunctionBegin; 1428 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1429 PetscFunctionReturn(0); 1430 } 1431 1432 #undef __FUNCT__ 1433 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1434 int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array) 1435 { 1436 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1437 PetscFunctionBegin; 1438 *array = a->a; 1439 PetscFunctionReturn(0); 1440 } 1441 1442 #undef __FUNCT__ 1443 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1444 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array) 1445 { 1446 PetscFunctionBegin; 1447 PetscFunctionReturn(0); 1448 } 1449 1450 #include "petscblaslapack.h" 1451 #undef __FUNCT__ 1452 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1453 int MatAXPY_SeqBAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1454 { 1455 int ierr,one=1; 1456 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1457 1458 PetscFunctionBegin; 1459 if (str == SAME_NONZERO_PATTERN) { 1460 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1461 } else { 1462 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1463 } 1464 PetscFunctionReturn(0); 1465 } 1466 1467 /* -------------------------------------------------------------------*/ 1468 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1469 MatGetRow_SeqBAIJ, 1470 MatRestoreRow_SeqBAIJ, 1471 MatMult_SeqBAIJ_N, 1472 MatMultAdd_SeqBAIJ_N, 1473 MatMultTranspose_SeqBAIJ, 1474 MatMultTransposeAdd_SeqBAIJ, 1475 MatSolve_SeqBAIJ_N, 1476 0, 1477 0, 1478 0, 1479 MatLUFactor_SeqBAIJ, 1480 0, 1481 0, 1482 MatTranspose_SeqBAIJ, 1483 MatGetInfo_SeqBAIJ, 1484 MatEqual_SeqBAIJ, 1485 MatGetDiagonal_SeqBAIJ, 1486 MatDiagonalScale_SeqBAIJ, 1487 MatNorm_SeqBAIJ, 1488 0, 1489 MatAssemblyEnd_SeqBAIJ, 1490 0, 1491 MatSetOption_SeqBAIJ, 1492 MatZeroEntries_SeqBAIJ, 1493 MatZeroRows_SeqBAIJ, 1494 MatLUFactorSymbolic_SeqBAIJ, 1495 MatLUFactorNumeric_SeqBAIJ_N, 1496 0, 1497 0, 1498 MatSetUpPreallocation_SeqBAIJ, 1499 MatILUFactorSymbolic_SeqBAIJ, 1500 0, 1501 MatGetArray_SeqBAIJ, 1502 MatRestoreArray_SeqBAIJ, 1503 MatDuplicate_SeqBAIJ, 1504 0, 1505 0, 1506 MatILUFactor_SeqBAIJ, 1507 0, 1508 MatAXPY_SeqBAIJ, 1509 MatGetSubMatrices_SeqBAIJ, 1510 MatIncreaseOverlap_SeqBAIJ, 1511 MatGetValues_SeqBAIJ, 1512 0, 1513 MatPrintHelp_SeqBAIJ, 1514 MatScale_SeqBAIJ, 1515 0, 1516 0, 1517 0, 1518 MatGetBlockSize_SeqBAIJ, 1519 MatGetRowIJ_SeqBAIJ, 1520 MatRestoreRowIJ_SeqBAIJ, 1521 0, 1522 0, 1523 0, 1524 0, 1525 0, 1526 0, 1527 MatSetValuesBlocked_SeqBAIJ, 1528 MatGetSubMatrix_SeqBAIJ, 1529 MatDestroy_SeqBAIJ, 1530 MatView_SeqBAIJ, 1531 MatGetPetscMaps_Petsc, 1532 0, 1533 0, 1534 0, 1535 0, 1536 0, 1537 0, 1538 MatGetRowMax_SeqBAIJ, 1539 MatConvert_Basic}; 1540 1541 EXTERN_C_BEGIN 1542 #undef __FUNCT__ 1543 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 1544 int MatStoreValues_SeqBAIJ(Mat mat) 1545 { 1546 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1547 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1548 int ierr; 1549 1550 PetscFunctionBegin; 1551 if (aij->nonew != 1) { 1552 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1553 } 1554 1555 /* allocate space for values if not already there */ 1556 if (!aij->saved_values) { 1557 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1558 } 1559 1560 /* copy values over */ 1561 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1562 PetscFunctionReturn(0); 1563 } 1564 EXTERN_C_END 1565 1566 EXTERN_C_BEGIN 1567 #undef __FUNCT__ 1568 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 1569 int MatRetrieveValues_SeqBAIJ(Mat mat) 1570 { 1571 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1572 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1573 1574 PetscFunctionBegin; 1575 if (aij->nonew != 1) { 1576 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1577 } 1578 if (!aij->saved_values) { 1579 SETERRQ(1,"Must call MatStoreValues(A);first"); 1580 } 1581 1582 /* copy values over */ 1583 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1584 PetscFunctionReturn(0); 1585 } 1586 EXTERN_C_END 1587 1588 EXTERN_C_BEGIN 1589 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1590 EXTERN_C_END 1591 1592 EXTERN_C_BEGIN 1593 #undef __FUNCT__ 1594 #define __FUNCT__ "MatCreate_SeqBAIJ" 1595 int MatCreate_SeqBAIJ(Mat B) 1596 { 1597 int ierr,size; 1598 Mat_SeqBAIJ *b; 1599 1600 PetscFunctionBegin; 1601 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1602 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1603 1604 B->m = B->M = PetscMax(B->m,B->M); 1605 B->n = B->N = PetscMax(B->n,B->N); 1606 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1607 B->data = (void*)b; 1608 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1609 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1610 B->factor = 0; 1611 B->lupivotthreshold = 1.0; 1612 B->mapping = 0; 1613 b->row = 0; 1614 b->col = 0; 1615 b->icol = 0; 1616 b->reallocs = 0; 1617 b->saved_values = 0; 1618 #if defined(PETSC_USE_MAT_SINGLE) 1619 b->setvalueslen = 0; 1620 b->setvaluescopy = PETSC_NULL; 1621 #endif 1622 b->single_precision_solves = PETSC_FALSE; 1623 1624 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 1625 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1626 1627 b->sorted = PETSC_FALSE; 1628 b->roworiented = PETSC_TRUE; 1629 b->nonew = 0; 1630 b->diag = 0; 1631 b->solve_work = 0; 1632 b->mult_work = 0; 1633 B->spptr = 0; 1634 B->info.nz_unneeded = (PetscReal)b->maxnz; 1635 b->keepzeroedrows = PETSC_FALSE; 1636 1637 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1638 "MatStoreValues_SeqBAIJ", 1639 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1640 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1641 "MatRetrieveValues_SeqBAIJ", 1642 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1643 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1644 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1645 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1646 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C", 1647 "MatConvert_SeqBAIJ_SeqAIJ", 1648 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 1649 PetscFunctionReturn(0); 1650 } 1651 EXTERN_C_END 1652 1653 #undef __FUNCT__ 1654 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 1655 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1656 { 1657 Mat C; 1658 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1659 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1660 1661 PetscFunctionBegin; 1662 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1663 1664 *B = 0; 1665 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1666 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1667 c = (Mat_SeqBAIJ*)C->data; 1668 1669 c->bs = a->bs; 1670 c->bs2 = a->bs2; 1671 c->mbs = a->mbs; 1672 c->nbs = a->nbs; 1673 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1674 1675 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1676 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1677 for (i=0; i<mbs; i++) { 1678 c->imax[i] = a->imax[i]; 1679 c->ilen[i] = a->ilen[i]; 1680 } 1681 1682 /* allocate the matrix space */ 1683 c->singlemalloc = PETSC_TRUE; 1684 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1685 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1686 c->j = (int*)(c->a + nz*bs2); 1687 c->i = c->j + nz; 1688 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1689 if (mbs > 0) { 1690 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1691 if (cpvalues == MAT_COPY_VALUES) { 1692 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1693 } else { 1694 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1695 } 1696 } 1697 1698 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1699 c->sorted = a->sorted; 1700 c->roworiented = a->roworiented; 1701 c->nonew = a->nonew; 1702 1703 if (a->diag) { 1704 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1705 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1706 for (i=0; i<mbs; i++) { 1707 c->diag[i] = a->diag[i]; 1708 } 1709 } else c->diag = 0; 1710 c->nz = a->nz; 1711 c->maxnz = a->maxnz; 1712 c->solve_work = 0; 1713 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1714 c->mult_work = 0; 1715 C->preallocated = PETSC_TRUE; 1716 C->assembled = PETSC_TRUE; 1717 *B = C; 1718 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1719 PetscFunctionReturn(0); 1720 } 1721 1722 EXTERN_C_BEGIN 1723 #undef __FUNCT__ 1724 #define __FUNCT__ "MatLoad_SeqBAIJ" 1725 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 1726 { 1727 Mat_SeqBAIJ *a; 1728 Mat B; 1729 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1730 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1731 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1732 int *masked,nmask,tmp,bs2,ishift; 1733 PetscScalar *aa; 1734 MPI_Comm comm = ((PetscObject)viewer)->comm; 1735 1736 PetscFunctionBegin; 1737 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1738 bs2 = bs*bs; 1739 1740 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1741 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1742 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1743 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1744 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1745 M = header[1]; N = header[2]; nz = header[3]; 1746 1747 if (header[3] < 0) { 1748 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1749 } 1750 1751 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1752 1753 /* 1754 This code adds extra rows to make sure the number of rows is 1755 divisible by the blocksize 1756 */ 1757 mbs = M/bs; 1758 extra_rows = bs - M + bs*(mbs); 1759 if (extra_rows == bs) extra_rows = 0; 1760 else mbs++; 1761 if (extra_rows) { 1762 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1763 } 1764 1765 /* read in row lengths */ 1766 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1767 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1768 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1769 1770 /* read in column indices */ 1771 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1772 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1773 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1774 1775 /* loop over row lengths determining block row lengths */ 1776 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1777 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1778 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1779 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1780 masked = mask + mbs; 1781 rowcount = 0; nzcount = 0; 1782 for (i=0; i<mbs; i++) { 1783 nmask = 0; 1784 for (j=0; j<bs; j++) { 1785 kmax = rowlengths[rowcount]; 1786 for (k=0; k<kmax; k++) { 1787 tmp = jj[nzcount++]/bs; 1788 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1789 } 1790 rowcount++; 1791 } 1792 browlengths[i] += nmask; 1793 /* zero out the mask elements we set */ 1794 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1795 } 1796 1797 /* create our matrix */ 1798 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1799 B = *A; 1800 a = (Mat_SeqBAIJ*)B->data; 1801 1802 /* set matrix "i" values */ 1803 a->i[0] = 0; 1804 for (i=1; i<= mbs; i++) { 1805 a->i[i] = a->i[i-1] + browlengths[i-1]; 1806 a->ilen[i-1] = browlengths[i-1]; 1807 } 1808 a->nz = 0; 1809 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1810 1811 /* read in nonzero values */ 1812 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1813 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1814 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1815 1816 /* set "a" and "j" values into matrix */ 1817 nzcount = 0; jcount = 0; 1818 for (i=0; i<mbs; i++) { 1819 nzcountb = nzcount; 1820 nmask = 0; 1821 for (j=0; j<bs; j++) { 1822 kmax = rowlengths[i*bs+j]; 1823 for (k=0; k<kmax; k++) { 1824 tmp = jj[nzcount++]/bs; 1825 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1826 } 1827 } 1828 /* sort the masked values */ 1829 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1830 1831 /* set "j" values into matrix */ 1832 maskcount = 1; 1833 for (j=0; j<nmask; j++) { 1834 a->j[jcount++] = masked[j]; 1835 mask[masked[j]] = maskcount++; 1836 } 1837 /* set "a" values into matrix */ 1838 ishift = bs2*a->i[i]; 1839 for (j=0; j<bs; j++) { 1840 kmax = rowlengths[i*bs+j]; 1841 for (k=0; k<kmax; k++) { 1842 tmp = jj[nzcountb]/bs ; 1843 block = mask[tmp] - 1; 1844 point = jj[nzcountb] - bs*tmp; 1845 idx = ishift + bs2*block + j + bs*point; 1846 a->a[idx] = (MatScalar)aa[nzcountb++]; 1847 } 1848 } 1849 /* zero out the mask elements we set */ 1850 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1851 } 1852 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1853 1854 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1855 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1856 ierr = PetscFree(aa);CHKERRQ(ierr); 1857 ierr = PetscFree(jj);CHKERRQ(ierr); 1858 ierr = PetscFree(mask);CHKERRQ(ierr); 1859 1860 B->assembled = PETSC_TRUE; 1861 1862 ierr = MatView_Private(B);CHKERRQ(ierr); 1863 PetscFunctionReturn(0); 1864 } 1865 EXTERN_C_END 1866 1867 #undef __FUNCT__ 1868 #define __FUNCT__ "MatCreateSeqBAIJ" 1869 /*@C 1870 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1871 compressed row) format. For good matrix assembly performance the 1872 user should preallocate the matrix storage by setting the parameter nz 1873 (or the array nnz). By setting these parameters accurately, performance 1874 during matrix assembly can be increased by more than a factor of 50. 1875 1876 Collective on MPI_Comm 1877 1878 Input Parameters: 1879 + comm - MPI communicator, set to PETSC_COMM_SELF 1880 . bs - size of block 1881 . m - number of rows 1882 . n - number of columns 1883 . nz - number of nonzero blocks per block row (same for all rows) 1884 - nnz - array containing the number of nonzero blocks in the various block rows 1885 (possibly different for each block row) or PETSC_NULL 1886 1887 Output Parameter: 1888 . A - the matrix 1889 1890 Options Database Keys: 1891 . -mat_no_unroll - uses code that does not unroll the loops in the 1892 block calculations (much slower) 1893 . -mat_block_size - size of the blocks to use 1894 1895 Level: intermediate 1896 1897 Notes: 1898 A nonzero block is any block that as 1 or more nonzeros in it 1899 1900 The block AIJ format is fully compatible with standard Fortran 77 1901 storage. That is, the stored row and column indices can begin at 1902 either one (as in Fortran) or zero. See the users' manual for details. 1903 1904 Specify the preallocated storage with either nz or nnz (not both). 1905 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1906 allocation. For additional details, see the users manual chapter on 1907 matrices. 1908 1909 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1910 @*/ 1911 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1912 { 1913 int ierr; 1914 1915 PetscFunctionBegin; 1916 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1917 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 1918 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1919 PetscFunctionReturn(0); 1920 } 1921 1922 #undef __FUNCT__ 1923 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 1924 /*@C 1925 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 1926 per row in the matrix. For good matrix assembly performance the 1927 user should preallocate the matrix storage by setting the parameter nz 1928 (or the array nnz). By setting these parameters accurately, performance 1929 during matrix assembly can be increased by more than a factor of 50. 1930 1931 Collective on MPI_Comm 1932 1933 Input Parameters: 1934 + A - the matrix 1935 . bs - size of block 1936 . nz - number of block nonzeros per block row (same for all rows) 1937 - nnz - array containing the number of block nonzeros in the various block rows 1938 (possibly different for each block row) or PETSC_NULL 1939 1940 Options Database Keys: 1941 . -mat_no_unroll - uses code that does not unroll the loops in the 1942 block calculations (much slower) 1943 . -mat_block_size - size of the blocks to use 1944 1945 Level: intermediate 1946 1947 Notes: 1948 The block AIJ format is fully compatible with standard Fortran 77 1949 storage. That is, the stored row and column indices can begin at 1950 either one (as in Fortran) or zero. See the users' manual for details. 1951 1952 Specify the preallocated storage with either nz or nnz (not both). 1953 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1954 allocation. For additional details, see the users manual chapter on 1955 matrices. 1956 1957 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1958 @*/ 1959 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1960 { 1961 Mat_SeqBAIJ *b; 1962 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1963 PetscTruth flg; 1964 1965 PetscFunctionBegin; 1966 ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr); 1967 if (!flg) PetscFunctionReturn(0); 1968 1969 B->preallocated = PETSC_TRUE; 1970 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1971 if (nnz && newbs != bs) { 1972 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1973 } 1974 bs = newbs; 1975 1976 mbs = B->m/bs; 1977 nbs = B->n/bs; 1978 bs2 = bs*bs; 1979 1980 if (mbs*bs!=B->m || nbs*bs!=B->n) { 1981 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1982 } 1983 1984 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1985 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1986 if (nnz) { 1987 for (i=0; i<mbs; i++) { 1988 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1989 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1990 } 1991 } 1992 1993 b = (Mat_SeqBAIJ*)B->data; 1994 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1995 B->ops->solve = MatSolve_SeqBAIJ_Update; 1996 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 1997 if (!flg) { 1998 switch (bs) { 1999 case 1: 2000 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2001 B->ops->mult = MatMult_SeqBAIJ_1; 2002 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2003 break; 2004 case 2: 2005 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2006 B->ops->mult = MatMult_SeqBAIJ_2; 2007 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2008 break; 2009 case 3: 2010 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2011 B->ops->mult = MatMult_SeqBAIJ_3; 2012 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2013 break; 2014 case 4: 2015 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2016 B->ops->mult = MatMult_SeqBAIJ_4; 2017 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2018 break; 2019 case 5: 2020 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2021 B->ops->mult = MatMult_SeqBAIJ_5; 2022 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2023 break; 2024 case 6: 2025 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2026 B->ops->mult = MatMult_SeqBAIJ_6; 2027 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2028 break; 2029 case 7: 2030 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2031 B->ops->mult = MatMult_SeqBAIJ_7; 2032 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2033 break; 2034 default: 2035 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2036 B->ops->mult = MatMult_SeqBAIJ_N; 2037 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2038 break; 2039 } 2040 } 2041 b->bs = bs; 2042 b->mbs = mbs; 2043 b->nbs = nbs; 2044 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 2045 if (!nnz) { 2046 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2047 else if (nz <= 0) nz = 1; 2048 for (i=0; i<mbs; i++) b->imax[i] = nz; 2049 nz = nz*mbs; 2050 } else { 2051 nz = 0; 2052 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2053 } 2054 2055 /* allocate the matrix space */ 2056 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 2057 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 2058 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2059 b->j = (int*)(b->a + nz*bs2); 2060 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 2061 b->i = b->j + nz; 2062 b->singlemalloc = PETSC_TRUE; 2063 2064 b->i[0] = 0; 2065 for (i=1; i<mbs+1; i++) { 2066 b->i[i] = b->i[i-1] + b->imax[i-1]; 2067 } 2068 2069 /* b->ilen will count nonzeros in each block row so far. */ 2070 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 2071 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2072 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2073 2074 b->bs = bs; 2075 b->bs2 = bs2; 2076 b->mbs = mbs; 2077 b->nz = 0; 2078 b->maxnz = nz*bs2; 2079 B->info.nz_unneeded = (PetscReal)b->maxnz; 2080 PetscFunctionReturn(0); 2081 } 2082 2083