1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "petscsys.h" /*I "petscmat.h" I*/ 11 12 /* 13 Special version for Fun3d sequential benchmark 14 */ 15 #if defined(PETSC_HAVE_FORTRAN_CAPS) 16 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 17 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 18 #define matsetvaluesblocked4_ matsetvaluesblocked4 19 #endif 20 21 #undef __FUNCT__ 22 #define __FUNCT__ "matsetvaluesblocked4_" 23 void matsetvaluesblocked4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 24 { 25 Mat A = *AA; 26 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 27 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 28 int *ai=a->i,*ailen=a->ilen; 29 int *aj=a->j,stepval; 30 PetscScalar *value = v; 31 MatScalar *ap,*aa = a->a,*bap; 32 33 PetscFunctionBegin; 34 stepval = (n-1)*4; 35 for (k=0; k<m; k++) { /* loop over added rows */ 36 row = im[k]; 37 rp = aj + ai[row]; 38 ap = aa + 16*ai[row]; 39 nrow = ailen[row]; 40 low = 0; 41 for (l=0; l<n; l++) { /* loop over added columns */ 42 col = in[l]; 43 value = v + k*(stepval+4)*4 + l*4; 44 low = 0; high = nrow; 45 while (high-low > 7) { 46 t = (low+high)/2; 47 if (rp[t] > col) high = t; 48 else low = t; 49 } 50 for (i=low; i<high; i++) { 51 if (rp[i] > col) break; 52 if (rp[i] == col) { 53 bap = ap + 16*i; 54 for (ii=0; ii<4; ii++,value+=stepval) { 55 for (jj=ii; jj<16; jj+=4) { 56 bap[jj] += *value++; 57 } 58 } 59 goto noinsert2; 60 } 61 } 62 N = nrow++ - 1; 63 /* shift up all the later entries in this row */ 64 for (ii=N; ii>=i; ii--) { 65 rp[ii+1] = rp[ii]; 66 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 67 } 68 if (N >= i) { 69 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 70 } 71 rp[i] = col; 72 bap = ap + 16*i; 73 for (ii=0; ii<4; ii++,value+=stepval) { 74 for (jj=ii; jj<16; jj+=4) { 75 bap[jj] = *value++; 76 } 77 } 78 noinsert2:; 79 low = i; 80 } 81 ailen[row] = nrow; 82 } 83 } 84 85 #if defined(PETSC_HAVE_FORTRAN_CAPS) 86 #define matsetvalues4_ MATSETVALUES4 87 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 88 #define matsetvalues4_ matsetvalues4 89 #endif 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "MatSetValues4_" 93 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 94 { 95 Mat A = *AA; 96 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 97 int *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 98 int *ai=a->i,*ailen=a->ilen; 99 int *aj=a->j,brow,bcol; 100 int ridx,cidx; 101 MatScalar *ap,value,*aa=a->a,*bap; 102 103 PetscFunctionBegin; 104 for (k=0; k<m; k++) { /* loop over added rows */ 105 row = im[k]; brow = row/4; 106 rp = aj + ai[brow]; 107 ap = aa + 16*ai[brow]; 108 nrow = ailen[brow]; 109 low = 0; 110 for (l=0; l<n; l++) { /* loop over added columns */ 111 col = in[l]; bcol = col/4; 112 ridx = row % 4; cidx = col % 4; 113 value = v[l + k*n]; 114 low = 0; high = nrow; 115 while (high-low > 7) { 116 t = (low+high)/2; 117 if (rp[t] > bcol) high = t; 118 else low = t; 119 } 120 for (i=low; i<high; i++) { 121 if (rp[i] > bcol) break; 122 if (rp[i] == bcol) { 123 bap = ap + 16*i + 4*cidx + ridx; 124 *bap += value; 125 goto noinsert1; 126 } 127 } 128 N = nrow++ - 1; 129 /* shift up all the later entries in this row */ 130 for (ii=N; ii>=i; ii--) { 131 rp[ii+1] = rp[ii]; 132 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 133 } 134 if (N>=i) { 135 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 136 } 137 rp[i] = bcol; 138 ap[16*i + 4*cidx + ridx] = value; 139 noinsert1:; 140 low = i; 141 } 142 ailen[brow] = nrow; 143 } 144 } 145 146 /* UGLY, ugly, ugly 147 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 148 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 149 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 150 converts the entries into single precision and then calls ..._MatScalar() to put them 151 into the single precision data structures. 152 */ 153 #if defined(PETSC_USE_MAT_SINGLE) 154 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 155 #else 156 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 157 #endif 158 #if defined(PETSC_HAVE_DSCPACK) 159 EXTERN int MatUseDSCPACK_MPIBAIJ(Mat); 160 #endif 161 162 #define CHUNKSIZE 10 163 164 /* 165 Checks for missing diagonals 166 */ 167 #undef __FUNCT__ 168 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 169 int MatMissingDiagonal_SeqBAIJ(Mat A) 170 { 171 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 172 int *diag,*jj = a->j,i,ierr; 173 174 PetscFunctionBegin; 175 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 176 diag = a->diag; 177 for (i=0; i<a->mbs; i++) { 178 if (jj[diag[i]] != i) { 179 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 180 } 181 } 182 PetscFunctionReturn(0); 183 } 184 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 187 int MatMarkDiagonal_SeqBAIJ(Mat A) 188 { 189 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 190 int i,j,*diag,m = a->mbs,ierr; 191 192 PetscFunctionBegin; 193 if (a->diag) PetscFunctionReturn(0); 194 195 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 196 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 197 for (i=0; i<m; i++) { 198 diag[i] = a->i[i+1]; 199 for (j=a->i[i]; j<a->i[i+1]; j++) { 200 if (a->j[j] == i) { 201 diag[i] = j; 202 break; 203 } 204 } 205 } 206 a->diag = diag; 207 PetscFunctionReturn(0); 208 } 209 210 211 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 212 213 #undef __FUNCT__ 214 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 215 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 216 { 217 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 218 int ierr,n = a->mbs,i; 219 220 PetscFunctionBegin; 221 *nn = n; 222 if (!ia) PetscFunctionReturn(0); 223 if (symmetric) { 224 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 225 } else if (oshift == 1) { 226 /* temporarily add 1 to i and j indices */ 227 int nz = a->i[n]; 228 for (i=0; i<nz; i++) a->j[i]++; 229 for (i=0; i<n+1; i++) a->i[i]++; 230 *ia = a->i; *ja = a->j; 231 } else { 232 *ia = a->i; *ja = a->j; 233 } 234 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 240 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 241 { 242 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 243 int i,n = a->mbs,ierr; 244 245 PetscFunctionBegin; 246 if (!ia) PetscFunctionReturn(0); 247 if (symmetric) { 248 ierr = PetscFree(*ia);CHKERRQ(ierr); 249 ierr = PetscFree(*ja);CHKERRQ(ierr); 250 } else if (oshift == 1) { 251 int nz = a->i[n]-1; 252 for (i=0; i<nz; i++) a->j[i]--; 253 for (i=0; i<n+1; i++) a->i[i]--; 254 } 255 PetscFunctionReturn(0); 256 } 257 258 #undef __FUNCT__ 259 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 260 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 261 { 262 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 263 264 PetscFunctionBegin; 265 *bs = baij->bs; 266 PetscFunctionReturn(0); 267 } 268 269 270 #undef __FUNCT__ 271 #define __FUNCT__ "MatDestroy_SeqBAIJ" 272 int MatDestroy_SeqBAIJ(Mat A) 273 { 274 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 275 int ierr; 276 277 PetscFunctionBegin; 278 #if defined(PETSC_USE_LOG) 279 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 280 #endif 281 ierr = PetscFree(a->a);CHKERRQ(ierr); 282 if (!a->singlemalloc) { 283 ierr = PetscFree(a->i);CHKERRQ(ierr); 284 ierr = PetscFree(a->j);CHKERRQ(ierr); 285 } 286 if (a->row) { 287 ierr = ISDestroy(a->row);CHKERRQ(ierr); 288 } 289 if (a->col) { 290 ierr = ISDestroy(a->col);CHKERRQ(ierr); 291 } 292 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 293 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 294 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 295 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 296 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 297 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 298 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 299 #if defined(PETSC_USE_MAT_SINGLE) 300 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 301 #endif 302 ierr = PetscFree(a);CHKERRQ(ierr); 303 PetscFunctionReturn(0); 304 } 305 306 #undef __FUNCT__ 307 #define __FUNCT__ "MatSetOption_SeqBAIJ" 308 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 309 { 310 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 311 312 PetscFunctionBegin; 313 switch (op) { 314 case MAT_ROW_ORIENTED: 315 a->roworiented = PETSC_TRUE; 316 break; 317 case MAT_COLUMN_ORIENTED: 318 a->roworiented = PETSC_FALSE; 319 break; 320 case MAT_COLUMNS_SORTED: 321 a->sorted = PETSC_TRUE; 322 break; 323 case MAT_COLUMNS_UNSORTED: 324 a->sorted = PETSC_FALSE; 325 break; 326 case MAT_KEEP_ZEROED_ROWS: 327 a->keepzeroedrows = PETSC_TRUE; 328 break; 329 case MAT_NO_NEW_NONZERO_LOCATIONS: 330 a->nonew = 1; 331 break; 332 case MAT_NEW_NONZERO_LOCATION_ERR: 333 a->nonew = -1; 334 break; 335 case MAT_NEW_NONZERO_ALLOCATION_ERR: 336 a->nonew = -2; 337 break; 338 case MAT_YES_NEW_NONZERO_LOCATIONS: 339 a->nonew = 0; 340 break; 341 case MAT_ROWS_SORTED: 342 case MAT_ROWS_UNSORTED: 343 case MAT_YES_NEW_DIAGONALS: 344 case MAT_IGNORE_OFF_PROC_ENTRIES: 345 case MAT_USE_HASH_TABLE: 346 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 347 break; 348 case MAT_NO_NEW_DIAGONALS: 349 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 350 case MAT_USE_SINGLE_PRECISION_SOLVES: 351 if (a->bs==4) { 352 PetscTruth sse_enabled_local,sse_enabled_global; 353 int ierr; 354 355 sse_enabled_local = PETSC_FALSE; 356 sse_enabled_global = PETSC_FALSE; 357 358 ierr = PetscSSEIsEnabled(A->comm,&sse_enabled_local,&sse_enabled_global);CHKERRQ(ierr); 359 #if defined(PETSC_HAVE_SSE) 360 if (sse_enabled_local) { 361 a->single_precision_solves = PETSC_TRUE; 362 A->ops->solve = MatSolve_SeqBAIJ_Update; 363 A->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 364 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES set\n"); 365 break; 366 } else { 367 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 368 } 369 #else 370 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 371 #endif 372 } 373 break; 374 default: 375 SETERRQ(PETSC_ERR_SUP,"unknown option"); 376 } 377 PetscFunctionReturn(0); 378 } 379 380 #undef __FUNCT__ 381 #define __FUNCT__ "MatGetRow_SeqBAIJ" 382 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 383 { 384 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 385 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 386 MatScalar *aa,*aa_i; 387 PetscScalar *v_i; 388 389 PetscFunctionBegin; 390 bs = a->bs; 391 ai = a->i; 392 aj = a->j; 393 aa = a->a; 394 bs2 = a->bs2; 395 396 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 397 398 bn = row/bs; /* Block number */ 399 bp = row % bs; /* Block Position */ 400 M = ai[bn+1] - ai[bn]; 401 *nz = bs*M; 402 403 if (v) { 404 *v = 0; 405 if (*nz) { 406 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 407 for (i=0; i<M; i++) { /* for each block in the block row */ 408 v_i = *v + i*bs; 409 aa_i = aa + bs2*(ai[bn] + i); 410 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 411 } 412 } 413 } 414 415 if (idx) { 416 *idx = 0; 417 if (*nz) { 418 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 419 for (i=0; i<M; i++) { /* for each block in the block row */ 420 idx_i = *idx + i*bs; 421 itmp = bs*aj[ai[bn] + i]; 422 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 423 } 424 } 425 } 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 431 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 432 { 433 int ierr; 434 435 PetscFunctionBegin; 436 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 437 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 438 PetscFunctionReturn(0); 439 } 440 441 #undef __FUNCT__ 442 #define __FUNCT__ "MatTranspose_SeqBAIJ" 443 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 444 { 445 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 446 Mat C; 447 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 448 int *rows,*cols,bs2=a->bs2; 449 PetscScalar *array; 450 451 PetscFunctionBegin; 452 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 453 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 454 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 455 456 #if defined(PETSC_USE_MAT_SINGLE) 457 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 458 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 459 #else 460 array = a->a; 461 #endif 462 463 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 464 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 465 ierr = PetscFree(col);CHKERRQ(ierr); 466 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 467 cols = rows + bs; 468 for (i=0; i<mbs; i++) { 469 cols[0] = i*bs; 470 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 471 len = ai[i+1] - ai[i]; 472 for (j=0; j<len; j++) { 473 rows[0] = (*aj++)*bs; 474 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 475 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 476 array += bs2; 477 } 478 } 479 ierr = PetscFree(rows);CHKERRQ(ierr); 480 #if defined(PETSC_USE_MAT_SINGLE) 481 ierr = PetscFree(array); 482 #endif 483 484 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 485 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 486 487 if (B) { 488 *B = C; 489 } else { 490 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 491 } 492 PetscFunctionReturn(0); 493 } 494 495 #undef __FUNCT__ 496 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 497 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 498 { 499 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 500 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 501 PetscScalar *aa; 502 FILE *file; 503 504 PetscFunctionBegin; 505 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 506 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 507 col_lens[0] = MAT_FILE_COOKIE; 508 509 col_lens[1] = A->m; 510 col_lens[2] = A->n; 511 col_lens[3] = a->nz*bs2; 512 513 /* store lengths of each row and write (including header) to file */ 514 count = 0; 515 for (i=0; i<a->mbs; i++) { 516 for (j=0; j<bs; j++) { 517 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 518 } 519 } 520 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 521 ierr = PetscFree(col_lens);CHKERRQ(ierr); 522 523 /* store column indices (zero start index) */ 524 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 525 count = 0; 526 for (i=0; i<a->mbs; i++) { 527 for (j=0; j<bs; j++) { 528 for (k=a->i[i]; k<a->i[i+1]; k++) { 529 for (l=0; l<bs; l++) { 530 jj[count++] = bs*a->j[k] + l; 531 } 532 } 533 } 534 } 535 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 536 ierr = PetscFree(jj);CHKERRQ(ierr); 537 538 /* store nonzero values */ 539 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 540 count = 0; 541 for (i=0; i<a->mbs; i++) { 542 for (j=0; j<bs; j++) { 543 for (k=a->i[i]; k<a->i[i+1]; k++) { 544 for (l=0; l<bs; l++) { 545 aa[count++] = a->a[bs2*k + l*bs + j]; 546 } 547 } 548 } 549 } 550 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 551 ierr = PetscFree(aa);CHKERRQ(ierr); 552 553 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 554 if (file) { 555 fprintf(file,"-matload_block_size %d\n",a->bs); 556 } 557 PetscFunctionReturn(0); 558 } 559 560 extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer); 561 562 #undef __FUNCT__ 563 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 564 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 565 { 566 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 567 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 568 PetscViewerFormat format; 569 570 PetscFunctionBegin; 571 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 572 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 573 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 574 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 575 Mat aij; 576 ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr); 577 ierr = MatView(aij,viewer);CHKERRQ(ierr); 578 ierr = MatDestroy(aij);CHKERRQ(ierr); 579 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 580 #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX) 581 ierr = MatMPIBAIJFactorInfo_DSCPACK(A,viewer);CHKERRQ(ierr); 582 #endif 583 PetscFunctionReturn(0); 584 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 585 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 586 for (i=0; i<a->mbs; i++) { 587 for (j=0; j<bs; j++) { 588 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 589 for (k=a->i[i]; k<a->i[i+1]; k++) { 590 for (l=0; l<bs; l++) { 591 #if defined(PETSC_USE_COMPLEX) 592 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 593 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l, 594 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 595 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 596 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l, 597 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 598 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 599 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 600 } 601 #else 602 if (a->a[bs2*k + l*bs + j] != 0.0) { 603 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 604 } 605 #endif 606 } 607 } 608 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 609 } 610 } 611 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 612 } else { 613 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 614 for (i=0; i<a->mbs; i++) { 615 for (j=0; j<bs; j++) { 616 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 617 for (k=a->i[i]; k<a->i[i+1]; k++) { 618 for (l=0; l<bs; l++) { 619 #if defined(PETSC_USE_COMPLEX) 620 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 621 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 622 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 623 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 624 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 625 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 626 } else { 627 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 628 } 629 #else 630 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 631 #endif 632 } 633 } 634 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 635 } 636 } 637 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 638 } 639 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 640 PetscFunctionReturn(0); 641 } 642 643 #undef __FUNCT__ 644 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 645 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 646 { 647 Mat A = (Mat) Aa; 648 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 649 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 650 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 651 MatScalar *aa; 652 PetscViewer viewer; 653 654 PetscFunctionBegin; 655 656 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 657 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 658 659 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 660 661 /* loop over matrix elements drawing boxes */ 662 color = PETSC_DRAW_BLUE; 663 for (i=0,row=0; i<mbs; i++,row+=bs) { 664 for (j=a->i[i]; j<a->i[i+1]; j++) { 665 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 666 x_l = a->j[j]*bs; x_r = x_l + 1.0; 667 aa = a->a + j*bs2; 668 for (k=0; k<bs; k++) { 669 for (l=0; l<bs; l++) { 670 if (PetscRealPart(*aa++) >= 0.) continue; 671 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 672 } 673 } 674 } 675 } 676 color = PETSC_DRAW_CYAN; 677 for (i=0,row=0; i<mbs; i++,row+=bs) { 678 for (j=a->i[i]; j<a->i[i+1]; j++) { 679 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 680 x_l = a->j[j]*bs; x_r = x_l + 1.0; 681 aa = a->a + j*bs2; 682 for (k=0; k<bs; k++) { 683 for (l=0; l<bs; l++) { 684 if (PetscRealPart(*aa++) != 0.) continue; 685 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 686 } 687 } 688 } 689 } 690 691 color = PETSC_DRAW_RED; 692 for (i=0,row=0; i<mbs; i++,row+=bs) { 693 for (j=a->i[i]; j<a->i[i+1]; j++) { 694 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 695 x_l = a->j[j]*bs; x_r = x_l + 1.0; 696 aa = a->a + j*bs2; 697 for (k=0; k<bs; k++) { 698 for (l=0; l<bs; l++) { 699 if (PetscRealPart(*aa++) <= 0.) continue; 700 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 701 } 702 } 703 } 704 } 705 PetscFunctionReturn(0); 706 } 707 708 #undef __FUNCT__ 709 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 710 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 711 { 712 int ierr; 713 PetscReal xl,yl,xr,yr,w,h; 714 PetscDraw draw; 715 PetscTruth isnull; 716 717 PetscFunctionBegin; 718 719 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 720 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 721 722 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 723 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 724 xr += w; yr += h; xl = -w; yl = -h; 725 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 726 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 727 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 728 PetscFunctionReturn(0); 729 } 730 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatView_SeqBAIJ" 733 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 734 { 735 int ierr; 736 PetscTruth issocket,isascii,isbinary,isdraw; 737 738 PetscFunctionBegin; 739 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 740 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 741 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 742 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 743 if (issocket) { 744 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 745 } else if (isascii){ 746 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 747 } else if (isbinary) { 748 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 749 } else if (isdraw) { 750 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 751 } else { 752 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 753 } 754 PetscFunctionReturn(0); 755 } 756 757 758 #undef __FUNCT__ 759 #define __FUNCT__ "MatGetValues_SeqBAIJ" 760 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 761 { 762 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 763 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 764 int *ai = a->i,*ailen = a->ilen; 765 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 766 MatScalar *ap,*aa = a->a,zero = 0.0; 767 768 PetscFunctionBegin; 769 for (k=0; k<m; k++) { /* loop over rows */ 770 row = im[k]; brow = row/bs; 771 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 772 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 773 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 774 nrow = ailen[brow]; 775 for (l=0; l<n; l++) { /* loop over columns */ 776 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 777 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 778 col = in[l] ; 779 bcol = col/bs; 780 cidx = col%bs; 781 ridx = row%bs; 782 high = nrow; 783 low = 0; /* assume unsorted */ 784 while (high-low > 5) { 785 t = (low+high)/2; 786 if (rp[t] > bcol) high = t; 787 else low = t; 788 } 789 for (i=low; i<high; i++) { 790 if (rp[i] > bcol) break; 791 if (rp[i] == bcol) { 792 *v++ = ap[bs2*i+bs*cidx+ridx]; 793 goto finished; 794 } 795 } 796 *v++ = zero; 797 finished:; 798 } 799 } 800 PetscFunctionReturn(0); 801 } 802 803 #if defined(PETSC_USE_MAT_SINGLE) 804 #undef __FUNCT__ 805 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 806 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 807 { 808 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 809 int ierr,i,N = m*n*b->bs2; 810 MatScalar *vsingle; 811 812 PetscFunctionBegin; 813 if (N > b->setvalueslen) { 814 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 815 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 816 b->setvalueslen = N; 817 } 818 vsingle = b->setvaluescopy; 819 for (i=0; i<N; i++) { 820 vsingle[i] = v[i]; 821 } 822 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 823 PetscFunctionReturn(0); 824 } 825 #endif 826 827 828 #undef __FUNCT__ 829 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 830 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is) 831 { 832 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 833 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 834 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 835 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 836 PetscTruth roworiented=a->roworiented; 837 MatScalar *value = v,*ap,*aa = a->a,*bap; 838 839 PetscFunctionBegin; 840 if (roworiented) { 841 stepval = (n-1)*bs; 842 } else { 843 stepval = (m-1)*bs; 844 } 845 for (k=0; k<m; k++) { /* loop over added rows */ 846 row = im[k]; 847 if (row < 0) continue; 848 #if defined(PETSC_USE_BOPT_g) 849 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 850 #endif 851 rp = aj + ai[row]; 852 ap = aa + bs2*ai[row]; 853 rmax = imax[row]; 854 nrow = ailen[row]; 855 low = 0; 856 for (l=0; l<n; l++) { /* loop over added columns */ 857 if (in[l] < 0) continue; 858 #if defined(PETSC_USE_BOPT_g) 859 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 860 #endif 861 col = in[l]; 862 if (roworiented) { 863 value = v + k*(stepval+bs)*bs + l*bs; 864 } else { 865 value = v + l*(stepval+bs)*bs + k*bs; 866 } 867 if (!sorted) low = 0; high = nrow; 868 while (high-low > 7) { 869 t = (low+high)/2; 870 if (rp[t] > col) high = t; 871 else low = t; 872 } 873 for (i=low; i<high; i++) { 874 if (rp[i] > col) break; 875 if (rp[i] == col) { 876 bap = ap + bs2*i; 877 if (roworiented) { 878 if (is == ADD_VALUES) { 879 for (ii=0; ii<bs; ii++,value+=stepval) { 880 for (jj=ii; jj<bs2; jj+=bs) { 881 bap[jj] += *value++; 882 } 883 } 884 } else { 885 for (ii=0; ii<bs; ii++,value+=stepval) { 886 for (jj=ii; jj<bs2; jj+=bs) { 887 bap[jj] = *value++; 888 } 889 } 890 } 891 } else { 892 if (is == ADD_VALUES) { 893 for (ii=0; ii<bs; ii++,value+=stepval) { 894 for (jj=0; jj<bs; jj++) { 895 *bap++ += *value++; 896 } 897 } 898 } else { 899 for (ii=0; ii<bs; ii++,value+=stepval) { 900 for (jj=0; jj<bs; jj++) { 901 *bap++ = *value++; 902 } 903 } 904 } 905 } 906 goto noinsert2; 907 } 908 } 909 if (nonew == 1) goto noinsert2; 910 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 911 if (nrow >= rmax) { 912 /* there is no extra room in row, therefore enlarge */ 913 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 914 MatScalar *new_a; 915 916 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 917 918 /* malloc new storage space */ 919 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 920 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 921 new_j = (int*)(new_a + bs2*new_nz); 922 new_i = new_j + new_nz; 923 924 /* copy over old data into new slots */ 925 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 926 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 927 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 928 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 929 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 930 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 931 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 932 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 933 /* free up old matrix storage */ 934 ierr = PetscFree(a->a);CHKERRQ(ierr); 935 if (!a->singlemalloc) { 936 ierr = PetscFree(a->i);CHKERRQ(ierr); 937 ierr = PetscFree(a->j);CHKERRQ(ierr); 938 } 939 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 940 a->singlemalloc = PETSC_TRUE; 941 942 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 943 rmax = imax[row] = imax[row] + CHUNKSIZE; 944 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 945 a->maxnz += bs2*CHUNKSIZE; 946 a->reallocs++; 947 a->nz++; 948 } 949 N = nrow++ - 1; 950 /* shift up all the later entries in this row */ 951 for (ii=N; ii>=i; ii--) { 952 rp[ii+1] = rp[ii]; 953 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 954 } 955 if (N >= i) { 956 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 957 } 958 rp[i] = col; 959 bap = ap + bs2*i; 960 if (roworiented) { 961 for (ii=0; ii<bs; ii++,value+=stepval) { 962 for (jj=ii; jj<bs2; jj+=bs) { 963 bap[jj] = *value++; 964 } 965 } 966 } else { 967 for (ii=0; ii<bs; ii++,value+=stepval) { 968 for (jj=0; jj<bs; jj++) { 969 *bap++ = *value++; 970 } 971 } 972 } 973 noinsert2:; 974 low = i; 975 } 976 ailen[row] = nrow; 977 } 978 PetscFunctionReturn(0); 979 } 980 981 #undef __FUNCT__ 982 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 983 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 984 { 985 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 986 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 987 int m = A->m,*ip,N,*ailen = a->ilen; 988 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 989 MatScalar *aa = a->a,*ap; 990 #if defined(PETSC_HAVE_DSCPACK) 991 PetscTruth flag; 992 #endif 993 994 PetscFunctionBegin; 995 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 996 997 if (m) rmax = ailen[0]; 998 for (i=1; i<mbs; i++) { 999 /* move each row back by the amount of empty slots (fshift) before it*/ 1000 fshift += imax[i-1] - ailen[i-1]; 1001 rmax = PetscMax(rmax,ailen[i]); 1002 if (fshift) { 1003 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1004 N = ailen[i]; 1005 for (j=0; j<N; j++) { 1006 ip[j-fshift] = ip[j]; 1007 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1008 } 1009 } 1010 ai[i] = ai[i-1] + ailen[i-1]; 1011 } 1012 if (mbs) { 1013 fshift += imax[mbs-1] - ailen[mbs-1]; 1014 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1015 } 1016 /* reset ilen and imax for each row */ 1017 for (i=0; i<mbs; i++) { 1018 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1019 } 1020 a->nz = ai[mbs]; 1021 1022 /* diagonals may have moved, so kill the diagonal pointers */ 1023 if (fshift && a->diag) { 1024 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1025 PetscLogObjectMemory(A,-(mbs+1)*sizeof(int)); 1026 a->diag = 0; 1027 } 1028 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 1029 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 1030 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 1031 a->reallocs = 0; 1032 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1033 1034 #if defined(PETSC_HAVE_DSCPACK) 1035 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_baij_dscpack",&flag);CHKERRQ(ierr); 1036 if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(A);CHKERRQ(ierr); } 1037 #endif 1038 1039 PetscFunctionReturn(0); 1040 } 1041 1042 1043 1044 /* 1045 This function returns an array of flags which indicate the locations of contiguous 1046 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1047 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1048 Assume: sizes should be long enough to hold all the values. 1049 */ 1050 #undef __FUNCT__ 1051 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1052 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 1053 { 1054 int i,j,k,row; 1055 PetscTruth flg; 1056 1057 PetscFunctionBegin; 1058 for (i=0,j=0; i<n; j++) { 1059 row = idx[i]; 1060 if (row%bs!=0) { /* Not the begining of a block */ 1061 sizes[j] = 1; 1062 i++; 1063 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1064 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1065 i++; 1066 } else { /* Begining of the block, so check if the complete block exists */ 1067 flg = PETSC_TRUE; 1068 for (k=1; k<bs; k++) { 1069 if (row+k != idx[i+k]) { /* break in the block */ 1070 flg = PETSC_FALSE; 1071 break; 1072 } 1073 } 1074 if (flg == PETSC_TRUE) { /* No break in the bs */ 1075 sizes[j] = bs; 1076 i+= bs; 1077 } else { 1078 sizes[j] = 1; 1079 i++; 1080 } 1081 } 1082 } 1083 *bs_max = j; 1084 PetscFunctionReturn(0); 1085 } 1086 1087 #undef __FUNCT__ 1088 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1089 int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag) 1090 { 1091 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1092 int ierr,i,j,k,count,is_n,*is_idx,*rows; 1093 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 1094 PetscScalar zero = 0.0; 1095 MatScalar *aa; 1096 1097 PetscFunctionBegin; 1098 /* Make a copy of the IS and sort it */ 1099 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1100 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1101 1102 /* allocate memory for rows,sizes */ 1103 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 1104 sizes = rows + is_n; 1105 1106 /* copy IS values to rows, and sort them */ 1107 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1108 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1109 if (baij->keepzeroedrows) { 1110 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1111 bs_max = is_n; 1112 } else { 1113 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1114 } 1115 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1116 1117 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1118 row = rows[j]; 1119 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 1120 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1121 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1122 if (sizes[i] == bs && !baij->keepzeroedrows) { 1123 if (diag) { 1124 if (baij->ilen[row/bs] > 0) { 1125 baij->ilen[row/bs] = 1; 1126 baij->j[baij->i[row/bs]] = row/bs; 1127 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1128 } 1129 /* Now insert all the diagonal values for this bs */ 1130 for (k=0; k<bs; k++) { 1131 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1132 } 1133 } else { /* (!diag) */ 1134 baij->ilen[row/bs] = 0; 1135 } /* end (!diag) */ 1136 } else { /* (sizes[i] != bs) */ 1137 #if defined (PETSC_USE_DEBUG) 1138 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 1139 #endif 1140 for (k=0; k<count; k++) { 1141 aa[0] = zero; 1142 aa += bs; 1143 } 1144 if (diag) { 1145 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1146 } 1147 } 1148 } 1149 1150 ierr = PetscFree(rows);CHKERRQ(ierr); 1151 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1152 PetscFunctionReturn(0); 1153 } 1154 1155 #undef __FUNCT__ 1156 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1157 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 1158 { 1159 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1160 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1161 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1162 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1163 int ridx,cidx,bs2=a->bs2,ierr; 1164 PetscTruth roworiented=a->roworiented; 1165 MatScalar *ap,value,*aa=a->a,*bap; 1166 1167 PetscFunctionBegin; 1168 for (k=0; k<m; k++) { /* loop over added rows */ 1169 row = im[k]; brow = row/bs; 1170 if (row < 0) continue; 1171 #if defined(PETSC_USE_BOPT_g) 1172 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 1173 #endif 1174 rp = aj + ai[brow]; 1175 ap = aa + bs2*ai[brow]; 1176 rmax = imax[brow]; 1177 nrow = ailen[brow]; 1178 low = 0; 1179 for (l=0; l<n; l++) { /* loop over added columns */ 1180 if (in[l] < 0) continue; 1181 #if defined(PETSC_USE_BOPT_g) 1182 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n); 1183 #endif 1184 col = in[l]; bcol = col/bs; 1185 ridx = row % bs; cidx = col % bs; 1186 if (roworiented) { 1187 value = v[l + k*n]; 1188 } else { 1189 value = v[k + l*m]; 1190 } 1191 if (!sorted) low = 0; high = nrow; 1192 while (high-low > 7) { 1193 t = (low+high)/2; 1194 if (rp[t] > bcol) high = t; 1195 else low = t; 1196 } 1197 for (i=low; i<high; i++) { 1198 if (rp[i] > bcol) break; 1199 if (rp[i] == bcol) { 1200 bap = ap + bs2*i + bs*cidx + ridx; 1201 if (is == ADD_VALUES) *bap += value; 1202 else *bap = value; 1203 goto noinsert1; 1204 } 1205 } 1206 if (nonew == 1) goto noinsert1; 1207 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1208 if (nrow >= rmax) { 1209 /* there is no extra room in row, therefore enlarge */ 1210 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1211 MatScalar *new_a; 1212 1213 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1214 1215 /* Malloc new storage space */ 1216 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1217 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1218 new_j = (int*)(new_a + bs2*new_nz); 1219 new_i = new_j + new_nz; 1220 1221 /* copy over old data into new slots */ 1222 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1223 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1224 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1225 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1226 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1227 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1228 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1229 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1230 /* free up old matrix storage */ 1231 ierr = PetscFree(a->a);CHKERRQ(ierr); 1232 if (!a->singlemalloc) { 1233 ierr = PetscFree(a->i);CHKERRQ(ierr); 1234 ierr = PetscFree(a->j);CHKERRQ(ierr); 1235 } 1236 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1237 a->singlemalloc = PETSC_TRUE; 1238 1239 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1240 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1241 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1242 a->maxnz += bs2*CHUNKSIZE; 1243 a->reallocs++; 1244 a->nz++; 1245 } 1246 N = nrow++ - 1; 1247 /* shift up all the later entries in this row */ 1248 for (ii=N; ii>=i; ii--) { 1249 rp[ii+1] = rp[ii]; 1250 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1251 } 1252 if (N>=i) { 1253 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1254 } 1255 rp[i] = bcol; 1256 ap[bs2*i + bs*cidx + ridx] = value; 1257 noinsert1:; 1258 low = i; 1259 } 1260 ailen[brow] = nrow; 1261 } 1262 PetscFunctionReturn(0); 1263 } 1264 1265 1266 #undef __FUNCT__ 1267 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1268 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1269 { 1270 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1271 Mat outA; 1272 int ierr; 1273 PetscTruth row_identity,col_identity; 1274 1275 PetscFunctionBegin; 1276 if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1277 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1278 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1279 if (!row_identity || !col_identity) { 1280 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1281 } 1282 1283 outA = inA; 1284 inA->factor = FACTOR_LU; 1285 1286 if (!a->diag) { 1287 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1288 } 1289 1290 a->row = row; 1291 a->col = col; 1292 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1293 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1294 1295 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1296 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1297 PetscLogObjectParent(inA,a->icol); 1298 1299 /* 1300 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1301 for ILU(0) factorization with natural ordering 1302 */ 1303 if (a->bs < 8) { 1304 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1305 } else { 1306 if (!a->solve_work) { 1307 ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1308 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1309 } 1310 } 1311 1312 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1313 1314 PetscFunctionReturn(0); 1315 } 1316 #undef __FUNCT__ 1317 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1318 int MatPrintHelp_SeqBAIJ(Mat A) 1319 { 1320 static PetscTruth called = PETSC_FALSE; 1321 MPI_Comm comm = A->comm; 1322 int ierr; 1323 1324 PetscFunctionBegin; 1325 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1326 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1327 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1328 PetscFunctionReturn(0); 1329 } 1330 1331 EXTERN_C_BEGIN 1332 #undef __FUNCT__ 1333 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1334 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1335 { 1336 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1337 int i,nz,nbs; 1338 1339 PetscFunctionBegin; 1340 nz = baij->maxnz/baij->bs2; 1341 nbs = baij->nbs; 1342 for (i=0; i<nz; i++) { 1343 baij->j[i] = indices[i]; 1344 } 1345 baij->nz = nz; 1346 for (i=0; i<nbs; i++) { 1347 baij->ilen[i] = baij->imax[i]; 1348 } 1349 1350 PetscFunctionReturn(0); 1351 } 1352 EXTERN_C_END 1353 1354 #undef __FUNCT__ 1355 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1356 /*@ 1357 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1358 in the matrix. 1359 1360 Input Parameters: 1361 + mat - the SeqBAIJ matrix 1362 - indices - the column indices 1363 1364 Level: advanced 1365 1366 Notes: 1367 This can be called if you have precomputed the nonzero structure of the 1368 matrix and want to provide it to the matrix object to improve the performance 1369 of the MatSetValues() operation. 1370 1371 You MUST have set the correct numbers of nonzeros per row in the call to 1372 MatCreateSeqBAIJ(). 1373 1374 MUST be called before any calls to MatSetValues(); 1375 1376 @*/ 1377 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1378 { 1379 int ierr,(*f)(Mat,int *); 1380 1381 PetscFunctionBegin; 1382 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1383 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1384 if (f) { 1385 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1386 } else { 1387 SETERRQ(1,"Wrong type of matrix to set column indices"); 1388 } 1389 PetscFunctionReturn(0); 1390 } 1391 1392 #undef __FUNCT__ 1393 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1394 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1395 { 1396 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1397 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1398 PetscReal atmp; 1399 PetscScalar *x,zero = 0.0; 1400 MatScalar *aa; 1401 int ncols,brow,krow,kcol; 1402 1403 PetscFunctionBegin; 1404 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1405 bs = a->bs; 1406 aa = a->a; 1407 ai = a->i; 1408 aj = a->j; 1409 mbs = a->mbs; 1410 1411 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1412 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1413 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1414 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1415 for (i=0; i<mbs; i++) { 1416 ncols = ai[1] - ai[0]; ai++; 1417 brow = bs*i; 1418 for (j=0; j<ncols; j++){ 1419 /* bcol = bs*(*aj); */ 1420 for (kcol=0; kcol<bs; kcol++){ 1421 for (krow=0; krow<bs; krow++){ 1422 atmp = PetscAbsScalar(*aa); aa++; 1423 row = brow + krow; /* row index */ 1424 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1425 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1426 } 1427 } 1428 aj++; 1429 } 1430 } 1431 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1432 PetscFunctionReturn(0); 1433 } 1434 1435 #undef __FUNCT__ 1436 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1437 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1438 { 1439 int ierr; 1440 1441 PetscFunctionBegin; 1442 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1443 PetscFunctionReturn(0); 1444 } 1445 1446 #undef __FUNCT__ 1447 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1448 int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array) 1449 { 1450 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1451 PetscFunctionBegin; 1452 *array = a->a; 1453 PetscFunctionReturn(0); 1454 } 1455 1456 #undef __FUNCT__ 1457 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1458 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array) 1459 { 1460 PetscFunctionBegin; 1461 PetscFunctionReturn(0); 1462 } 1463 1464 #include "petscblaslapack.h" 1465 #undef __FUNCT__ 1466 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1467 int MatAXPY_SeqBAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1468 { 1469 int ierr,one=1; 1470 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1471 1472 PetscFunctionBegin; 1473 if (str == SAME_NONZERO_PATTERN) { 1474 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1475 } else { 1476 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1477 } 1478 PetscFunctionReturn(0); 1479 } 1480 1481 /* -------------------------------------------------------------------*/ 1482 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1483 MatGetRow_SeqBAIJ, 1484 MatRestoreRow_SeqBAIJ, 1485 MatMult_SeqBAIJ_N, 1486 MatMultAdd_SeqBAIJ_N, 1487 MatMultTranspose_SeqBAIJ, 1488 MatMultTransposeAdd_SeqBAIJ, 1489 MatSolve_SeqBAIJ_N, 1490 0, 1491 0, 1492 0, 1493 MatLUFactor_SeqBAIJ, 1494 0, 1495 0, 1496 MatTranspose_SeqBAIJ, 1497 MatGetInfo_SeqBAIJ, 1498 MatEqual_SeqBAIJ, 1499 MatGetDiagonal_SeqBAIJ, 1500 MatDiagonalScale_SeqBAIJ, 1501 MatNorm_SeqBAIJ, 1502 0, 1503 MatAssemblyEnd_SeqBAIJ, 1504 0, 1505 MatSetOption_SeqBAIJ, 1506 MatZeroEntries_SeqBAIJ, 1507 MatZeroRows_SeqBAIJ, 1508 MatLUFactorSymbolic_SeqBAIJ, 1509 MatLUFactorNumeric_SeqBAIJ_N, 1510 0, 1511 0, 1512 MatSetUpPreallocation_SeqBAIJ, 1513 MatILUFactorSymbolic_SeqBAIJ, 1514 0, 1515 MatGetArray_SeqBAIJ, 1516 MatRestoreArray_SeqBAIJ, 1517 MatDuplicate_SeqBAIJ, 1518 0, 1519 0, 1520 MatILUFactor_SeqBAIJ, 1521 0, 1522 MatAXPY_SeqBAIJ, 1523 MatGetSubMatrices_SeqBAIJ, 1524 MatIncreaseOverlap_SeqBAIJ, 1525 MatGetValues_SeqBAIJ, 1526 0, 1527 MatPrintHelp_SeqBAIJ, 1528 MatScale_SeqBAIJ, 1529 0, 1530 0, 1531 0, 1532 MatGetBlockSize_SeqBAIJ, 1533 MatGetRowIJ_SeqBAIJ, 1534 MatRestoreRowIJ_SeqBAIJ, 1535 0, 1536 0, 1537 0, 1538 0, 1539 0, 1540 0, 1541 MatSetValuesBlocked_SeqBAIJ, 1542 MatGetSubMatrix_SeqBAIJ, 1543 MatDestroy_SeqBAIJ, 1544 MatView_SeqBAIJ, 1545 MatGetPetscMaps_Petsc, 1546 0, 1547 0, 1548 0, 1549 0, 1550 0, 1551 0, 1552 MatGetRowMax_SeqBAIJ, 1553 MatConvert_Basic}; 1554 1555 EXTERN_C_BEGIN 1556 #undef __FUNCT__ 1557 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 1558 int MatStoreValues_SeqBAIJ(Mat mat) 1559 { 1560 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1561 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1562 int ierr; 1563 1564 PetscFunctionBegin; 1565 if (aij->nonew != 1) { 1566 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1567 } 1568 1569 /* allocate space for values if not already there */ 1570 if (!aij->saved_values) { 1571 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1572 } 1573 1574 /* copy values over */ 1575 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1576 PetscFunctionReturn(0); 1577 } 1578 EXTERN_C_END 1579 1580 EXTERN_C_BEGIN 1581 #undef __FUNCT__ 1582 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 1583 int MatRetrieveValues_SeqBAIJ(Mat mat) 1584 { 1585 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1586 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1587 1588 PetscFunctionBegin; 1589 if (aij->nonew != 1) { 1590 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1591 } 1592 if (!aij->saved_values) { 1593 SETERRQ(1,"Must call MatStoreValues(A);first"); 1594 } 1595 1596 /* copy values over */ 1597 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1598 PetscFunctionReturn(0); 1599 } 1600 EXTERN_C_END 1601 1602 EXTERN_C_BEGIN 1603 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1604 EXTERN_C_END 1605 1606 EXTERN_C_BEGIN 1607 #undef __FUNCT__ 1608 #define __FUNCT__ "MatCreate_SeqBAIJ" 1609 int MatCreate_SeqBAIJ(Mat B) 1610 { 1611 int ierr,size; 1612 Mat_SeqBAIJ *b; 1613 1614 PetscFunctionBegin; 1615 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1616 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1617 1618 B->m = B->M = PetscMax(B->m,B->M); 1619 B->n = B->N = PetscMax(B->n,B->N); 1620 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1621 B->data = (void*)b; 1622 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1623 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1624 B->factor = 0; 1625 B->lupivotthreshold = 1.0; 1626 B->mapping = 0; 1627 b->row = 0; 1628 b->col = 0; 1629 b->icol = 0; 1630 b->reallocs = 0; 1631 b->saved_values = 0; 1632 #if defined(PETSC_USE_MAT_SINGLE) 1633 b->setvalueslen = 0; 1634 b->setvaluescopy = PETSC_NULL; 1635 #endif 1636 b->single_precision_solves = PETSC_FALSE; 1637 1638 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 1639 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1640 1641 b->sorted = PETSC_FALSE; 1642 b->roworiented = PETSC_TRUE; 1643 b->nonew = 0; 1644 b->diag = 0; 1645 b->solve_work = 0; 1646 b->mult_work = 0; 1647 B->spptr = 0; 1648 B->info.nz_unneeded = (PetscReal)b->maxnz; 1649 b->keepzeroedrows = PETSC_FALSE; 1650 1651 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1652 "MatStoreValues_SeqBAIJ", 1653 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1654 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1655 "MatRetrieveValues_SeqBAIJ", 1656 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1657 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1658 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1659 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1660 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C", 1661 "MatConvert_SeqBAIJ_SeqAIJ", 1662 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 1663 PetscFunctionReturn(0); 1664 } 1665 EXTERN_C_END 1666 1667 #undef __FUNCT__ 1668 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 1669 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1670 { 1671 Mat C; 1672 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1673 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1674 1675 PetscFunctionBegin; 1676 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1677 1678 *B = 0; 1679 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1680 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1681 c = (Mat_SeqBAIJ*)C->data; 1682 1683 c->bs = a->bs; 1684 c->bs2 = a->bs2; 1685 c->mbs = a->mbs; 1686 c->nbs = a->nbs; 1687 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1688 1689 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1690 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1691 for (i=0; i<mbs; i++) { 1692 c->imax[i] = a->imax[i]; 1693 c->ilen[i] = a->ilen[i]; 1694 } 1695 1696 /* allocate the matrix space */ 1697 c->singlemalloc = PETSC_TRUE; 1698 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1699 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1700 c->j = (int*)(c->a + nz*bs2); 1701 c->i = c->j + nz; 1702 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1703 if (mbs > 0) { 1704 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1705 if (cpvalues == MAT_COPY_VALUES) { 1706 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1707 } else { 1708 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1709 } 1710 } 1711 1712 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1713 c->sorted = a->sorted; 1714 c->roworiented = a->roworiented; 1715 c->nonew = a->nonew; 1716 1717 if (a->diag) { 1718 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1719 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1720 for (i=0; i<mbs; i++) { 1721 c->diag[i] = a->diag[i]; 1722 } 1723 } else c->diag = 0; 1724 c->nz = a->nz; 1725 c->maxnz = a->maxnz; 1726 c->solve_work = 0; 1727 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1728 c->mult_work = 0; 1729 C->preallocated = PETSC_TRUE; 1730 C->assembled = PETSC_TRUE; 1731 *B = C; 1732 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1733 PetscFunctionReturn(0); 1734 } 1735 1736 EXTERN_C_BEGIN 1737 #undef __FUNCT__ 1738 #define __FUNCT__ "MatLoad_SeqBAIJ" 1739 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 1740 { 1741 Mat_SeqBAIJ *a; 1742 Mat B; 1743 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1744 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1745 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1746 int *masked,nmask,tmp,bs2,ishift; 1747 PetscScalar *aa; 1748 MPI_Comm comm = ((PetscObject)viewer)->comm; 1749 1750 PetscFunctionBegin; 1751 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1752 bs2 = bs*bs; 1753 1754 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1755 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1756 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1757 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1758 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1759 M = header[1]; N = header[2]; nz = header[3]; 1760 1761 if (header[3] < 0) { 1762 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1763 } 1764 1765 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1766 1767 /* 1768 This code adds extra rows to make sure the number of rows is 1769 divisible by the blocksize 1770 */ 1771 mbs = M/bs; 1772 extra_rows = bs - M + bs*(mbs); 1773 if (extra_rows == bs) extra_rows = 0; 1774 else mbs++; 1775 if (extra_rows) { 1776 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1777 } 1778 1779 /* read in row lengths */ 1780 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1781 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1782 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1783 1784 /* read in column indices */ 1785 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1786 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1787 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1788 1789 /* loop over row lengths determining block row lengths */ 1790 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1791 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1792 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1793 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1794 masked = mask + mbs; 1795 rowcount = 0; nzcount = 0; 1796 for (i=0; i<mbs; i++) { 1797 nmask = 0; 1798 for (j=0; j<bs; j++) { 1799 kmax = rowlengths[rowcount]; 1800 for (k=0; k<kmax; k++) { 1801 tmp = jj[nzcount++]/bs; 1802 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1803 } 1804 rowcount++; 1805 } 1806 browlengths[i] += nmask; 1807 /* zero out the mask elements we set */ 1808 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1809 } 1810 1811 /* create our matrix */ 1812 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1813 B = *A; 1814 a = (Mat_SeqBAIJ*)B->data; 1815 1816 /* set matrix "i" values */ 1817 a->i[0] = 0; 1818 for (i=1; i<= mbs; i++) { 1819 a->i[i] = a->i[i-1] + browlengths[i-1]; 1820 a->ilen[i-1] = browlengths[i-1]; 1821 } 1822 a->nz = 0; 1823 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1824 1825 /* read in nonzero values */ 1826 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1827 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1828 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1829 1830 /* set "a" and "j" values into matrix */ 1831 nzcount = 0; jcount = 0; 1832 for (i=0; i<mbs; i++) { 1833 nzcountb = nzcount; 1834 nmask = 0; 1835 for (j=0; j<bs; j++) { 1836 kmax = rowlengths[i*bs+j]; 1837 for (k=0; k<kmax; k++) { 1838 tmp = jj[nzcount++]/bs; 1839 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1840 } 1841 } 1842 /* sort the masked values */ 1843 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1844 1845 /* set "j" values into matrix */ 1846 maskcount = 1; 1847 for (j=0; j<nmask; j++) { 1848 a->j[jcount++] = masked[j]; 1849 mask[masked[j]] = maskcount++; 1850 } 1851 /* set "a" values into matrix */ 1852 ishift = bs2*a->i[i]; 1853 for (j=0; j<bs; j++) { 1854 kmax = rowlengths[i*bs+j]; 1855 for (k=0; k<kmax; k++) { 1856 tmp = jj[nzcountb]/bs ; 1857 block = mask[tmp] - 1; 1858 point = jj[nzcountb] - bs*tmp; 1859 idx = ishift + bs2*block + j + bs*point; 1860 a->a[idx] = (MatScalar)aa[nzcountb++]; 1861 } 1862 } 1863 /* zero out the mask elements we set */ 1864 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1865 } 1866 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1867 1868 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1869 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1870 ierr = PetscFree(aa);CHKERRQ(ierr); 1871 ierr = PetscFree(jj);CHKERRQ(ierr); 1872 ierr = PetscFree(mask);CHKERRQ(ierr); 1873 1874 B->assembled = PETSC_TRUE; 1875 1876 ierr = MatView_Private(B);CHKERRQ(ierr); 1877 PetscFunctionReturn(0); 1878 } 1879 EXTERN_C_END 1880 1881 #undef __FUNCT__ 1882 #define __FUNCT__ "MatCreateSeqBAIJ" 1883 /*@C 1884 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1885 compressed row) format. For good matrix assembly performance the 1886 user should preallocate the matrix storage by setting the parameter nz 1887 (or the array nnz). By setting these parameters accurately, performance 1888 during matrix assembly can be increased by more than a factor of 50. 1889 1890 Collective on MPI_Comm 1891 1892 Input Parameters: 1893 + comm - MPI communicator, set to PETSC_COMM_SELF 1894 . bs - size of block 1895 . m - number of rows 1896 . n - number of columns 1897 . nz - number of nonzero blocks per block row (same for all rows) 1898 - nnz - array containing the number of nonzero blocks in the various block rows 1899 (possibly different for each block row) or PETSC_NULL 1900 1901 Output Parameter: 1902 . A - the matrix 1903 1904 Options Database Keys: 1905 . -mat_no_unroll - uses code that does not unroll the loops in the 1906 block calculations (much slower) 1907 . -mat_block_size - size of the blocks to use 1908 1909 Level: intermediate 1910 1911 Notes: 1912 A nonzero block is any block that as 1 or more nonzeros in it 1913 1914 The block AIJ format is fully compatible with standard Fortran 77 1915 storage. That is, the stored row and column indices can begin at 1916 either one (as in Fortran) or zero. See the users' manual for details. 1917 1918 Specify the preallocated storage with either nz or nnz (not both). 1919 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1920 allocation. For additional details, see the users manual chapter on 1921 matrices. 1922 1923 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1924 @*/ 1925 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1926 { 1927 int ierr; 1928 1929 PetscFunctionBegin; 1930 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1931 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 1932 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1933 PetscFunctionReturn(0); 1934 } 1935 1936 #undef __FUNCT__ 1937 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 1938 /*@C 1939 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 1940 per row in the matrix. For good matrix assembly performance the 1941 user should preallocate the matrix storage by setting the parameter nz 1942 (or the array nnz). By setting these parameters accurately, performance 1943 during matrix assembly can be increased by more than a factor of 50. 1944 1945 Collective on MPI_Comm 1946 1947 Input Parameters: 1948 + A - the matrix 1949 . bs - size of block 1950 . nz - number of block nonzeros per block row (same for all rows) 1951 - nnz - array containing the number of block nonzeros in the various block rows 1952 (possibly different for each block row) or PETSC_NULL 1953 1954 Options Database Keys: 1955 . -mat_no_unroll - uses code that does not unroll the loops in the 1956 block calculations (much slower) 1957 . -mat_block_size - size of the blocks to use 1958 1959 Level: intermediate 1960 1961 Notes: 1962 The block AIJ format is fully compatible with standard Fortran 77 1963 storage. That is, the stored row and column indices can begin at 1964 either one (as in Fortran) or zero. See the users' manual for details. 1965 1966 Specify the preallocated storage with either nz or nnz (not both). 1967 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1968 allocation. For additional details, see the users manual chapter on 1969 matrices. 1970 1971 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1972 @*/ 1973 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1974 { 1975 Mat_SeqBAIJ *b; 1976 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1977 PetscTruth flg; 1978 1979 PetscFunctionBegin; 1980 ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr); 1981 if (!flg) PetscFunctionReturn(0); 1982 1983 B->preallocated = PETSC_TRUE; 1984 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1985 if (nnz && newbs != bs) { 1986 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1987 } 1988 bs = newbs; 1989 1990 mbs = B->m/bs; 1991 nbs = B->n/bs; 1992 bs2 = bs*bs; 1993 1994 if (mbs*bs!=B->m || nbs*bs!=B->n) { 1995 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1996 } 1997 1998 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1999 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 2000 if (nnz) { 2001 for (i=0; i<mbs; i++) { 2002 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 2003 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 2004 } 2005 } 2006 2007 b = (Mat_SeqBAIJ*)B->data; 2008 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 2009 B->ops->solve = MatSolve_SeqBAIJ_Update; 2010 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2011 if (!flg) { 2012 switch (bs) { 2013 case 1: 2014 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2015 B->ops->mult = MatMult_SeqBAIJ_1; 2016 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2017 break; 2018 case 2: 2019 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2020 B->ops->mult = MatMult_SeqBAIJ_2; 2021 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2022 break; 2023 case 3: 2024 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2025 B->ops->mult = MatMult_SeqBAIJ_3; 2026 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2027 break; 2028 case 4: 2029 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2030 B->ops->mult = MatMult_SeqBAIJ_4; 2031 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2032 break; 2033 case 5: 2034 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2035 B->ops->mult = MatMult_SeqBAIJ_5; 2036 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2037 break; 2038 case 6: 2039 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2040 B->ops->mult = MatMult_SeqBAIJ_6; 2041 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2042 break; 2043 case 7: 2044 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2045 B->ops->mult = MatMult_SeqBAIJ_7; 2046 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2047 break; 2048 default: 2049 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2050 B->ops->mult = MatMult_SeqBAIJ_N; 2051 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2052 break; 2053 } 2054 } 2055 b->bs = bs; 2056 b->mbs = mbs; 2057 b->nbs = nbs; 2058 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 2059 if (!nnz) { 2060 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2061 else if (nz <= 0) nz = 1; 2062 for (i=0; i<mbs; i++) b->imax[i] = nz; 2063 nz = nz*mbs; 2064 } else { 2065 nz = 0; 2066 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2067 } 2068 2069 /* allocate the matrix space */ 2070 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 2071 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 2072 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2073 b->j = (int*)(b->a + nz*bs2); 2074 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 2075 b->i = b->j + nz; 2076 b->singlemalloc = PETSC_TRUE; 2077 2078 b->i[0] = 0; 2079 for (i=1; i<mbs+1; i++) { 2080 b->i[i] = b->i[i-1] + b->imax[i-1]; 2081 } 2082 2083 /* b->ilen will count nonzeros in each block row so far. */ 2084 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 2085 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2086 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2087 2088 b->bs = bs; 2089 b->bs2 = bs2; 2090 b->mbs = mbs; 2091 b->nz = 0; 2092 b->maxnz = nz*bs2; 2093 B->info.nz_unneeded = (PetscReal)b->maxnz; 2094 PetscFunctionReturn(0); 2095 } 2096 2097