1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 /* 12 Special version for Fun3d sequential benchmark 13 */ 14 #if defined(PETSC_HAVE_FORTRAN_CAPS) 15 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 16 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 17 #define matsetvaluesblocked4_ matsetvaluesblocked4 18 #endif 19 20 EXTERN_C_BEGIN 21 #undef __FUNCT__ 22 #define __FUNCT__ "matsetvaluesblocked4_" 23 void matsetvaluesblocked4_(Mat *AA,int *mm,const int im[],int *nn,const int in[],const PetscScalar v[]) 24 { 25 Mat A = *AA; 26 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 27 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 28 int *ai=a->i,*ailen=a->ilen; 29 int *aj=a->j,stepval; 30 const PetscScalar *value = v; 31 MatScalar *ap,*aa = a->a,*bap; 32 33 PetscFunctionBegin; 34 stepval = (n-1)*4; 35 for (k=0; k<m; k++) { /* loop over added rows */ 36 row = im[k]; 37 rp = aj + ai[row]; 38 ap = aa + 16*ai[row]; 39 nrow = ailen[row]; 40 low = 0; 41 for (l=0; l<n; l++) { /* loop over added columns */ 42 col = in[l]; 43 value = v + k*(stepval+4)*4 + l*4; 44 low = 0; high = nrow; 45 while (high-low > 7) { 46 t = (low+high)/2; 47 if (rp[t] > col) high = t; 48 else low = t; 49 } 50 for (i=low; i<high; i++) { 51 if (rp[i] > col) break; 52 if (rp[i] == col) { 53 bap = ap + 16*i; 54 for (ii=0; ii<4; ii++,value+=stepval) { 55 for (jj=ii; jj<16; jj+=4) { 56 bap[jj] += *value++; 57 } 58 } 59 goto noinsert2; 60 } 61 } 62 N = nrow++ - 1; 63 /* shift up all the later entries in this row */ 64 for (ii=N; ii>=i; ii--) { 65 rp[ii+1] = rp[ii]; 66 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 67 } 68 if (N >= i) { 69 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 70 } 71 rp[i] = col; 72 bap = ap + 16*i; 73 for (ii=0; ii<4; ii++,value+=stepval) { 74 for (jj=ii; jj<16; jj+=4) { 75 bap[jj] = *value++; 76 } 77 } 78 noinsert2:; 79 low = i; 80 } 81 ailen[row] = nrow; 82 } 83 } 84 EXTERN_C_END 85 86 #if defined(PETSC_HAVE_FORTRAN_CAPS) 87 #define matsetvalues4_ MATSETVALUES4 88 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 89 #define matsetvalues4_ matsetvalues4 90 #endif 91 92 EXTERN_C_BEGIN 93 #undef __FUNCT__ 94 #define __FUNCT__ "MatSetValues4_" 95 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 96 { 97 Mat A = *AA; 98 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 99 int *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 100 int *ai=a->i,*ailen=a->ilen; 101 int *aj=a->j,brow,bcol; 102 int ridx,cidx; 103 MatScalar *ap,value,*aa=a->a,*bap; 104 105 PetscFunctionBegin; 106 for (k=0; k<m; k++) { /* loop over added rows */ 107 row = im[k]; brow = row/4; 108 rp = aj + ai[brow]; 109 ap = aa + 16*ai[brow]; 110 nrow = ailen[brow]; 111 low = 0; 112 for (l=0; l<n; l++) { /* loop over added columns */ 113 col = in[l]; bcol = col/4; 114 ridx = row % 4; cidx = col % 4; 115 value = v[l + k*n]; 116 low = 0; high = nrow; 117 while (high-low > 7) { 118 t = (low+high)/2; 119 if (rp[t] > bcol) high = t; 120 else low = t; 121 } 122 for (i=low; i<high; i++) { 123 if (rp[i] > bcol) break; 124 if (rp[i] == bcol) { 125 bap = ap + 16*i + 4*cidx + ridx; 126 *bap += value; 127 goto noinsert1; 128 } 129 } 130 N = nrow++ - 1; 131 /* shift up all the later entries in this row */ 132 for (ii=N; ii>=i; ii--) { 133 rp[ii+1] = rp[ii]; 134 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 135 } 136 if (N>=i) { 137 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 138 } 139 rp[i] = bcol; 140 ap[16*i + 4*cidx + ridx] = value; 141 noinsert1:; 142 low = i; 143 } 144 ailen[brow] = nrow; 145 } 146 } 147 EXTERN_C_END 148 149 /* UGLY, ugly, ugly 150 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 151 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 152 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 153 converts the entries into single precision and then calls ..._MatScalar() to put them 154 into the single precision data structures. 155 */ 156 #if defined(PETSC_USE_MAT_SINGLE) 157 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 158 #else 159 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 160 #endif 161 162 #define CHUNKSIZE 10 163 164 /* 165 Checks for missing diagonals 166 */ 167 #undef __FUNCT__ 168 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 169 int MatMissingDiagonal_SeqBAIJ(Mat A) 170 { 171 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 172 int *diag,*jj = a->j,i,ierr; 173 174 PetscFunctionBegin; 175 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 176 diag = a->diag; 177 for (i=0; i<a->mbs; i++) { 178 if (jj[diag[i]] != i) { 179 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 180 } 181 } 182 PetscFunctionReturn(0); 183 } 184 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 187 int MatMarkDiagonal_SeqBAIJ(Mat A) 188 { 189 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 190 int i,j,*diag,m = a->mbs,ierr; 191 192 PetscFunctionBegin; 193 if (a->diag) PetscFunctionReturn(0); 194 195 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 196 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 197 for (i=0; i<m; i++) { 198 diag[i] = a->i[i+1]; 199 for (j=a->i[i]; j<a->i[i+1]; j++) { 200 if (a->j[j] == i) { 201 diag[i] = j; 202 break; 203 } 204 } 205 } 206 a->diag = diag; 207 PetscFunctionReturn(0); 208 } 209 210 211 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 212 213 #undef __FUNCT__ 214 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 215 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 216 { 217 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 218 int ierr,n = a->mbs,i; 219 220 PetscFunctionBegin; 221 *nn = n; 222 if (!ia) PetscFunctionReturn(0); 223 if (symmetric) { 224 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 225 } else if (oshift == 1) { 226 /* temporarily add 1 to i and j indices */ 227 int nz = a->i[n]; 228 for (i=0; i<nz; i++) a->j[i]++; 229 for (i=0; i<n+1; i++) a->i[i]++; 230 *ia = a->i; *ja = a->j; 231 } else { 232 *ia = a->i; *ja = a->j; 233 } 234 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 240 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 241 { 242 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 243 int i,n = a->mbs,ierr; 244 245 PetscFunctionBegin; 246 if (!ia) PetscFunctionReturn(0); 247 if (symmetric) { 248 ierr = PetscFree(*ia);CHKERRQ(ierr); 249 ierr = PetscFree(*ja);CHKERRQ(ierr); 250 } else if (oshift == 1) { 251 int nz = a->i[n]-1; 252 for (i=0; i<nz; i++) a->j[i]--; 253 for (i=0; i<n+1; i++) a->i[i]--; 254 } 255 PetscFunctionReturn(0); 256 } 257 258 #undef __FUNCT__ 259 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 260 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 261 { 262 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 263 264 PetscFunctionBegin; 265 *bs = baij->bs; 266 PetscFunctionReturn(0); 267 } 268 269 270 #undef __FUNCT__ 271 #define __FUNCT__ "MatDestroy_SeqBAIJ" 272 int MatDestroy_SeqBAIJ(Mat A) 273 { 274 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 275 int ierr; 276 277 PetscFunctionBegin; 278 #if defined(PETSC_USE_LOG) 279 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 280 #endif 281 ierr = PetscFree(a->a);CHKERRQ(ierr); 282 if (!a->singlemalloc) { 283 ierr = PetscFree(a->i);CHKERRQ(ierr); 284 ierr = PetscFree(a->j);CHKERRQ(ierr); 285 } 286 if (a->row) { 287 ierr = ISDestroy(a->row);CHKERRQ(ierr); 288 } 289 if (a->col) { 290 ierr = ISDestroy(a->col);CHKERRQ(ierr); 291 } 292 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 293 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 294 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 295 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 296 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 297 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 298 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 299 #if defined(PETSC_USE_MAT_SINGLE) 300 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 301 #endif 302 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 303 304 ierr = PetscFree(a);CHKERRQ(ierr); 305 PetscFunctionReturn(0); 306 } 307 308 #undef __FUNCT__ 309 #define __FUNCT__ "MatSetOption_SeqBAIJ" 310 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 311 { 312 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 313 314 PetscFunctionBegin; 315 switch (op) { 316 case MAT_ROW_ORIENTED: 317 a->roworiented = PETSC_TRUE; 318 break; 319 case MAT_COLUMN_ORIENTED: 320 a->roworiented = PETSC_FALSE; 321 break; 322 case MAT_COLUMNS_SORTED: 323 a->sorted = PETSC_TRUE; 324 break; 325 case MAT_COLUMNS_UNSORTED: 326 a->sorted = PETSC_FALSE; 327 break; 328 case MAT_KEEP_ZEROED_ROWS: 329 a->keepzeroedrows = PETSC_TRUE; 330 break; 331 case MAT_NO_NEW_NONZERO_LOCATIONS: 332 a->nonew = 1; 333 break; 334 case MAT_NEW_NONZERO_LOCATION_ERR: 335 a->nonew = -1; 336 break; 337 case MAT_NEW_NONZERO_ALLOCATION_ERR: 338 a->nonew = -2; 339 break; 340 case MAT_YES_NEW_NONZERO_LOCATIONS: 341 a->nonew = 0; 342 break; 343 case MAT_ROWS_SORTED: 344 case MAT_ROWS_UNSORTED: 345 case MAT_YES_NEW_DIAGONALS: 346 case MAT_IGNORE_OFF_PROC_ENTRIES: 347 case MAT_USE_HASH_TABLE: 348 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 349 break; 350 case MAT_NO_NEW_DIAGONALS: 351 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 352 case MAT_SYMMETRIC: 353 case MAT_STRUCTURALLY_SYMMETRIC: 354 case MAT_NOT_SYMMETRIC: 355 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 356 case MAT_HERMITIAN: 357 case MAT_NOT_HERMITIAN: 358 case MAT_SYMMETRY_ETERNAL: 359 case MAT_NOT_SYMMETRY_ETERNAL: 360 break; 361 default: 362 SETERRQ(PETSC_ERR_SUP,"unknown option"); 363 } 364 PetscFunctionReturn(0); 365 } 366 367 #undef __FUNCT__ 368 #define __FUNCT__ "MatGetRow_SeqBAIJ" 369 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 370 { 371 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 372 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 373 MatScalar *aa,*aa_i; 374 PetscScalar *v_i; 375 376 PetscFunctionBegin; 377 bs = a->bs; 378 ai = a->i; 379 aj = a->j; 380 aa = a->a; 381 bs2 = a->bs2; 382 383 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d out of range", row); 384 385 bn = row/bs; /* Block number */ 386 bp = row % bs; /* Block Position */ 387 M = ai[bn+1] - ai[bn]; 388 *nz = bs*M; 389 390 if (v) { 391 *v = 0; 392 if (*nz) { 393 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 394 for (i=0; i<M; i++) { /* for each block in the block row */ 395 v_i = *v + i*bs; 396 aa_i = aa + bs2*(ai[bn] + i); 397 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 398 } 399 } 400 } 401 402 if (idx) { 403 *idx = 0; 404 if (*nz) { 405 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 406 for (i=0; i<M; i++) { /* for each block in the block row */ 407 idx_i = *idx + i*bs; 408 itmp = bs*aj[ai[bn] + i]; 409 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 410 } 411 } 412 } 413 PetscFunctionReturn(0); 414 } 415 416 #undef __FUNCT__ 417 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 418 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 419 { 420 int ierr; 421 422 PetscFunctionBegin; 423 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 424 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 425 PetscFunctionReturn(0); 426 } 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "MatTranspose_SeqBAIJ" 430 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 431 { 432 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 433 Mat C; 434 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 435 int *rows,*cols,bs2=a->bs2; 436 PetscScalar *array; 437 438 PetscFunctionBegin; 439 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 440 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 441 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 442 443 #if defined(PETSC_USE_MAT_SINGLE) 444 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 445 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 446 #else 447 array = a->a; 448 #endif 449 450 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 451 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 452 ierr = PetscFree(col);CHKERRQ(ierr); 453 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 454 cols = rows + bs; 455 for (i=0; i<mbs; i++) { 456 cols[0] = i*bs; 457 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 458 len = ai[i+1] - ai[i]; 459 for (j=0; j<len; j++) { 460 rows[0] = (*aj++)*bs; 461 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 462 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 463 array += bs2; 464 } 465 } 466 ierr = PetscFree(rows);CHKERRQ(ierr); 467 #if defined(PETSC_USE_MAT_SINGLE) 468 ierr = PetscFree(array); 469 #endif 470 471 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 472 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 473 474 if (B) { 475 *B = C; 476 } else { 477 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 478 } 479 PetscFunctionReturn(0); 480 } 481 482 #undef __FUNCT__ 483 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 484 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 485 { 486 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 487 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 488 PetscScalar *aa; 489 FILE *file; 490 491 PetscFunctionBegin; 492 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 493 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 494 col_lens[0] = MAT_FILE_COOKIE; 495 496 col_lens[1] = A->m; 497 col_lens[2] = A->n; 498 col_lens[3] = a->nz*bs2; 499 500 /* store lengths of each row and write (including header) to file */ 501 count = 0; 502 for (i=0; i<a->mbs; i++) { 503 for (j=0; j<bs; j++) { 504 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 505 } 506 } 507 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 508 ierr = PetscFree(col_lens);CHKERRQ(ierr); 509 510 /* store column indices (zero start index) */ 511 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 512 count = 0; 513 for (i=0; i<a->mbs; i++) { 514 for (j=0; j<bs; j++) { 515 for (k=a->i[i]; k<a->i[i+1]; k++) { 516 for (l=0; l<bs; l++) { 517 jj[count++] = bs*a->j[k] + l; 518 } 519 } 520 } 521 } 522 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 523 ierr = PetscFree(jj);CHKERRQ(ierr); 524 525 /* store nonzero values */ 526 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 527 count = 0; 528 for (i=0; i<a->mbs; i++) { 529 for (j=0; j<bs; j++) { 530 for (k=a->i[i]; k<a->i[i+1]; k++) { 531 for (l=0; l<bs; l++) { 532 aa[count++] = a->a[bs2*k + l*bs + j]; 533 } 534 } 535 } 536 } 537 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 538 ierr = PetscFree(aa);CHKERRQ(ierr); 539 540 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 541 if (file) { 542 fprintf(file,"-matload_block_size %d\n",a->bs); 543 } 544 PetscFunctionReturn(0); 545 } 546 547 #undef __FUNCT__ 548 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 549 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 550 { 551 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 552 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 553 PetscViewerFormat format; 554 555 PetscFunctionBegin; 556 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 557 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 558 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 559 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 560 Mat aij; 561 ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr); 562 ierr = MatView(aij,viewer);CHKERRQ(ierr); 563 ierr = MatDestroy(aij);CHKERRQ(ierr); 564 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 565 PetscFunctionReturn(0); 566 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 567 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 568 for (i=0; i<a->mbs; i++) { 569 for (j=0; j<bs; j++) { 570 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 571 for (k=a->i[i]; k<a->i[i+1]; k++) { 572 for (l=0; l<bs; l++) { 573 #if defined(PETSC_USE_COMPLEX) 574 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 575 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l, 576 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 577 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 578 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l, 579 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 580 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 581 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 582 } 583 #else 584 if (a->a[bs2*k + l*bs + j] != 0.0) { 585 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 586 } 587 #endif 588 } 589 } 590 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 591 } 592 } 593 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 594 } else { 595 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 596 for (i=0; i<a->mbs; i++) { 597 for (j=0; j<bs; j++) { 598 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 599 for (k=a->i[i]; k<a->i[i+1]; k++) { 600 for (l=0; l<bs; l++) { 601 #if defined(PETSC_USE_COMPLEX) 602 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 603 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 604 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 605 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 606 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 607 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 608 } else { 609 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 610 } 611 #else 612 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 613 #endif 614 } 615 } 616 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 617 } 618 } 619 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 620 } 621 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 622 PetscFunctionReturn(0); 623 } 624 625 #undef __FUNCT__ 626 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 627 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 628 { 629 Mat A = (Mat) Aa; 630 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 631 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 632 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 633 MatScalar *aa; 634 PetscViewer viewer; 635 636 PetscFunctionBegin; 637 638 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 639 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 640 641 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 642 643 /* loop over matrix elements drawing boxes */ 644 color = PETSC_DRAW_BLUE; 645 for (i=0,row=0; i<mbs; i++,row+=bs) { 646 for (j=a->i[i]; j<a->i[i+1]; j++) { 647 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 648 x_l = a->j[j]*bs; x_r = x_l + 1.0; 649 aa = a->a + j*bs2; 650 for (k=0; k<bs; k++) { 651 for (l=0; l<bs; l++) { 652 if (PetscRealPart(*aa++) >= 0.) continue; 653 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 654 } 655 } 656 } 657 } 658 color = PETSC_DRAW_CYAN; 659 for (i=0,row=0; i<mbs; i++,row+=bs) { 660 for (j=a->i[i]; j<a->i[i+1]; j++) { 661 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 662 x_l = a->j[j]*bs; x_r = x_l + 1.0; 663 aa = a->a + j*bs2; 664 for (k=0; k<bs; k++) { 665 for (l=0; l<bs; l++) { 666 if (PetscRealPart(*aa++) != 0.) continue; 667 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 668 } 669 } 670 } 671 } 672 673 color = PETSC_DRAW_RED; 674 for (i=0,row=0; i<mbs; i++,row+=bs) { 675 for (j=a->i[i]; j<a->i[i+1]; j++) { 676 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 677 x_l = a->j[j]*bs; x_r = x_l + 1.0; 678 aa = a->a + j*bs2; 679 for (k=0; k<bs; k++) { 680 for (l=0; l<bs; l++) { 681 if (PetscRealPart(*aa++) <= 0.) continue; 682 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 683 } 684 } 685 } 686 } 687 PetscFunctionReturn(0); 688 } 689 690 #undef __FUNCT__ 691 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 692 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 693 { 694 int ierr; 695 PetscReal xl,yl,xr,yr,w,h; 696 PetscDraw draw; 697 PetscTruth isnull; 698 699 PetscFunctionBegin; 700 701 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 702 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 703 704 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 705 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 706 xr += w; yr += h; xl = -w; yl = -h; 707 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 708 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 709 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 710 PetscFunctionReturn(0); 711 } 712 713 #undef __FUNCT__ 714 #define __FUNCT__ "MatView_SeqBAIJ" 715 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 716 { 717 int ierr; 718 PetscTruth isascii,isbinary,isdraw; 719 720 PetscFunctionBegin; 721 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 722 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 723 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 724 if (isascii){ 725 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 726 } else if (isbinary) { 727 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 728 } else if (isdraw) { 729 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 730 } else { 731 Mat B; 732 ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr); 733 ierr = MatView(B,viewer);CHKERRQ(ierr); 734 ierr = MatDestroy(B);CHKERRQ(ierr); 735 } 736 PetscFunctionReturn(0); 737 } 738 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatGetValues_SeqBAIJ" 742 int MatGetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[]) 743 { 744 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 745 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 746 int *ai = a->i,*ailen = a->ilen; 747 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 748 MatScalar *ap,*aa = a->a,zero = 0.0; 749 750 PetscFunctionBegin; 751 for (k=0; k<m; k++) { /* loop over rows */ 752 row = im[k]; brow = row/bs; 753 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 754 if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d too large", row); 755 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 756 nrow = ailen[brow]; 757 for (l=0; l<n; l++) { /* loop over columns */ 758 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 759 if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %d too large", in[l]); 760 col = in[l] ; 761 bcol = col/bs; 762 cidx = col%bs; 763 ridx = row%bs; 764 high = nrow; 765 low = 0; /* assume unsorted */ 766 while (high-low > 5) { 767 t = (low+high)/2; 768 if (rp[t] > bcol) high = t; 769 else low = t; 770 } 771 for (i=low; i<high; i++) { 772 if (rp[i] > bcol) break; 773 if (rp[i] == bcol) { 774 *v++ = ap[bs2*i+bs*cidx+ridx]; 775 goto finished; 776 } 777 } 778 *v++ = zero; 779 finished:; 780 } 781 } 782 PetscFunctionReturn(0); 783 } 784 785 #if defined(PETSC_USE_MAT_SINGLE) 786 #undef __FUNCT__ 787 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 788 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 789 { 790 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 791 int ierr,i,N = m*n*b->bs2; 792 MatScalar *vsingle; 793 794 PetscFunctionBegin; 795 if (N > b->setvalueslen) { 796 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 797 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 798 b->setvalueslen = N; 799 } 800 vsingle = b->setvaluescopy; 801 for (i=0; i<N; i++) { 802 vsingle[i] = v[i]; 803 } 804 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 805 PetscFunctionReturn(0); 806 } 807 #endif 808 809 810 #undef __FUNCT__ 811 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 812 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode is) 813 { 814 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 815 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 816 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 817 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 818 PetscTruth roworiented=a->roworiented; 819 const MatScalar *value = v; 820 MatScalar *ap,*aa = a->a,*bap; 821 822 PetscFunctionBegin; 823 if (roworiented) { 824 stepval = (n-1)*bs; 825 } else { 826 stepval = (m-1)*bs; 827 } 828 for (k=0; k<m; k++) { /* loop over added rows */ 829 row = im[k]; 830 if (row < 0) continue; 831 #if defined(PETSC_USE_BOPT_g) 832 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1); 833 #endif 834 rp = aj + ai[row]; 835 ap = aa + bs2*ai[row]; 836 rmax = imax[row]; 837 nrow = ailen[row]; 838 low = 0; 839 for (l=0; l<n; l++) { /* loop over added columns */ 840 if (in[l] < 0) continue; 841 #if defined(PETSC_USE_BOPT_g) 842 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],a->nbs-1); 843 #endif 844 col = in[l]; 845 if (roworiented) { 846 value = v + k*(stepval+bs)*bs + l*bs; 847 } else { 848 value = v + l*(stepval+bs)*bs + k*bs; 849 } 850 if (!sorted) low = 0; high = nrow; 851 while (high-low > 7) { 852 t = (low+high)/2; 853 if (rp[t] > col) high = t; 854 else low = t; 855 } 856 for (i=low; i<high; i++) { 857 if (rp[i] > col) break; 858 if (rp[i] == col) { 859 bap = ap + bs2*i; 860 if (roworiented) { 861 if (is == ADD_VALUES) { 862 for (ii=0; ii<bs; ii++,value+=stepval) { 863 for (jj=ii; jj<bs2; jj+=bs) { 864 bap[jj] += *value++; 865 } 866 } 867 } else { 868 for (ii=0; ii<bs; ii++,value+=stepval) { 869 for (jj=ii; jj<bs2; jj+=bs) { 870 bap[jj] = *value++; 871 } 872 } 873 } 874 } else { 875 if (is == ADD_VALUES) { 876 for (ii=0; ii<bs; ii++,value+=stepval) { 877 for (jj=0; jj<bs; jj++) { 878 *bap++ += *value++; 879 } 880 } 881 } else { 882 for (ii=0; ii<bs; ii++,value+=stepval) { 883 for (jj=0; jj<bs; jj++) { 884 *bap++ = *value++; 885 } 886 } 887 } 888 } 889 goto noinsert2; 890 } 891 } 892 if (nonew == 1) goto noinsert2; 893 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 894 if (nrow >= rmax) { 895 /* there is no extra room in row, therefore enlarge */ 896 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 897 MatScalar *new_a; 898 899 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 900 901 /* malloc new storage space */ 902 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 903 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 904 new_j = (int*)(new_a + bs2*new_nz); 905 new_i = new_j + new_nz; 906 907 /* copy over old data into new slots */ 908 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 909 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 910 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 911 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 912 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 913 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 914 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 915 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 916 /* free up old matrix storage */ 917 ierr = PetscFree(a->a);CHKERRQ(ierr); 918 if (!a->singlemalloc) { 919 ierr = PetscFree(a->i);CHKERRQ(ierr); 920 ierr = PetscFree(a->j);CHKERRQ(ierr); 921 } 922 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 923 a->singlemalloc = PETSC_TRUE; 924 925 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 926 rmax = imax[row] = imax[row] + CHUNKSIZE; 927 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 928 a->maxnz += bs2*CHUNKSIZE; 929 a->reallocs++; 930 a->nz++; 931 } 932 N = nrow++ - 1; 933 /* shift up all the later entries in this row */ 934 for (ii=N; ii>=i; ii--) { 935 rp[ii+1] = rp[ii]; 936 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 937 } 938 if (N >= i) { 939 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 940 } 941 rp[i] = col; 942 bap = ap + bs2*i; 943 if (roworiented) { 944 for (ii=0; ii<bs; ii++,value+=stepval) { 945 for (jj=ii; jj<bs2; jj+=bs) { 946 bap[jj] = *value++; 947 } 948 } 949 } else { 950 for (ii=0; ii<bs; ii++,value+=stepval) { 951 for (jj=0; jj<bs; jj++) { 952 *bap++ = *value++; 953 } 954 } 955 } 956 noinsert2:; 957 low = i; 958 } 959 ailen[row] = nrow; 960 } 961 PetscFunctionReturn(0); 962 } 963 964 #undef __FUNCT__ 965 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 966 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 967 { 968 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 969 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 970 int m = A->m,*ip,N,*ailen = a->ilen; 971 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 972 MatScalar *aa = a->a,*ap; 973 974 PetscFunctionBegin; 975 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 976 977 if (m) rmax = ailen[0]; 978 for (i=1; i<mbs; i++) { 979 /* move each row back by the amount of empty slots (fshift) before it*/ 980 fshift += imax[i-1] - ailen[i-1]; 981 rmax = PetscMax(rmax,ailen[i]); 982 if (fshift) { 983 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 984 N = ailen[i]; 985 for (j=0; j<N; j++) { 986 ip[j-fshift] = ip[j]; 987 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 988 } 989 } 990 ai[i] = ai[i-1] + ailen[i-1]; 991 } 992 if (mbs) { 993 fshift += imax[mbs-1] - ailen[mbs-1]; 994 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 995 } 996 /* reset ilen and imax for each row */ 997 for (i=0; i<mbs; i++) { 998 ailen[i] = imax[i] = ai[i+1] - ai[i]; 999 } 1000 a->nz = ai[mbs]; 1001 1002 /* diagonals may have moved, so kill the diagonal pointers */ 1003 if (fshift && a->diag) { 1004 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1005 PetscLogObjectMemory(A,-(mbs+1)*sizeof(int)); 1006 a->diag = 0; 1007 } 1008 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 1009 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 1010 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 1011 a->reallocs = 0; 1012 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1013 1014 PetscFunctionReturn(0); 1015 } 1016 1017 1018 1019 /* 1020 This function returns an array of flags which indicate the locations of contiguous 1021 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1022 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1023 Assume: sizes should be long enough to hold all the values. 1024 */ 1025 #undef __FUNCT__ 1026 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1027 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 1028 { 1029 int i,j,k,row; 1030 PetscTruth flg; 1031 1032 PetscFunctionBegin; 1033 for (i=0,j=0; i<n; j++) { 1034 row = idx[i]; 1035 if (row%bs!=0) { /* Not the begining of a block */ 1036 sizes[j] = 1; 1037 i++; 1038 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1039 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1040 i++; 1041 } else { /* Begining of the block, so check if the complete block exists */ 1042 flg = PETSC_TRUE; 1043 for (k=1; k<bs; k++) { 1044 if (row+k != idx[i+k]) { /* break in the block */ 1045 flg = PETSC_FALSE; 1046 break; 1047 } 1048 } 1049 if (flg == PETSC_TRUE) { /* No break in the bs */ 1050 sizes[j] = bs; 1051 i+= bs; 1052 } else { 1053 sizes[j] = 1; 1054 i++; 1055 } 1056 } 1057 } 1058 *bs_max = j; 1059 PetscFunctionReturn(0); 1060 } 1061 1062 #undef __FUNCT__ 1063 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1064 int MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag) 1065 { 1066 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1067 int ierr,i,j,k,count,is_n,*is_idx,*rows; 1068 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 1069 PetscScalar zero = 0.0; 1070 MatScalar *aa; 1071 1072 PetscFunctionBegin; 1073 /* Make a copy of the IS and sort it */ 1074 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1075 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1076 1077 /* allocate memory for rows,sizes */ 1078 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 1079 sizes = rows + is_n; 1080 1081 /* copy IS values to rows, and sort them */ 1082 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1083 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1084 if (baij->keepzeroedrows) { 1085 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1086 bs_max = is_n; 1087 } else { 1088 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1089 } 1090 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1091 1092 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1093 row = rows[j]; 1094 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 1095 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1096 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1097 if (sizes[i] == bs && !baij->keepzeroedrows) { 1098 if (diag) { 1099 if (baij->ilen[row/bs] > 0) { 1100 baij->ilen[row/bs] = 1; 1101 baij->j[baij->i[row/bs]] = row/bs; 1102 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1103 } 1104 /* Now insert all the diagonal values for this bs */ 1105 for (k=0; k<bs; k++) { 1106 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1107 } 1108 } else { /* (!diag) */ 1109 baij->ilen[row/bs] = 0; 1110 } /* end (!diag) */ 1111 } else { /* (sizes[i] != bs) */ 1112 #if defined (PETSC_USE_DEBUG) 1113 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 1114 #endif 1115 for (k=0; k<count; k++) { 1116 aa[0] = zero; 1117 aa += bs; 1118 } 1119 if (diag) { 1120 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1121 } 1122 } 1123 } 1124 1125 ierr = PetscFree(rows);CHKERRQ(ierr); 1126 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 #undef __FUNCT__ 1131 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1132 int MatSetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 1133 { 1134 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1135 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1136 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1137 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1138 int ridx,cidx,bs2=a->bs2,ierr; 1139 PetscTruth roworiented=a->roworiented; 1140 MatScalar *ap,value,*aa=a->a,*bap; 1141 1142 PetscFunctionBegin; 1143 for (k=0; k<m; k++) { /* loop over added rows */ 1144 row = im[k]; brow = row/bs; 1145 if (row < 0) continue; 1146 #if defined(PETSC_USE_BOPT_g) 1147 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 1148 #endif 1149 rp = aj + ai[brow]; 1150 ap = aa + bs2*ai[brow]; 1151 rmax = imax[brow]; 1152 nrow = ailen[brow]; 1153 low = 0; 1154 for (l=0; l<n; l++) { /* loop over added columns */ 1155 if (in[l] < 0) continue; 1156 #if defined(PETSC_USE_BOPT_g) 1157 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1); 1158 #endif 1159 col = in[l]; bcol = col/bs; 1160 ridx = row % bs; cidx = col % bs; 1161 if (roworiented) { 1162 value = v[l + k*n]; 1163 } else { 1164 value = v[k + l*m]; 1165 } 1166 if (!sorted) low = 0; high = nrow; 1167 while (high-low > 7) { 1168 t = (low+high)/2; 1169 if (rp[t] > bcol) high = t; 1170 else low = t; 1171 } 1172 for (i=low; i<high; i++) { 1173 if (rp[i] > bcol) break; 1174 if (rp[i] == bcol) { 1175 bap = ap + bs2*i + bs*cidx + ridx; 1176 if (is == ADD_VALUES) *bap += value; 1177 else *bap = value; 1178 goto noinsert1; 1179 } 1180 } 1181 if (nonew == 1) goto noinsert1; 1182 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1183 if (nrow >= rmax) { 1184 /* there is no extra room in row, therefore enlarge */ 1185 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1186 MatScalar *new_a; 1187 1188 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1189 1190 /* Malloc new storage space */ 1191 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1192 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1193 new_j = (int*)(new_a + bs2*new_nz); 1194 new_i = new_j + new_nz; 1195 1196 /* copy over old data into new slots */ 1197 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1198 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1199 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1200 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1201 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1202 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1203 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1204 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1205 /* free up old matrix storage */ 1206 ierr = PetscFree(a->a);CHKERRQ(ierr); 1207 if (!a->singlemalloc) { 1208 ierr = PetscFree(a->i);CHKERRQ(ierr); 1209 ierr = PetscFree(a->j);CHKERRQ(ierr); 1210 } 1211 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1212 a->singlemalloc = PETSC_TRUE; 1213 1214 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1215 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1216 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1217 a->maxnz += bs2*CHUNKSIZE; 1218 a->reallocs++; 1219 a->nz++; 1220 } 1221 N = nrow++ - 1; 1222 /* shift up all the later entries in this row */ 1223 for (ii=N; ii>=i; ii--) { 1224 rp[ii+1] = rp[ii]; 1225 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1226 } 1227 if (N>=i) { 1228 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1229 } 1230 rp[i] = bcol; 1231 ap[bs2*i + bs*cidx + ridx] = value; 1232 noinsert1:; 1233 low = i; 1234 } 1235 ailen[brow] = nrow; 1236 } 1237 PetscFunctionReturn(0); 1238 } 1239 1240 1241 #undef __FUNCT__ 1242 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1243 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1244 { 1245 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1246 Mat outA; 1247 int ierr; 1248 PetscTruth row_identity,col_identity; 1249 1250 PetscFunctionBegin; 1251 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1252 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1253 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1254 if (!row_identity || !col_identity) { 1255 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1256 } 1257 1258 outA = inA; 1259 inA->factor = FACTOR_LU; 1260 1261 if (!a->diag) { 1262 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1263 } 1264 1265 a->row = row; 1266 a->col = col; 1267 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1268 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1269 1270 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1271 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1272 PetscLogObjectParent(inA,a->icol); 1273 1274 /* 1275 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1276 for ILU(0) factorization with natural ordering 1277 */ 1278 if (a->bs < 8) { 1279 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1280 } else { 1281 if (!a->solve_work) { 1282 ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1283 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1284 } 1285 } 1286 1287 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1288 1289 PetscFunctionReturn(0); 1290 } 1291 #undef __FUNCT__ 1292 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1293 int MatPrintHelp_SeqBAIJ(Mat A) 1294 { 1295 static PetscTruth called = PETSC_FALSE; 1296 MPI_Comm comm = A->comm; 1297 int ierr; 1298 1299 PetscFunctionBegin; 1300 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1301 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1302 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1303 PetscFunctionReturn(0); 1304 } 1305 1306 EXTERN_C_BEGIN 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1309 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1310 { 1311 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1312 int i,nz,nbs; 1313 1314 PetscFunctionBegin; 1315 nz = baij->maxnz/baij->bs2; 1316 nbs = baij->nbs; 1317 for (i=0; i<nz; i++) { 1318 baij->j[i] = indices[i]; 1319 } 1320 baij->nz = nz; 1321 for (i=0; i<nbs; i++) { 1322 baij->ilen[i] = baij->imax[i]; 1323 } 1324 1325 PetscFunctionReturn(0); 1326 } 1327 EXTERN_C_END 1328 1329 #undef __FUNCT__ 1330 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1331 /*@ 1332 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1333 in the matrix. 1334 1335 Input Parameters: 1336 + mat - the SeqBAIJ matrix 1337 - indices - the column indices 1338 1339 Level: advanced 1340 1341 Notes: 1342 This can be called if you have precomputed the nonzero structure of the 1343 matrix and want to provide it to the matrix object to improve the performance 1344 of the MatSetValues() operation. 1345 1346 You MUST have set the correct numbers of nonzeros per row in the call to 1347 MatCreateSeqBAIJ(). 1348 1349 MUST be called before any calls to MatSetValues(); 1350 1351 @*/ 1352 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1353 { 1354 int ierr,(*f)(Mat,int *); 1355 1356 PetscFunctionBegin; 1357 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1358 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1359 if (f) { 1360 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1361 } else { 1362 SETERRQ(1,"Wrong type of matrix to set column indices"); 1363 } 1364 PetscFunctionReturn(0); 1365 } 1366 1367 #undef __FUNCT__ 1368 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1369 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1370 { 1371 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1372 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1373 PetscReal atmp; 1374 PetscScalar *x,zero = 0.0; 1375 MatScalar *aa; 1376 int ncols,brow,krow,kcol; 1377 1378 PetscFunctionBegin; 1379 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1380 bs = a->bs; 1381 aa = a->a; 1382 ai = a->i; 1383 aj = a->j; 1384 mbs = a->mbs; 1385 1386 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1387 ierr = VecGetArrayFast(v,&x);CHKERRQ(ierr); 1388 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1389 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1390 for (i=0; i<mbs; i++) { 1391 ncols = ai[1] - ai[0]; ai++; 1392 brow = bs*i; 1393 for (j=0; j<ncols; j++){ 1394 /* bcol = bs*(*aj); */ 1395 for (kcol=0; kcol<bs; kcol++){ 1396 for (krow=0; krow<bs; krow++){ 1397 atmp = PetscAbsScalar(*aa); aa++; 1398 row = brow + krow; /* row index */ 1399 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1400 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1401 } 1402 } 1403 aj++; 1404 } 1405 } 1406 ierr = VecRestoreArrayFast(v,&x);CHKERRQ(ierr); 1407 PetscFunctionReturn(0); 1408 } 1409 1410 #undef __FUNCT__ 1411 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1412 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1413 { 1414 int ierr; 1415 1416 PetscFunctionBegin; 1417 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1418 PetscFunctionReturn(0); 1419 } 1420 1421 #undef __FUNCT__ 1422 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1423 int MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1424 { 1425 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1426 PetscFunctionBegin; 1427 *array = a->a; 1428 PetscFunctionReturn(0); 1429 } 1430 1431 #undef __FUNCT__ 1432 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1433 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1434 { 1435 PetscFunctionBegin; 1436 PetscFunctionReturn(0); 1437 } 1438 1439 #include "petscblaslapack.h" 1440 #undef __FUNCT__ 1441 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1442 int MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1443 { 1444 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1445 int ierr,one=1,i,bs=y->bs,j,bs2; 1446 1447 PetscFunctionBegin; 1448 if (str == SAME_NONZERO_PATTERN) { 1449 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1450 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1451 if (y->xtoy && y->XtoY != X) { 1452 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1453 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1454 } 1455 if (!y->xtoy) { /* get xtoy */ 1456 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1457 y->XtoY = X; 1458 } 1459 bs2 = bs*bs; 1460 for (i=0; i<x->nz; i++) { 1461 j = 0; 1462 while (j < bs2){ 1463 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1464 j++; 1465 } 1466 } 1467 PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 1468 } else { 1469 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1470 } 1471 PetscFunctionReturn(0); 1472 } 1473 1474 /* -------------------------------------------------------------------*/ 1475 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1476 MatGetRow_SeqBAIJ, 1477 MatRestoreRow_SeqBAIJ, 1478 MatMult_SeqBAIJ_N, 1479 /* 4*/ MatMultAdd_SeqBAIJ_N, 1480 MatMultTranspose_SeqBAIJ, 1481 MatMultTransposeAdd_SeqBAIJ, 1482 MatSolve_SeqBAIJ_N, 1483 0, 1484 0, 1485 /*10*/ 0, 1486 MatLUFactor_SeqBAIJ, 1487 0, 1488 0, 1489 MatTranspose_SeqBAIJ, 1490 /*15*/ MatGetInfo_SeqBAIJ, 1491 MatEqual_SeqBAIJ, 1492 MatGetDiagonal_SeqBAIJ, 1493 MatDiagonalScale_SeqBAIJ, 1494 MatNorm_SeqBAIJ, 1495 /*20*/ 0, 1496 MatAssemblyEnd_SeqBAIJ, 1497 0, 1498 MatSetOption_SeqBAIJ, 1499 MatZeroEntries_SeqBAIJ, 1500 /*25*/ MatZeroRows_SeqBAIJ, 1501 MatLUFactorSymbolic_SeqBAIJ, 1502 MatLUFactorNumeric_SeqBAIJ_N, 1503 0, 1504 0, 1505 /*30*/ MatSetUpPreallocation_SeqBAIJ, 1506 MatILUFactorSymbolic_SeqBAIJ, 1507 0, 1508 MatGetArray_SeqBAIJ, 1509 MatRestoreArray_SeqBAIJ, 1510 /*35*/ MatDuplicate_SeqBAIJ, 1511 0, 1512 0, 1513 MatILUFactor_SeqBAIJ, 1514 0, 1515 /*40*/ MatAXPY_SeqBAIJ, 1516 MatGetSubMatrices_SeqBAIJ, 1517 MatIncreaseOverlap_SeqBAIJ, 1518 MatGetValues_SeqBAIJ, 1519 0, 1520 /*45*/ MatPrintHelp_SeqBAIJ, 1521 MatScale_SeqBAIJ, 1522 0, 1523 0, 1524 0, 1525 /*50*/ MatGetBlockSize_SeqBAIJ, 1526 MatGetRowIJ_SeqBAIJ, 1527 MatRestoreRowIJ_SeqBAIJ, 1528 0, 1529 0, 1530 /*55*/ 0, 1531 0, 1532 0, 1533 0, 1534 MatSetValuesBlocked_SeqBAIJ, 1535 /*60*/ MatGetSubMatrix_SeqBAIJ, 1536 MatDestroy_SeqBAIJ, 1537 MatView_SeqBAIJ, 1538 MatGetPetscMaps_Petsc, 1539 0, 1540 /*65*/ 0, 1541 0, 1542 0, 1543 0, 1544 0, 1545 /*70*/ MatGetRowMax_SeqBAIJ, 1546 MatConvert_Basic, 1547 0, 1548 0, 1549 0, 1550 /*75*/ 0, 1551 0, 1552 0, 1553 0, 1554 0, 1555 /*80*/ 0, 1556 0, 1557 0, 1558 0, 1559 /*85*/ MatLoad_SeqBAIJ 1560 }; 1561 1562 EXTERN_C_BEGIN 1563 #undef __FUNCT__ 1564 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 1565 int MatStoreValues_SeqBAIJ(Mat mat) 1566 { 1567 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1568 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1569 int ierr; 1570 1571 PetscFunctionBegin; 1572 if (aij->nonew != 1) { 1573 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1574 } 1575 1576 /* allocate space for values if not already there */ 1577 if (!aij->saved_values) { 1578 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1579 } 1580 1581 /* copy values over */ 1582 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1583 PetscFunctionReturn(0); 1584 } 1585 EXTERN_C_END 1586 1587 EXTERN_C_BEGIN 1588 #undef __FUNCT__ 1589 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 1590 int MatRetrieveValues_SeqBAIJ(Mat mat) 1591 { 1592 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1593 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1594 1595 PetscFunctionBegin; 1596 if (aij->nonew != 1) { 1597 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1598 } 1599 if (!aij->saved_values) { 1600 SETERRQ(1,"Must call MatStoreValues(A);first"); 1601 } 1602 1603 /* copy values over */ 1604 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1605 PetscFunctionReturn(0); 1606 } 1607 EXTERN_C_END 1608 1609 EXTERN_C_BEGIN 1610 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,Mat*); 1611 extern int MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,Mat*); 1612 EXTERN_C_END 1613 1614 EXTERN_C_BEGIN 1615 #undef __FUNCT__ 1616 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 1617 int MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,int bs,int nz,int *nnz) 1618 { 1619 Mat_SeqBAIJ *b; 1620 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1621 PetscTruth flg; 1622 1623 PetscFunctionBegin; 1624 1625 B->preallocated = PETSC_TRUE; 1626 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1627 if (nnz && newbs != bs) { 1628 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1629 } 1630 bs = newbs; 1631 1632 mbs = B->m/bs; 1633 nbs = B->n/bs; 1634 bs2 = bs*bs; 1635 1636 if (mbs*bs!=B->m || nbs*bs!=B->n) { 1637 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1638 } 1639 1640 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1641 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1642 if (nnz) { 1643 for (i=0; i<mbs; i++) { 1644 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1645 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1646 } 1647 } 1648 1649 b = (Mat_SeqBAIJ*)B->data; 1650 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1651 B->ops->solve = MatSolve_SeqBAIJ_Update; 1652 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 1653 if (!flg) { 1654 switch (bs) { 1655 case 1: 1656 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1657 B->ops->mult = MatMult_SeqBAIJ_1; 1658 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1659 break; 1660 case 2: 1661 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1662 B->ops->mult = MatMult_SeqBAIJ_2; 1663 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1664 break; 1665 case 3: 1666 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1667 B->ops->mult = MatMult_SeqBAIJ_3; 1668 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1669 break; 1670 case 4: 1671 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1672 B->ops->mult = MatMult_SeqBAIJ_4; 1673 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1674 break; 1675 case 5: 1676 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1677 B->ops->mult = MatMult_SeqBAIJ_5; 1678 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1679 break; 1680 case 6: 1681 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 1682 B->ops->mult = MatMult_SeqBAIJ_6; 1683 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 1684 break; 1685 case 7: 1686 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 1687 B->ops->mult = MatMult_SeqBAIJ_7; 1688 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 1689 break; 1690 default: 1691 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 1692 B->ops->mult = MatMult_SeqBAIJ_N; 1693 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 1694 break; 1695 } 1696 } 1697 b->bs = bs; 1698 b->mbs = mbs; 1699 b->nbs = nbs; 1700 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1701 if (!nnz) { 1702 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1703 else if (nz <= 0) nz = 1; 1704 for (i=0; i<mbs; i++) b->imax[i] = nz; 1705 nz = nz*mbs; 1706 } else { 1707 nz = 0; 1708 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1709 } 1710 1711 /* allocate the matrix space */ 1712 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1713 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1714 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1715 b->j = (int*)(b->a + nz*bs2); 1716 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1717 b->i = b->j + nz; 1718 b->singlemalloc = PETSC_TRUE; 1719 1720 b->i[0] = 0; 1721 for (i=1; i<mbs+1; i++) { 1722 b->i[i] = b->i[i-1] + b->imax[i-1]; 1723 } 1724 1725 /* b->ilen will count nonzeros in each block row so far. */ 1726 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1727 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1728 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1729 1730 b->bs = bs; 1731 b->bs2 = bs2; 1732 b->mbs = mbs; 1733 b->nz = 0; 1734 b->maxnz = nz*bs2; 1735 B->info.nz_unneeded = (PetscReal)b->maxnz; 1736 PetscFunctionReturn(0); 1737 } 1738 EXTERN_C_END 1739 1740 /*MC 1741 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 1742 block sparse compressed row format. 1743 1744 Options Database Keys: 1745 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 1746 1747 Level: beginner 1748 1749 .seealso: MatCreateSeqBAIJ 1750 M*/ 1751 1752 EXTERN_C_BEGIN 1753 #undef __FUNCT__ 1754 #define __FUNCT__ "MatCreate_SeqBAIJ" 1755 int MatCreate_SeqBAIJ(Mat B) 1756 { 1757 int ierr,size; 1758 Mat_SeqBAIJ *b; 1759 1760 PetscFunctionBegin; 1761 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1762 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1763 1764 B->m = B->M = PetscMax(B->m,B->M); 1765 B->n = B->N = PetscMax(B->n,B->N); 1766 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1767 B->data = (void*)b; 1768 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1769 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1770 B->factor = 0; 1771 B->lupivotthreshold = 1.0; 1772 B->mapping = 0; 1773 b->row = 0; 1774 b->col = 0; 1775 b->icol = 0; 1776 b->reallocs = 0; 1777 b->saved_values = 0; 1778 #if defined(PETSC_USE_MAT_SINGLE) 1779 b->setvalueslen = 0; 1780 b->setvaluescopy = PETSC_NULL; 1781 #endif 1782 1783 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 1784 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1785 1786 b->sorted = PETSC_FALSE; 1787 b->roworiented = PETSC_TRUE; 1788 b->nonew = 0; 1789 b->diag = 0; 1790 b->solve_work = 0; 1791 b->mult_work = 0; 1792 B->spptr = 0; 1793 B->info.nz_unneeded = (PetscReal)b->maxnz; 1794 b->keepzeroedrows = PETSC_FALSE; 1795 b->xtoy = 0; 1796 b->XtoY = 0; 1797 1798 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1799 "MatStoreValues_SeqBAIJ", 1800 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1801 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1802 "MatRetrieveValues_SeqBAIJ", 1803 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1804 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1805 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1806 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1807 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 1808 "MatConvert_SeqBAIJ_SeqAIJ", 1809 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 1810 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 1811 "MatConvert_SeqBAIJ_SeqSBAIJ", 1812 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 1813 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 1814 "MatSeqBAIJSetPreallocation_SeqBAIJ", 1815 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 1816 PetscFunctionReturn(0); 1817 } 1818 EXTERN_C_END 1819 1820 #undef __FUNCT__ 1821 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 1822 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1823 { 1824 Mat C; 1825 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1826 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1827 1828 PetscFunctionBegin; 1829 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1830 1831 *B = 0; 1832 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1833 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1834 c = (Mat_SeqBAIJ*)C->data; 1835 1836 C->M = A->M; 1837 C->N = A->N; 1838 c->bs = a->bs; 1839 c->bs2 = a->bs2; 1840 c->mbs = a->mbs; 1841 c->nbs = a->nbs; 1842 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1843 1844 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1845 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1846 for (i=0; i<mbs; i++) { 1847 c->imax[i] = a->imax[i]; 1848 c->ilen[i] = a->ilen[i]; 1849 } 1850 1851 /* allocate the matrix space */ 1852 c->singlemalloc = PETSC_TRUE; 1853 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1854 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1855 c->j = (int*)(c->a + nz*bs2); 1856 c->i = c->j + nz; 1857 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1858 if (mbs > 0) { 1859 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1860 if (cpvalues == MAT_COPY_VALUES) { 1861 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1862 } else { 1863 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1864 } 1865 } 1866 1867 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1868 c->sorted = a->sorted; 1869 c->roworiented = a->roworiented; 1870 c->nonew = a->nonew; 1871 1872 if (a->diag) { 1873 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1874 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1875 for (i=0; i<mbs; i++) { 1876 c->diag[i] = a->diag[i]; 1877 } 1878 } else c->diag = 0; 1879 c->nz = a->nz; 1880 c->maxnz = a->maxnz; 1881 c->solve_work = 0; 1882 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1883 c->mult_work = 0; 1884 C->preallocated = PETSC_TRUE; 1885 C->assembled = PETSC_TRUE; 1886 *B = C; 1887 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1888 PetscFunctionReturn(0); 1889 } 1890 1891 #undef __FUNCT__ 1892 #define __FUNCT__ "MatLoad_SeqBAIJ" 1893 int MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A) 1894 { 1895 Mat_SeqBAIJ *a; 1896 Mat B; 1897 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1898 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1899 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1900 int *masked,nmask,tmp,bs2,ishift; 1901 PetscScalar *aa; 1902 MPI_Comm comm = ((PetscObject)viewer)->comm; 1903 1904 PetscFunctionBegin; 1905 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1906 bs2 = bs*bs; 1907 1908 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1909 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1910 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1911 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1912 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1913 M = header[1]; N = header[2]; nz = header[3]; 1914 1915 if (header[3] < 0) { 1916 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1917 } 1918 1919 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1920 1921 /* 1922 This code adds extra rows to make sure the number of rows is 1923 divisible by the blocksize 1924 */ 1925 mbs = M/bs; 1926 extra_rows = bs - M + bs*(mbs); 1927 if (extra_rows == bs) extra_rows = 0; 1928 else mbs++; 1929 if (extra_rows) { 1930 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1931 } 1932 1933 /* read in row lengths */ 1934 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1935 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1936 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1937 1938 /* read in column indices */ 1939 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1940 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1941 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1942 1943 /* loop over row lengths determining block row lengths */ 1944 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1945 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1946 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1947 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1948 masked = mask + mbs; 1949 rowcount = 0; nzcount = 0; 1950 for (i=0; i<mbs; i++) { 1951 nmask = 0; 1952 for (j=0; j<bs; j++) { 1953 kmax = rowlengths[rowcount]; 1954 for (k=0; k<kmax; k++) { 1955 tmp = jj[nzcount++]/bs; 1956 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1957 } 1958 rowcount++; 1959 } 1960 browlengths[i] += nmask; 1961 /* zero out the mask elements we set */ 1962 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1963 } 1964 1965 /* create our matrix */ 1966 ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B); 1967 ierr = MatSetType(B,type);CHKERRQ(ierr); 1968 ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr); 1969 a = (Mat_SeqBAIJ*)B->data; 1970 1971 /* set matrix "i" values */ 1972 a->i[0] = 0; 1973 for (i=1; i<= mbs; i++) { 1974 a->i[i] = a->i[i-1] + browlengths[i-1]; 1975 a->ilen[i-1] = browlengths[i-1]; 1976 } 1977 a->nz = 0; 1978 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1979 1980 /* read in nonzero values */ 1981 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1982 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1983 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1984 1985 /* set "a" and "j" values into matrix */ 1986 nzcount = 0; jcount = 0; 1987 for (i=0; i<mbs; i++) { 1988 nzcountb = nzcount; 1989 nmask = 0; 1990 for (j=0; j<bs; j++) { 1991 kmax = rowlengths[i*bs+j]; 1992 for (k=0; k<kmax; k++) { 1993 tmp = jj[nzcount++]/bs; 1994 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1995 } 1996 } 1997 /* sort the masked values */ 1998 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1999 2000 /* set "j" values into matrix */ 2001 maskcount = 1; 2002 for (j=0; j<nmask; j++) { 2003 a->j[jcount++] = masked[j]; 2004 mask[masked[j]] = maskcount++; 2005 } 2006 /* set "a" values into matrix */ 2007 ishift = bs2*a->i[i]; 2008 for (j=0; j<bs; j++) { 2009 kmax = rowlengths[i*bs+j]; 2010 for (k=0; k<kmax; k++) { 2011 tmp = jj[nzcountb]/bs ; 2012 block = mask[tmp] - 1; 2013 point = jj[nzcountb] - bs*tmp; 2014 idx = ishift + bs2*block + j + bs*point; 2015 a->a[idx] = (MatScalar)aa[nzcountb++]; 2016 } 2017 } 2018 /* zero out the mask elements we set */ 2019 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2020 } 2021 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2022 2023 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2024 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2025 ierr = PetscFree(aa);CHKERRQ(ierr); 2026 ierr = PetscFree(jj);CHKERRQ(ierr); 2027 ierr = PetscFree(mask);CHKERRQ(ierr); 2028 2029 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2030 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2031 ierr = MatView_Private(B);CHKERRQ(ierr); 2032 2033 *A = B; 2034 PetscFunctionReturn(0); 2035 } 2036 2037 #undef __FUNCT__ 2038 #define __FUNCT__ "MatCreateSeqBAIJ" 2039 /*@C 2040 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2041 compressed row) format. For good matrix assembly performance the 2042 user should preallocate the matrix storage by setting the parameter nz 2043 (or the array nnz). By setting these parameters accurately, performance 2044 during matrix assembly can be increased by more than a factor of 50. 2045 2046 Collective on MPI_Comm 2047 2048 Input Parameters: 2049 + comm - MPI communicator, set to PETSC_COMM_SELF 2050 . bs - size of block 2051 . m - number of rows 2052 . n - number of columns 2053 . nz - number of nonzero blocks per block row (same for all rows) 2054 - nnz - array containing the number of nonzero blocks in the various block rows 2055 (possibly different for each block row) or PETSC_NULL 2056 2057 Output Parameter: 2058 . A - the matrix 2059 2060 Options Database Keys: 2061 . -mat_no_unroll - uses code that does not unroll the loops in the 2062 block calculations (much slower) 2063 . -mat_block_size - size of the blocks to use 2064 2065 Level: intermediate 2066 2067 Notes: 2068 A nonzero block is any block that as 1 or more nonzeros in it 2069 2070 The block AIJ format is fully compatible with standard Fortran 77 2071 storage. That is, the stored row and column indices can begin at 2072 either one (as in Fortran) or zero. See the users' manual for details. 2073 2074 Specify the preallocated storage with either nz or nnz (not both). 2075 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2076 allocation. For additional details, see the users manual chapter on 2077 matrices. 2078 2079 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2080 @*/ 2081 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A) 2082 { 2083 int ierr; 2084 2085 PetscFunctionBegin; 2086 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 2087 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2088 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 2089 PetscFunctionReturn(0); 2090 } 2091 2092 #undef __FUNCT__ 2093 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2094 /*@C 2095 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2096 per row in the matrix. For good matrix assembly performance the 2097 user should preallocate the matrix storage by setting the parameter nz 2098 (or the array nnz). By setting these parameters accurately, performance 2099 during matrix assembly can be increased by more than a factor of 50. 2100 2101 Collective on MPI_Comm 2102 2103 Input Parameters: 2104 + A - the matrix 2105 . bs - size of block 2106 . nz - number of block nonzeros per block row (same for all rows) 2107 - nnz - array containing the number of block nonzeros in the various block rows 2108 (possibly different for each block row) or PETSC_NULL 2109 2110 Options Database Keys: 2111 . -mat_no_unroll - uses code that does not unroll the loops in the 2112 block calculations (much slower) 2113 . -mat_block_size - size of the blocks to use 2114 2115 Level: intermediate 2116 2117 Notes: 2118 The block AIJ format is fully compatible with standard Fortran 77 2119 storage. That is, the stored row and column indices can begin at 2120 either one (as in Fortran) or zero. See the users' manual for details. 2121 2122 Specify the preallocated storage with either nz or nnz (not both). 2123 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2124 allocation. For additional details, see the users manual chapter on 2125 matrices. 2126 2127 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2128 @*/ 2129 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) 2130 { 2131 int ierr,(*f)(Mat,int,int,const int[]); 2132 2133 PetscFunctionBegin; 2134 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2135 if (f) { 2136 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2137 } 2138 PetscFunctionReturn(0); 2139 } 2140