1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "petscsys.h" /*I "petscmat.h" I*/ 11 12 /* 13 Special version for Fun3d sequential benchmark 14 */ 15 #if defined(PETSC_HAVE_FORTRAN_CAPS) 16 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 17 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 18 #define matsetvaluesblocked4_ matsetvaluesblocked4 19 #endif 20 21 EXTERN_C_BEGIN 22 #undef __FUNCT__ 23 #define __FUNCT__ "matsetvaluesblocked4_" 24 void matsetvaluesblocked4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 25 { 26 Mat A = *AA; 27 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 28 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 29 int *ai=a->i,*ailen=a->ilen; 30 int *aj=a->j,stepval; 31 PetscScalar *value = v; 32 MatScalar *ap,*aa = a->a,*bap; 33 34 PetscFunctionBegin; 35 stepval = (n-1)*4; 36 for (k=0; k<m; k++) { /* loop over added rows */ 37 row = im[k]; 38 rp = aj + ai[row]; 39 ap = aa + 16*ai[row]; 40 nrow = ailen[row]; 41 low = 0; 42 for (l=0; l<n; l++) { /* loop over added columns */ 43 col = in[l]; 44 value = v + k*(stepval+4)*4 + l*4; 45 low = 0; high = nrow; 46 while (high-low > 7) { 47 t = (low+high)/2; 48 if (rp[t] > col) high = t; 49 else low = t; 50 } 51 for (i=low; i<high; i++) { 52 if (rp[i] > col) break; 53 if (rp[i] == col) { 54 bap = ap + 16*i; 55 for (ii=0; ii<4; ii++,value+=stepval) { 56 for (jj=ii; jj<16; jj+=4) { 57 bap[jj] += *value++; 58 } 59 } 60 goto noinsert2; 61 } 62 } 63 N = nrow++ - 1; 64 /* shift up all the later entries in this row */ 65 for (ii=N; ii>=i; ii--) { 66 rp[ii+1] = rp[ii]; 67 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 68 } 69 if (N >= i) { 70 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 71 } 72 rp[i] = col; 73 bap = ap + 16*i; 74 for (ii=0; ii<4; ii++,value+=stepval) { 75 for (jj=ii; jj<16; jj+=4) { 76 bap[jj] = *value++; 77 } 78 } 79 noinsert2:; 80 low = i; 81 } 82 ailen[row] = nrow; 83 } 84 } 85 EXTERN_C_END 86 87 #if defined(PETSC_HAVE_FORTRAN_CAPS) 88 #define matsetvalues4_ MATSETVALUES4 89 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 90 #define matsetvalues4_ matsetvalues4 91 #endif 92 93 EXTERN_C_BEGIN 94 #undef __FUNCT__ 95 #define __FUNCT__ "MatSetValues4_" 96 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 97 { 98 Mat A = *AA; 99 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 100 int *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 101 int *ai=a->i,*ailen=a->ilen; 102 int *aj=a->j,brow,bcol; 103 int ridx,cidx; 104 MatScalar *ap,value,*aa=a->a,*bap; 105 106 PetscFunctionBegin; 107 for (k=0; k<m; k++) { /* loop over added rows */ 108 row = im[k]; brow = row/4; 109 rp = aj + ai[brow]; 110 ap = aa + 16*ai[brow]; 111 nrow = ailen[brow]; 112 low = 0; 113 for (l=0; l<n; l++) { /* loop over added columns */ 114 col = in[l]; bcol = col/4; 115 ridx = row % 4; cidx = col % 4; 116 value = v[l + k*n]; 117 low = 0; high = nrow; 118 while (high-low > 7) { 119 t = (low+high)/2; 120 if (rp[t] > bcol) high = t; 121 else low = t; 122 } 123 for (i=low; i<high; i++) { 124 if (rp[i] > bcol) break; 125 if (rp[i] == bcol) { 126 bap = ap + 16*i + 4*cidx + ridx; 127 *bap += value; 128 goto noinsert1; 129 } 130 } 131 N = nrow++ - 1; 132 /* shift up all the later entries in this row */ 133 for (ii=N; ii>=i; ii--) { 134 rp[ii+1] = rp[ii]; 135 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 136 } 137 if (N>=i) { 138 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 139 } 140 rp[i] = bcol; 141 ap[16*i + 4*cidx + ridx] = value; 142 noinsert1:; 143 low = i; 144 } 145 ailen[brow] = nrow; 146 } 147 } 148 EXTERN_C_END 149 150 /* UGLY, ugly, ugly 151 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 152 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 153 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 154 converts the entries into single precision and then calls ..._MatScalar() to put them 155 into the single precision data structures. 156 */ 157 #if defined(PETSC_USE_MAT_SINGLE) 158 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 159 #else 160 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 161 #endif 162 #if defined(PETSC_HAVE_DSCPACK) 163 EXTERN int MatUseDSCPACK_MPIBAIJ(Mat); 164 #endif 165 166 #define CHUNKSIZE 10 167 168 /* 169 Checks for missing diagonals 170 */ 171 #undef __FUNCT__ 172 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 173 int MatMissingDiagonal_SeqBAIJ(Mat A) 174 { 175 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 176 int *diag,*jj = a->j,i,ierr; 177 178 PetscFunctionBegin; 179 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 180 diag = a->diag; 181 for (i=0; i<a->mbs; i++) { 182 if (jj[diag[i]] != i) { 183 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 184 } 185 } 186 PetscFunctionReturn(0); 187 } 188 189 #undef __FUNCT__ 190 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 191 int MatMarkDiagonal_SeqBAIJ(Mat A) 192 { 193 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 194 int i,j,*diag,m = a->mbs,ierr; 195 196 PetscFunctionBegin; 197 if (a->diag) PetscFunctionReturn(0); 198 199 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 200 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 201 for (i=0; i<m; i++) { 202 diag[i] = a->i[i+1]; 203 for (j=a->i[i]; j<a->i[i+1]; j++) { 204 if (a->j[j] == i) { 205 diag[i] = j; 206 break; 207 } 208 } 209 } 210 a->diag = diag; 211 PetscFunctionReturn(0); 212 } 213 214 215 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 216 217 #undef __FUNCT__ 218 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 219 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 220 { 221 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 222 int ierr,n = a->mbs,i; 223 224 PetscFunctionBegin; 225 *nn = n; 226 if (!ia) PetscFunctionReturn(0); 227 if (symmetric) { 228 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 229 } else if (oshift == 1) { 230 /* temporarily add 1 to i and j indices */ 231 int nz = a->i[n]; 232 for (i=0; i<nz; i++) a->j[i]++; 233 for (i=0; i<n+1; i++) a->i[i]++; 234 *ia = a->i; *ja = a->j; 235 } else { 236 *ia = a->i; *ja = a->j; 237 } 238 239 PetscFunctionReturn(0); 240 } 241 242 #undef __FUNCT__ 243 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 244 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 245 { 246 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 247 int i,n = a->mbs,ierr; 248 249 PetscFunctionBegin; 250 if (!ia) PetscFunctionReturn(0); 251 if (symmetric) { 252 ierr = PetscFree(*ia);CHKERRQ(ierr); 253 ierr = PetscFree(*ja);CHKERRQ(ierr); 254 } else if (oshift == 1) { 255 int nz = a->i[n]-1; 256 for (i=0; i<nz; i++) a->j[i]--; 257 for (i=0; i<n+1; i++) a->i[i]--; 258 } 259 PetscFunctionReturn(0); 260 } 261 262 #undef __FUNCT__ 263 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 264 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 265 { 266 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 267 268 PetscFunctionBegin; 269 *bs = baij->bs; 270 PetscFunctionReturn(0); 271 } 272 273 274 #undef __FUNCT__ 275 #define __FUNCT__ "MatDestroy_SeqBAIJ" 276 int MatDestroy_SeqBAIJ(Mat A) 277 { 278 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 279 int ierr; 280 281 PetscFunctionBegin; 282 #if defined(PETSC_USE_LOG) 283 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 284 #endif 285 ierr = PetscFree(a->a);CHKERRQ(ierr); 286 if (!a->singlemalloc) { 287 ierr = PetscFree(a->i);CHKERRQ(ierr); 288 ierr = PetscFree(a->j);CHKERRQ(ierr); 289 } 290 if (a->row) { 291 ierr = ISDestroy(a->row);CHKERRQ(ierr); 292 } 293 if (a->col) { 294 ierr = ISDestroy(a->col);CHKERRQ(ierr); 295 } 296 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 297 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 298 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 299 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 300 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 301 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 302 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 303 #if defined(PETSC_USE_MAT_SINGLE) 304 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 305 #endif 306 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 307 308 ierr = PetscFree(a);CHKERRQ(ierr); 309 PetscFunctionReturn(0); 310 } 311 312 #undef __FUNCT__ 313 #define __FUNCT__ "MatSetOption_SeqBAIJ" 314 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 315 { 316 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 317 318 PetscFunctionBegin; 319 switch (op) { 320 case MAT_ROW_ORIENTED: 321 a->roworiented = PETSC_TRUE; 322 break; 323 case MAT_COLUMN_ORIENTED: 324 a->roworiented = PETSC_FALSE; 325 break; 326 case MAT_COLUMNS_SORTED: 327 a->sorted = PETSC_TRUE; 328 break; 329 case MAT_COLUMNS_UNSORTED: 330 a->sorted = PETSC_FALSE; 331 break; 332 case MAT_KEEP_ZEROED_ROWS: 333 a->keepzeroedrows = PETSC_TRUE; 334 break; 335 case MAT_NO_NEW_NONZERO_LOCATIONS: 336 a->nonew = 1; 337 break; 338 case MAT_NEW_NONZERO_LOCATION_ERR: 339 a->nonew = -1; 340 break; 341 case MAT_NEW_NONZERO_ALLOCATION_ERR: 342 a->nonew = -2; 343 break; 344 case MAT_YES_NEW_NONZERO_LOCATIONS: 345 a->nonew = 0; 346 break; 347 case MAT_ROWS_SORTED: 348 case MAT_ROWS_UNSORTED: 349 case MAT_YES_NEW_DIAGONALS: 350 case MAT_IGNORE_OFF_PROC_ENTRIES: 351 case MAT_USE_HASH_TABLE: 352 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 353 break; 354 case MAT_NO_NEW_DIAGONALS: 355 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 356 default: 357 SETERRQ(PETSC_ERR_SUP,"unknown option"); 358 } 359 PetscFunctionReturn(0); 360 } 361 362 #undef __FUNCT__ 363 #define __FUNCT__ "MatGetRow_SeqBAIJ" 364 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 365 { 366 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 367 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 368 MatScalar *aa,*aa_i; 369 PetscScalar *v_i; 370 371 PetscFunctionBegin; 372 bs = a->bs; 373 ai = a->i; 374 aj = a->j; 375 aa = a->a; 376 bs2 = a->bs2; 377 378 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 379 380 bn = row/bs; /* Block number */ 381 bp = row % bs; /* Block Position */ 382 M = ai[bn+1] - ai[bn]; 383 *nz = bs*M; 384 385 if (v) { 386 *v = 0; 387 if (*nz) { 388 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 389 for (i=0; i<M; i++) { /* for each block in the block row */ 390 v_i = *v + i*bs; 391 aa_i = aa + bs2*(ai[bn] + i); 392 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 393 } 394 } 395 } 396 397 if (idx) { 398 *idx = 0; 399 if (*nz) { 400 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 401 for (i=0; i<M; i++) { /* for each block in the block row */ 402 idx_i = *idx + i*bs; 403 itmp = bs*aj[ai[bn] + i]; 404 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 405 } 406 } 407 } 408 PetscFunctionReturn(0); 409 } 410 411 #undef __FUNCT__ 412 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 413 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 414 { 415 int ierr; 416 417 PetscFunctionBegin; 418 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 419 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 420 PetscFunctionReturn(0); 421 } 422 423 #undef __FUNCT__ 424 #define __FUNCT__ "MatTranspose_SeqBAIJ" 425 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 426 { 427 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 428 Mat C; 429 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 430 int *rows,*cols,bs2=a->bs2; 431 PetscScalar *array; 432 433 PetscFunctionBegin; 434 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 435 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 436 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 437 438 #if defined(PETSC_USE_MAT_SINGLE) 439 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 440 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 441 #else 442 array = a->a; 443 #endif 444 445 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 446 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 447 ierr = PetscFree(col);CHKERRQ(ierr); 448 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 449 cols = rows + bs; 450 for (i=0; i<mbs; i++) { 451 cols[0] = i*bs; 452 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 453 len = ai[i+1] - ai[i]; 454 for (j=0; j<len; j++) { 455 rows[0] = (*aj++)*bs; 456 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 457 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 458 array += bs2; 459 } 460 } 461 ierr = PetscFree(rows);CHKERRQ(ierr); 462 #if defined(PETSC_USE_MAT_SINGLE) 463 ierr = PetscFree(array); 464 #endif 465 466 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 467 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 468 469 if (B) { 470 *B = C; 471 } else { 472 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 473 } 474 PetscFunctionReturn(0); 475 } 476 477 #undef __FUNCT__ 478 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 479 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 480 { 481 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 482 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 483 PetscScalar *aa; 484 FILE *file; 485 486 PetscFunctionBegin; 487 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 488 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 489 col_lens[0] = MAT_FILE_COOKIE; 490 491 col_lens[1] = A->m; 492 col_lens[2] = A->n; 493 col_lens[3] = a->nz*bs2; 494 495 /* store lengths of each row and write (including header) to file */ 496 count = 0; 497 for (i=0; i<a->mbs; i++) { 498 for (j=0; j<bs; j++) { 499 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 500 } 501 } 502 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 503 ierr = PetscFree(col_lens);CHKERRQ(ierr); 504 505 /* store column indices (zero start index) */ 506 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 507 count = 0; 508 for (i=0; i<a->mbs; i++) { 509 for (j=0; j<bs; j++) { 510 for (k=a->i[i]; k<a->i[i+1]; k++) { 511 for (l=0; l<bs; l++) { 512 jj[count++] = bs*a->j[k] + l; 513 } 514 } 515 } 516 } 517 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 518 ierr = PetscFree(jj);CHKERRQ(ierr); 519 520 /* store nonzero values */ 521 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 522 count = 0; 523 for (i=0; i<a->mbs; i++) { 524 for (j=0; j<bs; j++) { 525 for (k=a->i[i]; k<a->i[i+1]; k++) { 526 for (l=0; l<bs; l++) { 527 aa[count++] = a->a[bs2*k + l*bs + j]; 528 } 529 } 530 } 531 } 532 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 533 ierr = PetscFree(aa);CHKERRQ(ierr); 534 535 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 536 if (file) { 537 fprintf(file,"-matload_block_size %d\n",a->bs); 538 } 539 PetscFunctionReturn(0); 540 } 541 542 extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer); 543 544 #undef __FUNCT__ 545 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 546 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 547 { 548 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 549 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 550 PetscViewerFormat format; 551 552 PetscFunctionBegin; 553 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 554 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 555 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 556 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 557 Mat aij; 558 ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr); 559 ierr = MatView(aij,viewer);CHKERRQ(ierr); 560 ierr = MatDestroy(aij);CHKERRQ(ierr); 561 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 562 #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX) 563 ierr = MatMPIBAIJFactorInfo_DSCPACK(A,viewer);CHKERRQ(ierr); 564 #endif 565 PetscFunctionReturn(0); 566 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 567 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 568 for (i=0; i<a->mbs; i++) { 569 for (j=0; j<bs; j++) { 570 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 571 for (k=a->i[i]; k<a->i[i+1]; k++) { 572 for (l=0; l<bs; l++) { 573 #if defined(PETSC_USE_COMPLEX) 574 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 575 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l, 576 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 577 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 578 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l, 579 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 580 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 581 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 582 } 583 #else 584 if (a->a[bs2*k + l*bs + j] != 0.0) { 585 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 586 } 587 #endif 588 } 589 } 590 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 591 } 592 } 593 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 594 } else { 595 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 596 for (i=0; i<a->mbs; i++) { 597 for (j=0; j<bs; j++) { 598 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 599 for (k=a->i[i]; k<a->i[i+1]; k++) { 600 for (l=0; l<bs; l++) { 601 #if defined(PETSC_USE_COMPLEX) 602 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 603 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 604 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 605 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 606 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 607 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 608 } else { 609 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 610 } 611 #else 612 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 613 #endif 614 } 615 } 616 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 617 } 618 } 619 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 620 } 621 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 622 PetscFunctionReturn(0); 623 } 624 625 #undef __FUNCT__ 626 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 627 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 628 { 629 Mat A = (Mat) Aa; 630 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 631 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 632 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 633 MatScalar *aa; 634 PetscViewer viewer; 635 636 PetscFunctionBegin; 637 638 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 639 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 640 641 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 642 643 /* loop over matrix elements drawing boxes */ 644 color = PETSC_DRAW_BLUE; 645 for (i=0,row=0; i<mbs; i++,row+=bs) { 646 for (j=a->i[i]; j<a->i[i+1]; j++) { 647 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 648 x_l = a->j[j]*bs; x_r = x_l + 1.0; 649 aa = a->a + j*bs2; 650 for (k=0; k<bs; k++) { 651 for (l=0; l<bs; l++) { 652 if (PetscRealPart(*aa++) >= 0.) continue; 653 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 654 } 655 } 656 } 657 } 658 color = PETSC_DRAW_CYAN; 659 for (i=0,row=0; i<mbs; i++,row+=bs) { 660 for (j=a->i[i]; j<a->i[i+1]; j++) { 661 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 662 x_l = a->j[j]*bs; x_r = x_l + 1.0; 663 aa = a->a + j*bs2; 664 for (k=0; k<bs; k++) { 665 for (l=0; l<bs; l++) { 666 if (PetscRealPart(*aa++) != 0.) continue; 667 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 668 } 669 } 670 } 671 } 672 673 color = PETSC_DRAW_RED; 674 for (i=0,row=0; i<mbs; i++,row+=bs) { 675 for (j=a->i[i]; j<a->i[i+1]; j++) { 676 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 677 x_l = a->j[j]*bs; x_r = x_l + 1.0; 678 aa = a->a + j*bs2; 679 for (k=0; k<bs; k++) { 680 for (l=0; l<bs; l++) { 681 if (PetscRealPart(*aa++) <= 0.) continue; 682 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 683 } 684 } 685 } 686 } 687 PetscFunctionReturn(0); 688 } 689 690 #undef __FUNCT__ 691 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 692 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 693 { 694 int ierr; 695 PetscReal xl,yl,xr,yr,w,h; 696 PetscDraw draw; 697 PetscTruth isnull; 698 699 PetscFunctionBegin; 700 701 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 702 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 703 704 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 705 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 706 xr += w; yr += h; xl = -w; yl = -h; 707 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 708 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 709 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 710 PetscFunctionReturn(0); 711 } 712 713 #undef __FUNCT__ 714 #define __FUNCT__ "MatView_SeqBAIJ" 715 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 716 { 717 int ierr; 718 PetscTruth issocket,isascii,isbinary,isdraw; 719 720 PetscFunctionBegin; 721 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 722 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 723 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 724 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 725 if (issocket) { 726 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 727 } else if (isascii){ 728 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 729 } else if (isbinary) { 730 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 731 } else if (isdraw) { 732 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 733 } else { 734 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 735 } 736 PetscFunctionReturn(0); 737 } 738 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatGetValues_SeqBAIJ" 742 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 743 { 744 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 745 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 746 int *ai = a->i,*ailen = a->ilen; 747 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 748 MatScalar *ap,*aa = a->a,zero = 0.0; 749 750 PetscFunctionBegin; 751 for (k=0; k<m; k++) { /* loop over rows */ 752 row = im[k]; brow = row/bs; 753 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 754 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 755 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 756 nrow = ailen[brow]; 757 for (l=0; l<n; l++) { /* loop over columns */ 758 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 759 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 760 col = in[l] ; 761 bcol = col/bs; 762 cidx = col%bs; 763 ridx = row%bs; 764 high = nrow; 765 low = 0; /* assume unsorted */ 766 while (high-low > 5) { 767 t = (low+high)/2; 768 if (rp[t] > bcol) high = t; 769 else low = t; 770 } 771 for (i=low; i<high; i++) { 772 if (rp[i] > bcol) break; 773 if (rp[i] == bcol) { 774 *v++ = ap[bs2*i+bs*cidx+ridx]; 775 goto finished; 776 } 777 } 778 *v++ = zero; 779 finished:; 780 } 781 } 782 PetscFunctionReturn(0); 783 } 784 785 #if defined(PETSC_USE_MAT_SINGLE) 786 #undef __FUNCT__ 787 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 788 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 789 { 790 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 791 int ierr,i,N = m*n*b->bs2; 792 MatScalar *vsingle; 793 794 PetscFunctionBegin; 795 if (N > b->setvalueslen) { 796 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 797 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 798 b->setvalueslen = N; 799 } 800 vsingle = b->setvaluescopy; 801 for (i=0; i<N; i++) { 802 vsingle[i] = v[i]; 803 } 804 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 805 PetscFunctionReturn(0); 806 } 807 #endif 808 809 810 #undef __FUNCT__ 811 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 812 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is) 813 { 814 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 815 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 816 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 817 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 818 PetscTruth roworiented=a->roworiented; 819 MatScalar *value = v,*ap,*aa = a->a,*bap; 820 821 PetscFunctionBegin; 822 if (roworiented) { 823 stepval = (n-1)*bs; 824 } else { 825 stepval = (m-1)*bs; 826 } 827 for (k=0; k<m; k++) { /* loop over added rows */ 828 row = im[k]; 829 if (row < 0) continue; 830 #if defined(PETSC_USE_BOPT_g) 831 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 832 #endif 833 rp = aj + ai[row]; 834 ap = aa + bs2*ai[row]; 835 rmax = imax[row]; 836 nrow = ailen[row]; 837 low = 0; 838 for (l=0; l<n; l++) { /* loop over added columns */ 839 if (in[l] < 0) continue; 840 #if defined(PETSC_USE_BOPT_g) 841 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 842 #endif 843 col = in[l]; 844 if (roworiented) { 845 value = v + k*(stepval+bs)*bs + l*bs; 846 } else { 847 value = v + l*(stepval+bs)*bs + k*bs; 848 } 849 if (!sorted) low = 0; high = nrow; 850 while (high-low > 7) { 851 t = (low+high)/2; 852 if (rp[t] > col) high = t; 853 else low = t; 854 } 855 for (i=low; i<high; i++) { 856 if (rp[i] > col) break; 857 if (rp[i] == col) { 858 bap = ap + bs2*i; 859 if (roworiented) { 860 if (is == ADD_VALUES) { 861 for (ii=0; ii<bs; ii++,value+=stepval) { 862 for (jj=ii; jj<bs2; jj+=bs) { 863 bap[jj] += *value++; 864 } 865 } 866 } else { 867 for (ii=0; ii<bs; ii++,value+=stepval) { 868 for (jj=ii; jj<bs2; jj+=bs) { 869 bap[jj] = *value++; 870 } 871 } 872 } 873 } else { 874 if (is == ADD_VALUES) { 875 for (ii=0; ii<bs; ii++,value+=stepval) { 876 for (jj=0; jj<bs; jj++) { 877 *bap++ += *value++; 878 } 879 } 880 } else { 881 for (ii=0; ii<bs; ii++,value+=stepval) { 882 for (jj=0; jj<bs; jj++) { 883 *bap++ = *value++; 884 } 885 } 886 } 887 } 888 goto noinsert2; 889 } 890 } 891 if (nonew == 1) goto noinsert2; 892 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 893 if (nrow >= rmax) { 894 /* there is no extra room in row, therefore enlarge */ 895 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 896 MatScalar *new_a; 897 898 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 899 900 /* malloc new storage space */ 901 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 902 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 903 new_j = (int*)(new_a + bs2*new_nz); 904 new_i = new_j + new_nz; 905 906 /* copy over old data into new slots */ 907 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 908 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 909 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 910 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 911 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 912 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 913 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 914 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 915 /* free up old matrix storage */ 916 ierr = PetscFree(a->a);CHKERRQ(ierr); 917 if (!a->singlemalloc) { 918 ierr = PetscFree(a->i);CHKERRQ(ierr); 919 ierr = PetscFree(a->j);CHKERRQ(ierr); 920 } 921 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 922 a->singlemalloc = PETSC_TRUE; 923 924 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 925 rmax = imax[row] = imax[row] + CHUNKSIZE; 926 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 927 a->maxnz += bs2*CHUNKSIZE; 928 a->reallocs++; 929 a->nz++; 930 } 931 N = nrow++ - 1; 932 /* shift up all the later entries in this row */ 933 for (ii=N; ii>=i; ii--) { 934 rp[ii+1] = rp[ii]; 935 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 936 } 937 if (N >= i) { 938 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 939 } 940 rp[i] = col; 941 bap = ap + bs2*i; 942 if (roworiented) { 943 for (ii=0; ii<bs; ii++,value+=stepval) { 944 for (jj=ii; jj<bs2; jj+=bs) { 945 bap[jj] = *value++; 946 } 947 } 948 } else { 949 for (ii=0; ii<bs; ii++,value+=stepval) { 950 for (jj=0; jj<bs; jj++) { 951 *bap++ = *value++; 952 } 953 } 954 } 955 noinsert2:; 956 low = i; 957 } 958 ailen[row] = nrow; 959 } 960 PetscFunctionReturn(0); 961 } 962 963 #undef __FUNCT__ 964 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 965 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 966 { 967 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 968 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 969 int m = A->m,*ip,N,*ailen = a->ilen; 970 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 971 MatScalar *aa = a->a,*ap; 972 #if defined(PETSC_HAVE_DSCPACK) 973 PetscTruth flag; 974 #endif 975 976 PetscFunctionBegin; 977 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 978 979 if (m) rmax = ailen[0]; 980 for (i=1; i<mbs; i++) { 981 /* move each row back by the amount of empty slots (fshift) before it*/ 982 fshift += imax[i-1] - ailen[i-1]; 983 rmax = PetscMax(rmax,ailen[i]); 984 if (fshift) { 985 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 986 N = ailen[i]; 987 for (j=0; j<N; j++) { 988 ip[j-fshift] = ip[j]; 989 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 990 } 991 } 992 ai[i] = ai[i-1] + ailen[i-1]; 993 } 994 if (mbs) { 995 fshift += imax[mbs-1] - ailen[mbs-1]; 996 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 997 } 998 /* reset ilen and imax for each row */ 999 for (i=0; i<mbs; i++) { 1000 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1001 } 1002 a->nz = ai[mbs]; 1003 1004 /* diagonals may have moved, so kill the diagonal pointers */ 1005 if (fshift && a->diag) { 1006 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1007 PetscLogObjectMemory(A,-(mbs+1)*sizeof(int)); 1008 a->diag = 0; 1009 } 1010 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 1011 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 1012 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 1013 a->reallocs = 0; 1014 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1015 1016 #if defined(PETSC_HAVE_DSCPACK) 1017 ierr = PetscOptionsHasName(A->prefix,"-mat_baij_dscpack",&flag);CHKERRQ(ierr); 1018 if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(A);CHKERRQ(ierr); } 1019 #endif 1020 1021 PetscFunctionReturn(0); 1022 } 1023 1024 1025 1026 /* 1027 This function returns an array of flags which indicate the locations of contiguous 1028 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1029 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1030 Assume: sizes should be long enough to hold all the values. 1031 */ 1032 #undef __FUNCT__ 1033 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1034 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 1035 { 1036 int i,j,k,row; 1037 PetscTruth flg; 1038 1039 PetscFunctionBegin; 1040 for (i=0,j=0; i<n; j++) { 1041 row = idx[i]; 1042 if (row%bs!=0) { /* Not the begining of a block */ 1043 sizes[j] = 1; 1044 i++; 1045 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1046 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1047 i++; 1048 } else { /* Begining of the block, so check if the complete block exists */ 1049 flg = PETSC_TRUE; 1050 for (k=1; k<bs; k++) { 1051 if (row+k != idx[i+k]) { /* break in the block */ 1052 flg = PETSC_FALSE; 1053 break; 1054 } 1055 } 1056 if (flg == PETSC_TRUE) { /* No break in the bs */ 1057 sizes[j] = bs; 1058 i+= bs; 1059 } else { 1060 sizes[j] = 1; 1061 i++; 1062 } 1063 } 1064 } 1065 *bs_max = j; 1066 PetscFunctionReturn(0); 1067 } 1068 1069 #undef __FUNCT__ 1070 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1071 int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag) 1072 { 1073 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1074 int ierr,i,j,k,count,is_n,*is_idx,*rows; 1075 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 1076 PetscScalar zero = 0.0; 1077 MatScalar *aa; 1078 1079 PetscFunctionBegin; 1080 /* Make a copy of the IS and sort it */ 1081 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1082 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1083 1084 /* allocate memory for rows,sizes */ 1085 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 1086 sizes = rows + is_n; 1087 1088 /* copy IS values to rows, and sort them */ 1089 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1090 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1091 if (baij->keepzeroedrows) { 1092 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1093 bs_max = is_n; 1094 } else { 1095 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1096 } 1097 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1098 1099 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1100 row = rows[j]; 1101 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 1102 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1103 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1104 if (sizes[i] == bs && !baij->keepzeroedrows) { 1105 if (diag) { 1106 if (baij->ilen[row/bs] > 0) { 1107 baij->ilen[row/bs] = 1; 1108 baij->j[baij->i[row/bs]] = row/bs; 1109 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1110 } 1111 /* Now insert all the diagonal values for this bs */ 1112 for (k=0; k<bs; k++) { 1113 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1114 } 1115 } else { /* (!diag) */ 1116 baij->ilen[row/bs] = 0; 1117 } /* end (!diag) */ 1118 } else { /* (sizes[i] != bs) */ 1119 #if defined (PETSC_USE_DEBUG) 1120 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 1121 #endif 1122 for (k=0; k<count; k++) { 1123 aa[0] = zero; 1124 aa += bs; 1125 } 1126 if (diag) { 1127 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1128 } 1129 } 1130 } 1131 1132 ierr = PetscFree(rows);CHKERRQ(ierr); 1133 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1134 PetscFunctionReturn(0); 1135 } 1136 1137 #undef __FUNCT__ 1138 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1139 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 1140 { 1141 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1142 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1143 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1144 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1145 int ridx,cidx,bs2=a->bs2,ierr; 1146 PetscTruth roworiented=a->roworiented; 1147 MatScalar *ap,value,*aa=a->a,*bap; 1148 1149 PetscFunctionBegin; 1150 for (k=0; k<m; k++) { /* loop over added rows */ 1151 row = im[k]; brow = row/bs; 1152 if (row < 0) continue; 1153 #if defined(PETSC_USE_BOPT_g) 1154 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 1155 #endif 1156 rp = aj + ai[brow]; 1157 ap = aa + bs2*ai[brow]; 1158 rmax = imax[brow]; 1159 nrow = ailen[brow]; 1160 low = 0; 1161 for (l=0; l<n; l++) { /* loop over added columns */ 1162 if (in[l] < 0) continue; 1163 #if defined(PETSC_USE_BOPT_g) 1164 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n); 1165 #endif 1166 col = in[l]; bcol = col/bs; 1167 ridx = row % bs; cidx = col % bs; 1168 if (roworiented) { 1169 value = v[l + k*n]; 1170 } else { 1171 value = v[k + l*m]; 1172 } 1173 if (!sorted) low = 0; high = nrow; 1174 while (high-low > 7) { 1175 t = (low+high)/2; 1176 if (rp[t] > bcol) high = t; 1177 else low = t; 1178 } 1179 for (i=low; i<high; i++) { 1180 if (rp[i] > bcol) break; 1181 if (rp[i] == bcol) { 1182 bap = ap + bs2*i + bs*cidx + ridx; 1183 if (is == ADD_VALUES) *bap += value; 1184 else *bap = value; 1185 goto noinsert1; 1186 } 1187 } 1188 if (nonew == 1) goto noinsert1; 1189 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1190 if (nrow >= rmax) { 1191 /* there is no extra room in row, therefore enlarge */ 1192 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1193 MatScalar *new_a; 1194 1195 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1196 1197 /* Malloc new storage space */ 1198 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1199 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1200 new_j = (int*)(new_a + bs2*new_nz); 1201 new_i = new_j + new_nz; 1202 1203 /* copy over old data into new slots */ 1204 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1205 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1206 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1207 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1208 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1209 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1210 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1211 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1212 /* free up old matrix storage */ 1213 ierr = PetscFree(a->a);CHKERRQ(ierr); 1214 if (!a->singlemalloc) { 1215 ierr = PetscFree(a->i);CHKERRQ(ierr); 1216 ierr = PetscFree(a->j);CHKERRQ(ierr); 1217 } 1218 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1219 a->singlemalloc = PETSC_TRUE; 1220 1221 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1222 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1223 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1224 a->maxnz += bs2*CHUNKSIZE; 1225 a->reallocs++; 1226 a->nz++; 1227 } 1228 N = nrow++ - 1; 1229 /* shift up all the later entries in this row */ 1230 for (ii=N; ii>=i; ii--) { 1231 rp[ii+1] = rp[ii]; 1232 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1233 } 1234 if (N>=i) { 1235 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1236 } 1237 rp[i] = bcol; 1238 ap[bs2*i + bs*cidx + ridx] = value; 1239 noinsert1:; 1240 low = i; 1241 } 1242 ailen[brow] = nrow; 1243 } 1244 PetscFunctionReturn(0); 1245 } 1246 1247 1248 #undef __FUNCT__ 1249 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1250 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1251 { 1252 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1253 Mat outA; 1254 int ierr; 1255 PetscTruth row_identity,col_identity; 1256 1257 PetscFunctionBegin; 1258 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1259 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1260 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1261 if (!row_identity || !col_identity) { 1262 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1263 } 1264 1265 outA = inA; 1266 inA->factor = FACTOR_LU; 1267 1268 if (!a->diag) { 1269 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1270 } 1271 1272 a->row = row; 1273 a->col = col; 1274 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1275 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1276 1277 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1278 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1279 PetscLogObjectParent(inA,a->icol); 1280 1281 /* 1282 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1283 for ILU(0) factorization with natural ordering 1284 */ 1285 if (a->bs < 8) { 1286 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1287 } else { 1288 if (!a->solve_work) { 1289 ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1290 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1291 } 1292 } 1293 1294 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1295 1296 PetscFunctionReturn(0); 1297 } 1298 #undef __FUNCT__ 1299 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1300 int MatPrintHelp_SeqBAIJ(Mat A) 1301 { 1302 static PetscTruth called = PETSC_FALSE; 1303 MPI_Comm comm = A->comm; 1304 int ierr; 1305 1306 PetscFunctionBegin; 1307 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1308 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1309 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1310 PetscFunctionReturn(0); 1311 } 1312 1313 EXTERN_C_BEGIN 1314 #undef __FUNCT__ 1315 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1316 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1317 { 1318 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1319 int i,nz,nbs; 1320 1321 PetscFunctionBegin; 1322 nz = baij->maxnz/baij->bs2; 1323 nbs = baij->nbs; 1324 for (i=0; i<nz; i++) { 1325 baij->j[i] = indices[i]; 1326 } 1327 baij->nz = nz; 1328 for (i=0; i<nbs; i++) { 1329 baij->ilen[i] = baij->imax[i]; 1330 } 1331 1332 PetscFunctionReturn(0); 1333 } 1334 EXTERN_C_END 1335 1336 #undef __FUNCT__ 1337 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1338 /*@ 1339 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1340 in the matrix. 1341 1342 Input Parameters: 1343 + mat - the SeqBAIJ matrix 1344 - indices - the column indices 1345 1346 Level: advanced 1347 1348 Notes: 1349 This can be called if you have precomputed the nonzero structure of the 1350 matrix and want to provide it to the matrix object to improve the performance 1351 of the MatSetValues() operation. 1352 1353 You MUST have set the correct numbers of nonzeros per row in the call to 1354 MatCreateSeqBAIJ(). 1355 1356 MUST be called before any calls to MatSetValues(); 1357 1358 @*/ 1359 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1360 { 1361 int ierr,(*f)(Mat,int *); 1362 1363 PetscFunctionBegin; 1364 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1365 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1366 if (f) { 1367 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1368 } else { 1369 SETERRQ(1,"Wrong type of matrix to set column indices"); 1370 } 1371 PetscFunctionReturn(0); 1372 } 1373 1374 #undef __FUNCT__ 1375 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1376 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1377 { 1378 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1379 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1380 PetscReal atmp; 1381 PetscScalar *x,zero = 0.0; 1382 MatScalar *aa; 1383 int ncols,brow,krow,kcol; 1384 1385 PetscFunctionBegin; 1386 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1387 bs = a->bs; 1388 aa = a->a; 1389 ai = a->i; 1390 aj = a->j; 1391 mbs = a->mbs; 1392 1393 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1394 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1395 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1396 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1397 for (i=0; i<mbs; i++) { 1398 ncols = ai[1] - ai[0]; ai++; 1399 brow = bs*i; 1400 for (j=0; j<ncols; j++){ 1401 /* bcol = bs*(*aj); */ 1402 for (kcol=0; kcol<bs; kcol++){ 1403 for (krow=0; krow<bs; krow++){ 1404 atmp = PetscAbsScalar(*aa); aa++; 1405 row = brow + krow; /* row index */ 1406 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1407 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1408 } 1409 } 1410 aj++; 1411 } 1412 } 1413 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1414 PetscFunctionReturn(0); 1415 } 1416 1417 #undef __FUNCT__ 1418 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1419 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1420 { 1421 int ierr; 1422 1423 PetscFunctionBegin; 1424 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1425 PetscFunctionReturn(0); 1426 } 1427 1428 #undef __FUNCT__ 1429 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1430 int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array) 1431 { 1432 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1433 PetscFunctionBegin; 1434 *array = a->a; 1435 PetscFunctionReturn(0); 1436 } 1437 1438 #undef __FUNCT__ 1439 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1440 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array) 1441 { 1442 PetscFunctionBegin; 1443 PetscFunctionReturn(0); 1444 } 1445 1446 #include "petscblaslapack.h" 1447 #undef __FUNCT__ 1448 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1449 int MatAXPY_SeqBAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1450 { 1451 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1452 int ierr,one=1,i,bs=y->bs,j,bs2; 1453 1454 PetscFunctionBegin; 1455 if (str == SAME_NONZERO_PATTERN) { 1456 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1457 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1458 if (y->xtoy && y->XtoY != X) { 1459 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1460 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1461 } 1462 if (!y->xtoy) { /* get xtoy */ 1463 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1464 y->XtoY = X; 1465 } 1466 bs2 = bs*bs; 1467 for (i=0; i<x->nz; i++) { 1468 j = 0; 1469 while (j < bs2){ 1470 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1471 j++; 1472 } 1473 } 1474 PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 1475 } else { 1476 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1477 } 1478 PetscFunctionReturn(0); 1479 } 1480 1481 /* -------------------------------------------------------------------*/ 1482 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1483 MatGetRow_SeqBAIJ, 1484 MatRestoreRow_SeqBAIJ, 1485 MatMult_SeqBAIJ_N, 1486 MatMultAdd_SeqBAIJ_N, 1487 MatMultTranspose_SeqBAIJ, 1488 MatMultTransposeAdd_SeqBAIJ, 1489 MatSolve_SeqBAIJ_N, 1490 0, 1491 0, 1492 0, 1493 MatLUFactor_SeqBAIJ, 1494 0, 1495 0, 1496 MatTranspose_SeqBAIJ, 1497 MatGetInfo_SeqBAIJ, 1498 MatEqual_SeqBAIJ, 1499 MatGetDiagonal_SeqBAIJ, 1500 MatDiagonalScale_SeqBAIJ, 1501 MatNorm_SeqBAIJ, 1502 0, 1503 MatAssemblyEnd_SeqBAIJ, 1504 0, 1505 MatSetOption_SeqBAIJ, 1506 MatZeroEntries_SeqBAIJ, 1507 MatZeroRows_SeqBAIJ, 1508 MatLUFactorSymbolic_SeqBAIJ, 1509 MatLUFactorNumeric_SeqBAIJ_N, 1510 0, 1511 0, 1512 MatSetUpPreallocation_SeqBAIJ, 1513 MatILUFactorSymbolic_SeqBAIJ, 1514 0, 1515 MatGetArray_SeqBAIJ, 1516 MatRestoreArray_SeqBAIJ, 1517 MatDuplicate_SeqBAIJ, 1518 0, 1519 0, 1520 MatILUFactor_SeqBAIJ, 1521 0, 1522 MatAXPY_SeqBAIJ, 1523 MatGetSubMatrices_SeqBAIJ, 1524 MatIncreaseOverlap_SeqBAIJ, 1525 MatGetValues_SeqBAIJ, 1526 0, 1527 MatPrintHelp_SeqBAIJ, 1528 MatScale_SeqBAIJ, 1529 0, 1530 0, 1531 0, 1532 MatGetBlockSize_SeqBAIJ, 1533 MatGetRowIJ_SeqBAIJ, 1534 MatRestoreRowIJ_SeqBAIJ, 1535 0, 1536 0, 1537 0, 1538 0, 1539 0, 1540 0, 1541 MatSetValuesBlocked_SeqBAIJ, 1542 MatGetSubMatrix_SeqBAIJ, 1543 MatDestroy_SeqBAIJ, 1544 MatView_SeqBAIJ, 1545 MatGetPetscMaps_Petsc, 1546 0, 1547 0, 1548 0, 1549 0, 1550 0, 1551 0, 1552 MatGetRowMax_SeqBAIJ, 1553 MatConvert_Basic}; 1554 1555 EXTERN_C_BEGIN 1556 #undef __FUNCT__ 1557 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 1558 int MatStoreValues_SeqBAIJ(Mat mat) 1559 { 1560 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1561 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1562 int ierr; 1563 1564 PetscFunctionBegin; 1565 if (aij->nonew != 1) { 1566 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1567 } 1568 1569 /* allocate space for values if not already there */ 1570 if (!aij->saved_values) { 1571 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1572 } 1573 1574 /* copy values over */ 1575 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1576 PetscFunctionReturn(0); 1577 } 1578 EXTERN_C_END 1579 1580 EXTERN_C_BEGIN 1581 #undef __FUNCT__ 1582 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 1583 int MatRetrieveValues_SeqBAIJ(Mat mat) 1584 { 1585 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1586 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1587 1588 PetscFunctionBegin; 1589 if (aij->nonew != 1) { 1590 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1591 } 1592 if (!aij->saved_values) { 1593 SETERRQ(1,"Must call MatStoreValues(A);first"); 1594 } 1595 1596 /* copy values over */ 1597 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1598 PetscFunctionReturn(0); 1599 } 1600 EXTERN_C_END 1601 1602 EXTERN_C_BEGIN 1603 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1604 EXTERN_C_END 1605 1606 EXTERN_C_BEGIN 1607 #undef __FUNCT__ 1608 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 1609 int MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,int bs,int nz,int *nnz) 1610 { 1611 Mat_SeqBAIJ *b; 1612 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1613 PetscTruth flg; 1614 1615 PetscFunctionBegin; 1616 1617 B->preallocated = PETSC_TRUE; 1618 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1619 if (nnz && newbs != bs) { 1620 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1621 } 1622 bs = newbs; 1623 1624 mbs = B->m/bs; 1625 nbs = B->n/bs; 1626 bs2 = bs*bs; 1627 1628 if (mbs*bs!=B->m || nbs*bs!=B->n) { 1629 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1630 } 1631 1632 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1633 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1634 if (nnz) { 1635 for (i=0; i<mbs; i++) { 1636 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1637 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1638 } 1639 } 1640 1641 b = (Mat_SeqBAIJ*)B->data; 1642 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1643 B->ops->solve = MatSolve_SeqBAIJ_Update; 1644 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 1645 if (!flg) { 1646 switch (bs) { 1647 case 1: 1648 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1649 B->ops->mult = MatMult_SeqBAIJ_1; 1650 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1651 break; 1652 case 2: 1653 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1654 B->ops->mult = MatMult_SeqBAIJ_2; 1655 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1656 break; 1657 case 3: 1658 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1659 B->ops->mult = MatMult_SeqBAIJ_3; 1660 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1661 break; 1662 case 4: 1663 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1664 B->ops->mult = MatMult_SeqBAIJ_4; 1665 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1666 break; 1667 case 5: 1668 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1669 B->ops->mult = MatMult_SeqBAIJ_5; 1670 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1671 break; 1672 case 6: 1673 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 1674 B->ops->mult = MatMult_SeqBAIJ_6; 1675 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 1676 break; 1677 case 7: 1678 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 1679 B->ops->mult = MatMult_SeqBAIJ_7; 1680 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 1681 break; 1682 default: 1683 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 1684 B->ops->mult = MatMult_SeqBAIJ_N; 1685 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 1686 break; 1687 } 1688 } 1689 b->bs = bs; 1690 b->mbs = mbs; 1691 b->nbs = nbs; 1692 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1693 if (!nnz) { 1694 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1695 else if (nz <= 0) nz = 1; 1696 for (i=0; i<mbs; i++) b->imax[i] = nz; 1697 nz = nz*mbs; 1698 } else { 1699 nz = 0; 1700 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1701 } 1702 1703 /* allocate the matrix space */ 1704 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1705 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1706 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1707 b->j = (int*)(b->a + nz*bs2); 1708 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1709 b->i = b->j + nz; 1710 b->singlemalloc = PETSC_TRUE; 1711 1712 b->i[0] = 0; 1713 for (i=1; i<mbs+1; i++) { 1714 b->i[i] = b->i[i-1] + b->imax[i-1]; 1715 } 1716 1717 /* b->ilen will count nonzeros in each block row so far. */ 1718 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1719 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1720 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1721 1722 b->bs = bs; 1723 b->bs2 = bs2; 1724 b->mbs = mbs; 1725 b->nz = 0; 1726 b->maxnz = nz*bs2; 1727 B->info.nz_unneeded = (PetscReal)b->maxnz; 1728 PetscFunctionReturn(0); 1729 } 1730 EXTERN_C_END 1731 1732 EXTERN_C_BEGIN 1733 #undef __FUNCT__ 1734 #define __FUNCT__ "MatCreate_SeqBAIJ" 1735 int MatCreate_SeqBAIJ(Mat B) 1736 { 1737 int ierr,size; 1738 Mat_SeqBAIJ *b; 1739 1740 PetscFunctionBegin; 1741 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1742 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1743 1744 B->m = B->M = PetscMax(B->m,B->M); 1745 B->n = B->N = PetscMax(B->n,B->N); 1746 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1747 B->data = (void*)b; 1748 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1749 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1750 B->factor = 0; 1751 B->lupivotthreshold = 1.0; 1752 B->mapping = 0; 1753 b->row = 0; 1754 b->col = 0; 1755 b->icol = 0; 1756 b->reallocs = 0; 1757 b->saved_values = 0; 1758 #if defined(PETSC_USE_MAT_SINGLE) 1759 b->setvalueslen = 0; 1760 b->setvaluescopy = PETSC_NULL; 1761 #endif 1762 1763 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 1764 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1765 1766 b->sorted = PETSC_FALSE; 1767 b->roworiented = PETSC_TRUE; 1768 b->nonew = 0; 1769 b->diag = 0; 1770 b->solve_work = 0; 1771 b->mult_work = 0; 1772 B->spptr = 0; 1773 B->info.nz_unneeded = (PetscReal)b->maxnz; 1774 b->keepzeroedrows = PETSC_FALSE; 1775 b->xtoy = 0; 1776 b->XtoY = 0; 1777 1778 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1779 "MatStoreValues_SeqBAIJ", 1780 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1781 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1782 "MatRetrieveValues_SeqBAIJ", 1783 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1784 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1785 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1786 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1787 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 1788 "MatConvert_SeqBAIJ_SeqAIJ", 1789 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 1790 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 1791 "MatSeqBAIJSetPreallocation_SeqBAIJ", 1792 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 1793 PetscFunctionReturn(0); 1794 } 1795 EXTERN_C_END 1796 1797 #undef __FUNCT__ 1798 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 1799 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1800 { 1801 Mat C; 1802 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1803 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1804 1805 PetscFunctionBegin; 1806 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1807 1808 *B = 0; 1809 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1810 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1811 c = (Mat_SeqBAIJ*)C->data; 1812 1813 c->bs = a->bs; 1814 c->bs2 = a->bs2; 1815 c->mbs = a->mbs; 1816 c->nbs = a->nbs; 1817 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1818 1819 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1820 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1821 for (i=0; i<mbs; i++) { 1822 c->imax[i] = a->imax[i]; 1823 c->ilen[i] = a->ilen[i]; 1824 } 1825 1826 /* allocate the matrix space */ 1827 c->singlemalloc = PETSC_TRUE; 1828 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1829 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1830 c->j = (int*)(c->a + nz*bs2); 1831 c->i = c->j + nz; 1832 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1833 if (mbs > 0) { 1834 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1835 if (cpvalues == MAT_COPY_VALUES) { 1836 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1837 } else { 1838 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1839 } 1840 } 1841 1842 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1843 c->sorted = a->sorted; 1844 c->roworiented = a->roworiented; 1845 c->nonew = a->nonew; 1846 1847 if (a->diag) { 1848 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1849 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1850 for (i=0; i<mbs; i++) { 1851 c->diag[i] = a->diag[i]; 1852 } 1853 } else c->diag = 0; 1854 c->nz = a->nz; 1855 c->maxnz = a->maxnz; 1856 c->solve_work = 0; 1857 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1858 c->mult_work = 0; 1859 C->preallocated = PETSC_TRUE; 1860 C->assembled = PETSC_TRUE; 1861 *B = C; 1862 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1863 PetscFunctionReturn(0); 1864 } 1865 1866 EXTERN_C_BEGIN 1867 #undef __FUNCT__ 1868 #define __FUNCT__ "MatLoad_SeqBAIJ" 1869 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 1870 { 1871 Mat_SeqBAIJ *a; 1872 Mat B; 1873 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1874 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1875 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1876 int *masked,nmask,tmp,bs2,ishift; 1877 PetscScalar *aa; 1878 MPI_Comm comm = ((PetscObject)viewer)->comm; 1879 #if defined(PETSC_HAVE_DSCPACK) 1880 PetscTruth flag; 1881 #endif 1882 1883 PetscFunctionBegin; 1884 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1885 bs2 = bs*bs; 1886 1887 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1888 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1889 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1890 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1891 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1892 M = header[1]; N = header[2]; nz = header[3]; 1893 1894 if (header[3] < 0) { 1895 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1896 } 1897 1898 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1899 1900 /* 1901 This code adds extra rows to make sure the number of rows is 1902 divisible by the blocksize 1903 */ 1904 mbs = M/bs; 1905 extra_rows = bs - M + bs*(mbs); 1906 if (extra_rows == bs) extra_rows = 0; 1907 else mbs++; 1908 if (extra_rows) { 1909 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1910 } 1911 1912 /* read in row lengths */ 1913 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1914 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1915 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1916 1917 /* read in column indices */ 1918 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1919 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1920 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1921 1922 /* loop over row lengths determining block row lengths */ 1923 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1924 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1925 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1926 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1927 masked = mask + mbs; 1928 rowcount = 0; nzcount = 0; 1929 for (i=0; i<mbs; i++) { 1930 nmask = 0; 1931 for (j=0; j<bs; j++) { 1932 kmax = rowlengths[rowcount]; 1933 for (k=0; k<kmax; k++) { 1934 tmp = jj[nzcount++]/bs; 1935 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1936 } 1937 rowcount++; 1938 } 1939 browlengths[i] += nmask; 1940 /* zero out the mask elements we set */ 1941 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1942 } 1943 1944 /* create our matrix */ 1945 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1946 B = *A; 1947 a = (Mat_SeqBAIJ*)B->data; 1948 1949 /* set matrix "i" values */ 1950 a->i[0] = 0; 1951 for (i=1; i<= mbs; i++) { 1952 a->i[i] = a->i[i-1] + browlengths[i-1]; 1953 a->ilen[i-1] = browlengths[i-1]; 1954 } 1955 a->nz = 0; 1956 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1957 1958 /* read in nonzero values */ 1959 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1960 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1961 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1962 1963 /* set "a" and "j" values into matrix */ 1964 nzcount = 0; jcount = 0; 1965 for (i=0; i<mbs; i++) { 1966 nzcountb = nzcount; 1967 nmask = 0; 1968 for (j=0; j<bs; j++) { 1969 kmax = rowlengths[i*bs+j]; 1970 for (k=0; k<kmax; k++) { 1971 tmp = jj[nzcount++]/bs; 1972 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1973 } 1974 } 1975 /* sort the masked values */ 1976 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1977 1978 /* set "j" values into matrix */ 1979 maskcount = 1; 1980 for (j=0; j<nmask; j++) { 1981 a->j[jcount++] = masked[j]; 1982 mask[masked[j]] = maskcount++; 1983 } 1984 /* set "a" values into matrix */ 1985 ishift = bs2*a->i[i]; 1986 for (j=0; j<bs; j++) { 1987 kmax = rowlengths[i*bs+j]; 1988 for (k=0; k<kmax; k++) { 1989 tmp = jj[nzcountb]/bs ; 1990 block = mask[tmp] - 1; 1991 point = jj[nzcountb] - bs*tmp; 1992 idx = ishift + bs2*block + j + bs*point; 1993 a->a[idx] = (MatScalar)aa[nzcountb++]; 1994 } 1995 } 1996 /* zero out the mask elements we set */ 1997 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1998 } 1999 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2000 2001 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2002 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2003 ierr = PetscFree(aa);CHKERRQ(ierr); 2004 ierr = PetscFree(jj);CHKERRQ(ierr); 2005 ierr = PetscFree(mask);CHKERRQ(ierr); 2006 2007 B->assembled = PETSC_TRUE; 2008 2009 #if defined(PETSC_HAVE_DSCPACK) 2010 ierr = PetscOptionsHasName(B->prefix,"-mat_baij_dscpack",&flag);CHKERRQ(ierr); 2011 if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(B);CHKERRQ(ierr); } 2012 #endif 2013 2014 ierr = MatView_Private(B);CHKERRQ(ierr); 2015 PetscFunctionReturn(0); 2016 } 2017 EXTERN_C_END 2018 2019 #undef __FUNCT__ 2020 #define __FUNCT__ "MatCreateSeqBAIJ" 2021 /*@C 2022 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2023 compressed row) format. For good matrix assembly performance the 2024 user should preallocate the matrix storage by setting the parameter nz 2025 (or the array nnz). By setting these parameters accurately, performance 2026 during matrix assembly can be increased by more than a factor of 50. 2027 2028 Collective on MPI_Comm 2029 2030 Input Parameters: 2031 + comm - MPI communicator, set to PETSC_COMM_SELF 2032 . bs - size of block 2033 . m - number of rows 2034 . n - number of columns 2035 . nz - number of nonzero blocks per block row (same for all rows) 2036 - nnz - array containing the number of nonzero blocks in the various block rows 2037 (possibly different for each block row) or PETSC_NULL 2038 2039 Output Parameter: 2040 . A - the matrix 2041 2042 Options Database Keys: 2043 . -mat_no_unroll - uses code that does not unroll the loops in the 2044 block calculations (much slower) 2045 . -mat_block_size - size of the blocks to use 2046 2047 Level: intermediate 2048 2049 Notes: 2050 A nonzero block is any block that as 1 or more nonzeros in it 2051 2052 The block AIJ format is fully compatible with standard Fortran 77 2053 storage. That is, the stored row and column indices can begin at 2054 either one (as in Fortran) or zero. See the users' manual for details. 2055 2056 Specify the preallocated storage with either nz or nnz (not both). 2057 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2058 allocation. For additional details, see the users manual chapter on 2059 matrices. 2060 2061 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2062 @*/ 2063 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 2064 { 2065 int ierr; 2066 2067 PetscFunctionBegin; 2068 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 2069 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2070 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 2071 PetscFunctionReturn(0); 2072 } 2073 2074 #undef __FUNCT__ 2075 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2076 /*@C 2077 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2078 per row in the matrix. For good matrix assembly performance the 2079 user should preallocate the matrix storage by setting the parameter nz 2080 (or the array nnz). By setting these parameters accurately, performance 2081 during matrix assembly can be increased by more than a factor of 50. 2082 2083 Collective on MPI_Comm 2084 2085 Input Parameters: 2086 + A - the matrix 2087 . bs - size of block 2088 . nz - number of block nonzeros per block row (same for all rows) 2089 - nnz - array containing the number of block nonzeros in the various block rows 2090 (possibly different for each block row) or PETSC_NULL 2091 2092 Options Database Keys: 2093 . -mat_no_unroll - uses code that does not unroll the loops in the 2094 block calculations (much slower) 2095 . -mat_block_size - size of the blocks to use 2096 2097 Level: intermediate 2098 2099 Notes: 2100 The block AIJ format is fully compatible with standard Fortran 77 2101 storage. That is, the stored row and column indices can begin at 2102 either one (as in Fortran) or zero. See the users' manual for details. 2103 2104 Specify the preallocated storage with either nz or nnz (not both). 2105 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2106 allocation. For additional details, see the users manual chapter on 2107 matrices. 2108 2109 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2110 @*/ 2111 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 2112 { 2113 int ierr,(*f)(Mat,int,int,int*); 2114 2115 PetscFunctionBegin; 2116 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2117 if (f) { 2118 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2119 } 2120 PetscFunctionReturn(0); 2121 } 2122