1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: baij.c,v 1.162 1999/02/15 18:32:41 balay Exp balay $"; 3 #endif 4 5 /* 6 Defines the basic matrix operations for the BAIJ (compressed row) 7 matrix storage format. 8 */ 9 #include "sys.h" 10 #include "src/mat/impls/baij/seq/baij.h" 11 #include "src/vec/vecimpl.h" 12 #include "src/inline/spops.h" 13 14 #define CHUNKSIZE 10 15 16 /* 17 Checks for missing diagonals 18 */ 19 #undef __FUNC__ 20 #define __FUNC__ "MatMissingDiag_SeqBAIJ" 21 int MatMissingDiag_SeqBAIJ(Mat A) 22 { 23 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 24 int *diag = a->diag, *jj = a->j,i; 25 26 PetscFunctionBegin; 27 for ( i=0; i<a->mbs; i++ ) { 28 if (jj[diag[i]] != i) { 29 SETERRQ1(1,1,"Matrix is missing diagonal number %d",i); 30 } 31 } 32 PetscFunctionReturn(0); 33 } 34 35 #undef __FUNC__ 36 #define __FUNC__ "MatMarkDiag_SeqBAIJ" 37 int MatMarkDiag_SeqBAIJ(Mat A) 38 { 39 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 40 int i,j, *diag, m = a->mbs; 41 42 PetscFunctionBegin; 43 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 44 PLogObjectMemory(A,(m+1)*sizeof(int)); 45 for ( i=0; i<m; i++ ) { 46 diag[i] = a->i[i+1]; 47 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 48 if (a->j[j] == i) { 49 diag[i] = j; 50 break; 51 } 52 } 53 } 54 a->diag = diag; 55 PetscFunctionReturn(0); 56 } 57 58 59 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 60 61 #undef __FUNC__ 62 #define __FUNC__ "MatGetRowIJ_SeqBAIJ" 63 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 64 PetscTruth *done) 65 { 66 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 67 int ierr, n = a->mbs,i; 68 69 PetscFunctionBegin; 70 *nn = n; 71 if (!ia) PetscFunctionReturn(0); 72 if (symmetric) { 73 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 74 } else if (oshift == 1) { 75 /* temporarily add 1 to i and j indices */ 76 int nz = a->i[n] + 1; 77 for ( i=0; i<nz; i++ ) a->j[i]++; 78 for ( i=0; i<n+1; i++ ) a->i[i]++; 79 *ia = a->i; *ja = a->j; 80 } else { 81 *ia = a->i; *ja = a->j; 82 } 83 84 PetscFunctionReturn(0); 85 } 86 87 #undef __FUNC__ 88 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" 89 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 90 PetscTruth *done) 91 { 92 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 93 int i,n = a->mbs; 94 95 PetscFunctionBegin; 96 if (!ia) PetscFunctionReturn(0); 97 if (symmetric) { 98 PetscFree(*ia); 99 PetscFree(*ja); 100 } else if (oshift == 1) { 101 int nz = a->i[n]; 102 for ( i=0; i<nz; i++ ) a->j[i]--; 103 for ( i=0; i<n+1; i++ ) a->i[i]--; 104 } 105 PetscFunctionReturn(0); 106 } 107 108 #undef __FUNC__ 109 #define __FUNC__ "MatGetBlockSize_SeqBAIJ" 110 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 111 { 112 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 113 114 PetscFunctionBegin; 115 *bs = baij->bs; 116 PetscFunctionReturn(0); 117 } 118 119 120 #undef __FUNC__ 121 #define __FUNC__ "MatDestroy_SeqBAIJ" 122 int MatDestroy_SeqBAIJ(Mat A) 123 { 124 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 125 int ierr; 126 127 if (--A->refct > 0) PetscFunctionReturn(0); 128 129 if (A->mapping) { 130 ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr); 131 } 132 if (A->bmapping) { 133 ierr = ISLocalToGlobalMappingDestroy(A->bmapping); CHKERRQ(ierr); 134 } 135 if (A->rmap) { 136 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 137 } 138 if (A->cmap) { 139 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 140 } 141 #if defined(USE_PETSC_LOG) 142 PLogObjectState((PetscObject) A,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 143 #endif 144 PetscFree(a->a); 145 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 146 if (a->diag) PetscFree(a->diag); 147 if (a->ilen) PetscFree(a->ilen); 148 if (a->imax) PetscFree(a->imax); 149 if (a->solve_work) PetscFree(a->solve_work); 150 if (a->mult_work) PetscFree(a->mult_work); 151 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 152 PetscFree(a); 153 PLogObjectDestroy(A); 154 PetscHeaderDestroy(A); 155 PetscFunctionReturn(0); 156 } 157 158 #undef __FUNC__ 159 #define __FUNC__ "MatSetOption_SeqBAIJ" 160 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 161 { 162 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 163 164 PetscFunctionBegin; 165 if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 166 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 167 else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 168 else if (op == MAT_COLUMNS_UNSORTED) a->sorted = 0; 169 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 170 else if (op == MAT_NEW_NONZERO_LOCATION_ERR) a->nonew = -1; 171 else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR) a->nonew = -2; 172 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 173 else if (op == MAT_ROWS_SORTED || 174 op == MAT_ROWS_UNSORTED || 175 op == MAT_SYMMETRIC || 176 op == MAT_STRUCTURALLY_SYMMETRIC || 177 op == MAT_YES_NEW_DIAGONALS || 178 op == MAT_IGNORE_OFF_PROC_ENTRIES || 179 op == MAT_USE_HASH_TABLE) { 180 PLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 181 } else if (op == MAT_NO_NEW_DIAGONALS) { 182 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 183 } else { 184 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 185 } 186 PetscFunctionReturn(0); 187 } 188 189 190 #undef __FUNC__ 191 #define __FUNC__ "MatGetSize_SeqBAIJ" 192 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 193 { 194 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 195 196 PetscFunctionBegin; 197 if (m) *m = a->m; 198 if (n) *n = a->n; 199 PetscFunctionReturn(0); 200 } 201 202 #undef __FUNC__ 203 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" 204 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 205 { 206 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 207 208 PetscFunctionBegin; 209 *m = 0; *n = a->m; 210 PetscFunctionReturn(0); 211 } 212 213 #undef __FUNC__ 214 #define __FUNC__ "MatGetRow_SeqBAIJ" 215 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 216 { 217 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 218 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 219 MatScalar *aa,*aa_i; 220 Scalar *v_i; 221 222 PetscFunctionBegin; 223 bs = a->bs; 224 ai = a->i; 225 aj = a->j; 226 aa = a->a; 227 bs2 = a->bs2; 228 229 if (row < 0 || row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row out of range"); 230 231 bn = row/bs; /* Block number */ 232 bp = row % bs; /* Block Position */ 233 M = ai[bn+1] - ai[bn]; 234 *nz = bs*M; 235 236 if (v) { 237 *v = 0; 238 if (*nz) { 239 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 240 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 241 v_i = *v + i*bs; 242 aa_i = aa + bs2*(ai[bn] + i); 243 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 244 } 245 } 246 } 247 248 if (idx) { 249 *idx = 0; 250 if (*nz) { 251 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 252 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 253 idx_i = *idx + i*bs; 254 itmp = bs*aj[ai[bn] + i]; 255 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 256 } 257 } 258 } 259 PetscFunctionReturn(0); 260 } 261 262 #undef __FUNC__ 263 #define __FUNC__ "MatRestoreRow_SeqBAIJ" 264 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 265 { 266 PetscFunctionBegin; 267 if (idx) {if (*idx) PetscFree(*idx);} 268 if (v) {if (*v) PetscFree(*v);} 269 PetscFunctionReturn(0); 270 } 271 272 #undef __FUNC__ 273 #define __FUNC__ "MatTranspose_SeqBAIJ" 274 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 275 { 276 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 277 Mat C; 278 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 279 int *rows,*cols,bs2=a->bs2; 280 MatScalar *array = a->a; 281 282 PetscFunctionBegin; 283 if (B==PETSC_NULL && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Square matrix only for in-place"); 284 col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 285 PetscMemzero(col,(1+nbs)*sizeof(int)); 286 287 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 288 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 289 PetscFree(col); 290 rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 291 cols = rows + bs; 292 for ( i=0; i<mbs; i++ ) { 293 cols[0] = i*bs; 294 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 295 len = ai[i+1] - ai[i]; 296 for ( j=0; j<len; j++ ) { 297 rows[0] = (*aj++)*bs; 298 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 299 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 300 array += bs2; 301 } 302 } 303 PetscFree(rows); 304 305 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 306 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 307 308 if (B != PETSC_NULL) { 309 *B = C; 310 } else { 311 PetscOps *Abops; 312 MatOps Aops; 313 314 /* This isn't really an in-place transpose */ 315 PetscFree(a->a); 316 if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 317 if (a->diag) PetscFree(a->diag); 318 if (a->ilen) PetscFree(a->ilen); 319 if (a->imax) PetscFree(a->imax); 320 if (a->solve_work) PetscFree(a->solve_work); 321 PetscFree(a); 322 323 324 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 325 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 326 327 /* 328 This is horrible, horrible code. We need to keep the 329 A pointers for the bops and ops but copy everything 330 else from C. 331 */ 332 Abops = A->bops; 333 Aops = A->ops; 334 PetscMemcpy(A,C,sizeof(struct _p_Mat)); 335 A->bops = Abops; 336 A->ops = Aops; 337 A->qlist = 0; 338 339 PetscHeaderDestroy(C); 340 } 341 PetscFunctionReturn(0); 342 } 343 344 345 346 347 #undef __FUNC__ 348 #define __FUNC__ "MatView_SeqBAIJ_Binary" 349 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 350 { 351 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 352 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 353 Scalar *aa; 354 355 PetscFunctionBegin; 356 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 357 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 358 col_lens[0] = MAT_COOKIE; 359 360 col_lens[1] = a->m; 361 col_lens[2] = a->n; 362 col_lens[3] = a->nz*bs2; 363 364 /* store lengths of each row and write (including header) to file */ 365 count = 0; 366 for ( i=0; i<a->mbs; i++ ) { 367 for ( j=0; j<bs; j++ ) { 368 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 369 } 370 } 371 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1); CHKERRQ(ierr); 372 PetscFree(col_lens); 373 374 /* store column indices (zero start index) */ 375 jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) ); CHKPTRQ(jj); 376 count = 0; 377 for ( i=0; i<a->mbs; i++ ) { 378 for ( j=0; j<bs; j++ ) { 379 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 380 for ( l=0; l<bs; l++ ) { 381 jj[count++] = bs*a->j[k] + l; 382 } 383 } 384 } 385 } 386 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0); CHKERRQ(ierr); 387 PetscFree(jj); 388 389 /* store nonzero values */ 390 aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar)); CHKPTRQ(aa); 391 count = 0; 392 for ( i=0; i<a->mbs; i++ ) { 393 for ( j=0; j<bs; j++ ) { 394 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 395 for ( l=0; l<bs; l++ ) { 396 aa[count++] = a->a[bs2*k + l*bs + j]; 397 } 398 } 399 } 400 } 401 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0); CHKERRQ(ierr); 402 PetscFree(aa); 403 PetscFunctionReturn(0); 404 } 405 406 #undef __FUNC__ 407 #define __FUNC__ "MatView_SeqBAIJ_ASCII" 408 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 409 { 410 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 411 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 412 FILE *fd; 413 char *outputname; 414 415 PetscFunctionBegin; 416 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 417 ierr = ViewerGetOutputname(viewer,&outputname);CHKERRQ(ierr); 418 ierr = ViewerGetFormat(viewer,&format); 419 if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 420 ViewerASCIIPrintf(viewer," block size is %d\n",bs); 421 } else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 422 SETERRQ(PETSC_ERR_SUP,0,"Socket format not supported"); 423 } else if (format == VIEWER_FORMAT_ASCII_COMMON) { 424 for ( i=0; i<a->mbs; i++ ) { 425 for ( j=0; j<bs; j++ ) { 426 fprintf(fd,"row %d:",i*bs+j); 427 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 428 for ( l=0; l<bs; l++ ) { 429 #if defined(USE_PETSC_COMPLEX) 430 if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) { 431 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 432 PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j])); 433 } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) { 434 fprintf(fd," %d %g - %g i",bs*a->j[k]+l, 435 PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j])); 436 } else if (PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) { 437 fprintf(fd," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j])); 438 } 439 #else 440 if (a->a[bs2*k + l*bs + j] != 0.0) { 441 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 442 } 443 #endif 444 } 445 } 446 fprintf(fd,"\n"); 447 } 448 } 449 } else { 450 for ( i=0; i<a->mbs; i++ ) { 451 for ( j=0; j<bs; j++ ) { 452 fprintf(fd,"row %d:",i*bs+j); 453 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 454 for ( l=0; l<bs; l++ ) { 455 #if defined(USE_PETSC_COMPLEX) 456 if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0) { 457 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 458 PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j])); 459 } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0) { 460 fprintf(fd," %d %g - %g i",bs*a->j[k]+l, 461 PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j])); 462 } else { 463 fprintf(fd," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j])); 464 } 465 #else 466 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 467 #endif 468 } 469 } 470 fprintf(fd,"\n"); 471 } 472 } 473 } 474 fflush(fd); 475 PetscFunctionReturn(0); 476 } 477 478 #undef __FUNC__ 479 #define __FUNC__ "MatView_SeqBAIJ_Draw_Zoom" 480 static int MatView_SeqBAIJ_Draw_Zoom(Draw draw,void *Aa) 481 { 482 Mat A = (Mat) Aa; 483 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 484 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 485 double xl,yl,xr,yr,x_l,x_r,y_l,y_r; 486 MatScalar *aa; 487 MPI_Comm comm; 488 Viewer viewer; 489 490 PetscFunctionBegin; 491 /* 492 This is nasty. If this is called from an originally parallel matrix 493 then all processes call this, but only the first has the matrix so the 494 rest should return immediately. 495 */ 496 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 497 MPI_Comm_rank(comm,&rank); 498 if (rank) PetscFunctionReturn(0); 499 500 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*) &viewer);CHKERRQ(ierr); 501 502 ierr = DrawGetCoordinates(draw,&xl,&yl,&xr,&yr); CHKERRQ(ierr); 503 504 /* loop over matrix elements drawing boxes */ 505 color = DRAW_BLUE; 506 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 507 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 508 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 509 x_l = a->j[j]*bs; x_r = x_l + 1.0; 510 aa = a->a + j*bs2; 511 for ( k=0; k<bs; k++ ) { 512 for ( l=0; l<bs; l++ ) { 513 if (PetscReal(*aa++) >= 0.) continue; 514 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 515 } 516 } 517 } 518 } 519 color = DRAW_CYAN; 520 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 521 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 522 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 523 x_l = a->j[j]*bs; x_r = x_l + 1.0; 524 aa = a->a + j*bs2; 525 for ( k=0; k<bs; k++ ) { 526 for ( l=0; l<bs; l++ ) { 527 if (PetscReal(*aa++) != 0.) continue; 528 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 529 } 530 } 531 } 532 } 533 534 color = DRAW_RED; 535 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 536 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 537 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 538 x_l = a->j[j]*bs; x_r = x_l + 1.0; 539 aa = a->a + j*bs2; 540 for ( k=0; k<bs; k++ ) { 541 for ( l=0; l<bs; l++ ) { 542 if (PetscReal(*aa++) <= 0.) continue; 543 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 544 } 545 } 546 } 547 } 548 PetscFunctionReturn(0); 549 } 550 551 #undef __FUNC__ 552 #define __FUNC__ "MatView_SeqBAIJ_Draw" 553 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 554 { 555 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 556 int ierr; 557 double xl,yl,xr,yr,w,h; 558 Draw draw; 559 PetscTruth isnull; 560 561 PetscFunctionBegin; 562 563 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 564 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 565 566 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 567 xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 568 xr += w; yr += h; xl = -w; yl = -h; 569 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 570 ierr = DrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A); CHKERRQ(ierr); 571 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 572 PetscFunctionReturn(0); 573 } 574 575 #undef __FUNC__ 576 #define __FUNC__ "MatView_SeqBAIJ" 577 int MatView_SeqBAIJ(Mat A,Viewer viewer) 578 { 579 ViewerType vtype; 580 int ierr; 581 582 PetscFunctionBegin; 583 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 584 if (PetscTypeCompare(vtype,SOCKET_VIEWER)) { 585 SETERRQ(PETSC_ERR_SUP,0,"Socket viewer not supported"); 586 } else if (PetscTypeCompare(vtype,ASCII_VIEWER)){ 587 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 588 } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 589 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 590 } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) { 591 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 592 } else { 593 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 594 } 595 PetscFunctionReturn(0); 596 } 597 598 599 #undef __FUNC__ 600 #define __FUNC__ "MatGetValues_SeqBAIJ" 601 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 602 { 603 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 604 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 605 int *ai = a->i, *ailen = a->ilen; 606 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 607 MatScalar *ap, *aa = a->a, zero = 0.0; 608 609 PetscFunctionBegin; 610 for ( k=0; k<m; k++ ) { /* loop over rows */ 611 row = im[k]; brow = row/bs; 612 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 613 if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 614 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 615 nrow = ailen[brow]; 616 for ( l=0; l<n; l++ ) { /* loop over columns */ 617 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 618 if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 619 col = in[l] ; 620 bcol = col/bs; 621 cidx = col%bs; 622 ridx = row%bs; 623 high = nrow; 624 low = 0; /* assume unsorted */ 625 while (high-low > 5) { 626 t = (low+high)/2; 627 if (rp[t] > bcol) high = t; 628 else low = t; 629 } 630 for ( i=low; i<high; i++ ) { 631 if (rp[i] > bcol) break; 632 if (rp[i] == bcol) { 633 *v++ = ap[bs2*i+bs*cidx+ridx]; 634 goto finished; 635 } 636 } 637 *v++ = zero; 638 finished:; 639 } 640 } 641 PetscFunctionReturn(0); 642 } 643 644 645 #undef __FUNC__ 646 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ" 647 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 648 { 649 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 650 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 651 int *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented; 652 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval; 653 Scalar *value = v; 654 MatScalar *ap,*aa=a->a,*bap; 655 656 PetscFunctionBegin; 657 if (roworiented) { 658 stepval = (n-1)*bs; 659 } else { 660 stepval = (m-1)*bs; 661 } 662 for ( k=0; k<m; k++ ) { /* loop over added rows */ 663 row = im[k]; 664 if (row < 0) continue; 665 #if defined(USE_PETSC_BOPT_g) 666 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 667 #endif 668 rp = aj + ai[row]; 669 ap = aa + bs2*ai[row]; 670 rmax = imax[row]; 671 nrow = ailen[row]; 672 low = 0; 673 for ( l=0; l<n; l++ ) { /* loop over added columns */ 674 if (in[l] < 0) continue; 675 #if defined(USE_PETSC_BOPT_g) 676 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 677 #endif 678 col = in[l]; 679 if (roworiented) { 680 value = v + k*(stepval+bs)*bs + l*bs; 681 } else { 682 value = v + l*(stepval+bs)*bs + k*bs; 683 } 684 if (!sorted) low = 0; high = nrow; 685 while (high-low > 7) { 686 t = (low+high)/2; 687 if (rp[t] > col) high = t; 688 else low = t; 689 } 690 for ( i=low; i<high; i++ ) { 691 if (rp[i] > col) break; 692 if (rp[i] == col) { 693 bap = ap + bs2*i; 694 if (roworiented) { 695 if (is == ADD_VALUES) { 696 for ( ii=0; ii<bs; ii++,value+=stepval ) { 697 for (jj=ii; jj<bs2; jj+=bs ) { 698 bap[jj] += *value++; 699 } 700 } 701 } else { 702 for ( ii=0; ii<bs; ii++,value+=stepval ) { 703 for (jj=ii; jj<bs2; jj+=bs ) { 704 bap[jj] = *value++; 705 } 706 } 707 } 708 } else { 709 if (is == ADD_VALUES) { 710 for ( ii=0; ii<bs; ii++,value+=stepval ) { 711 for (jj=0; jj<bs; jj++ ) { 712 *bap++ += *value++; 713 } 714 } 715 } else { 716 for ( ii=0; ii<bs; ii++,value+=stepval ) { 717 for (jj=0; jj<bs; jj++ ) { 718 *bap++ = *value++; 719 } 720 } 721 } 722 } 723 goto noinsert2; 724 } 725 } 726 if (nonew == 1) goto noinsert2; 727 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 728 if (nrow >= rmax) { 729 /* there is no extra room in row, therefore enlarge */ 730 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 731 MatScalar *new_a; 732 733 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 734 735 /* malloc new storage space */ 736 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 737 new_a = (MatScalar *) PetscMalloc( len ); CHKPTRQ(new_a); 738 new_j = (int *) (new_a + bs2*new_nz); 739 new_i = new_j + new_nz; 740 741 /* copy over old data into new slots */ 742 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} 743 for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 744 PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int)); 745 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 746 PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int)); 747 PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar)); 748 PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar)); 749 PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar)); 750 /* free up old matrix storage */ 751 PetscFree(a->a); 752 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 753 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 754 a->singlemalloc = 1; 755 756 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 757 rmax = imax[row] = imax[row] + CHUNKSIZE; 758 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 759 a->maxnz += bs2*CHUNKSIZE; 760 a->reallocs++; 761 a->nz++; 762 } 763 N = nrow++ - 1; 764 /* shift up all the later entries in this row */ 765 for ( ii=N; ii>=i; ii-- ) { 766 rp[ii+1] = rp[ii]; 767 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar)); 768 } 769 if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar)); 770 rp[i] = col; 771 bap = ap + bs2*i; 772 if (roworiented) { 773 for ( ii=0; ii<bs; ii++,value+=stepval) { 774 for (jj=ii; jj<bs2; jj+=bs ) { 775 bap[jj] = *value++; 776 } 777 } 778 } else { 779 for ( ii=0; ii<bs; ii++,value+=stepval ) { 780 for (jj=0; jj<bs; jj++ ) { 781 *bap++ = *value++; 782 } 783 } 784 } 785 noinsert2:; 786 low = i; 787 } 788 ailen[row] = nrow; 789 } 790 PetscFunctionReturn(0); 791 } 792 793 794 #undef __FUNC__ 795 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ" 796 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 797 { 798 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 799 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 800 int m = a->m,*ip, N, *ailen = a->ilen; 801 int mbs = a->mbs, bs2 = a->bs2,rmax = 0; 802 MatScalar *aa = a->a, *ap; 803 804 PetscFunctionBegin; 805 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 806 807 if (m) rmax = ailen[0]; 808 for ( i=1; i<mbs; i++ ) { 809 /* move each row back by the amount of empty slots (fshift) before it*/ 810 fshift += imax[i-1] - ailen[i-1]; 811 rmax = PetscMax(rmax,ailen[i]); 812 if (fshift) { 813 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 814 N = ailen[i]; 815 for ( j=0; j<N; j++ ) { 816 ip[j-fshift] = ip[j]; 817 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar)); 818 } 819 } 820 ai[i] = ai[i-1] + ailen[i-1]; 821 } 822 if (mbs) { 823 fshift += imax[mbs-1] - ailen[mbs-1]; 824 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 825 } 826 /* reset ilen and imax for each row */ 827 for ( i=0; i<mbs; i++ ) { 828 ailen[i] = imax[i] = ai[i+1] - ai[i]; 829 } 830 a->nz = ai[mbs]; 831 832 /* diagonals may have moved, so kill the diagonal pointers */ 833 if (fshift && a->diag) { 834 PetscFree(a->diag); 835 PLogObjectMemory(A,-(m+1)*sizeof(int)); 836 a->diag = 0; 837 } 838 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 839 m,a->n,a->bs,fshift*bs2,a->nz*bs2); 840 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 841 a->reallocs); 842 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 843 a->reallocs = 0; 844 A->info.nz_unneeded = (double)fshift*bs2; 845 846 PetscFunctionReturn(0); 847 } 848 849 850 851 /* 852 This function returns an array of flags which indicate the locations of contiguous 853 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 854 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 855 Assume: sizes should be long enough to hold all the values. 856 */ 857 #undef __FUNC__ 858 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Blocks" 859 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 860 { 861 int i,j,k,row; 862 PetscTruth flg; 863 864 /* PetscFunctionBegin;*/ 865 for ( i=0,j=0; i<n; j++ ) { 866 row = idx[i]; 867 if (row%bs!=0) { /* Not the begining of a block */ 868 sizes[j] = 1; 869 i++; 870 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 871 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 872 i++; 873 } else { /* Begining of the block, so check if the complete block exists */ 874 flg = PETSC_TRUE; 875 for ( k=1; k<bs; k++ ) { 876 if (row+k != idx[i+k]) { /* break in the block */ 877 flg = PETSC_FALSE; 878 break; 879 } 880 } 881 if (flg == PETSC_TRUE) { /* No break in the bs */ 882 sizes[j] = bs; 883 i+= bs; 884 } else { 885 sizes[j] = 1; 886 i++; 887 } 888 } 889 } 890 *bs_max = j; 891 PetscFunctionReturn(0); 892 } 893 894 #undef __FUNC__ 895 #define __FUNC__ "MatZeroRows_SeqBAIJ" 896 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 897 { 898 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 899 int ierr,i,j,k,count,is_n,*is_idx,*rows; 900 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 901 Scalar zero = 0.0; 902 MatScalar *aa; 903 904 PetscFunctionBegin; 905 /* Make a copy of the IS and sort it */ 906 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 907 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 908 909 /* allocate memory for rows,sizes */ 910 rows = (int*)PetscMalloc((3*is_n+1)*sizeof(int)); CHKPTRQ(rows); 911 sizes = rows + is_n; 912 913 /* initialize copy IS valurs to rows, and sort them */ 914 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 915 ierr = PetscSortInt(is_n,rows); CHKERRQ(ierr); 916 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max); CHKERRQ(ierr); 917 ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr); 918 919 for ( i=0,j=0; i<bs_max; j+=sizes[i],i++ ) { 920 row = rows[j]; 921 if (row < 0 || row > baij->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"row %d out of range",row); 922 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 923 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 924 if (sizes[i] == bs) { 925 if (diag) { 926 if (baij->ilen[row/bs] > 0) { 927 baij->ilen[row/bs] = 1; 928 baij->j[baij->i[row/bs]] = row/bs; 929 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar)); CHKERRQ(ierr); 930 } 931 /* Now insert all the diagoanl values for this bs */ 932 for ( k=0; k<bs; k++ ) { 933 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 934 } 935 } else { /* (!diag) */ 936 baij->ilen[row/bs] = 0; 937 } /* end (!diag) */ 938 } else { /* (sizes[i] != bs) */ 939 #if defined (USE_PETSC_DEBUG) 940 if (sizes[i] != 1) SETERRQ(1,0,"Internal Error. Value should be 1"); 941 #endif 942 for ( k=0; k<count; k++ ) { 943 aa[0] = zero; 944 aa+=bs; 945 } 946 if (diag) { 947 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 948 } 949 } 950 } 951 952 PetscFree(rows); 953 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 954 PetscFunctionReturn(0); 955 } 956 957 #undef __FUNC__ 958 #define __FUNC__ "MatSetValues_SeqBAIJ" 959 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 960 { 961 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 962 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 963 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 964 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 965 int ridx,cidx,bs2=a->bs2; 966 MatScalar *ap,value,*aa=a->a,*bap; 967 968 PetscFunctionBegin; 969 for ( k=0; k<m; k++ ) { /* loop over added rows */ 970 row = im[k]; brow = row/bs; 971 if (row < 0) continue; 972 #if defined(USE_PETSC_BOPT_g) 973 if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 974 #endif 975 rp = aj + ai[brow]; 976 ap = aa + bs2*ai[brow]; 977 rmax = imax[brow]; 978 nrow = ailen[brow]; 979 low = 0; 980 for ( l=0; l<n; l++ ) { /* loop over added columns */ 981 if (in[l] < 0) continue; 982 #if defined(USE_PETSC_BOPT_g) 983 if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 984 #endif 985 col = in[l]; bcol = col/bs; 986 ridx = row % bs; cidx = col % bs; 987 if (roworiented) { 988 value = v[l + k*n]; 989 } else { 990 value = v[k + l*m]; 991 } 992 if (!sorted) low = 0; high = nrow; 993 while (high-low > 7) { 994 t = (low+high)/2; 995 if (rp[t] > bcol) high = t; 996 else low = t; 997 } 998 for ( i=low; i<high; i++ ) { 999 if (rp[i] > bcol) break; 1000 if (rp[i] == bcol) { 1001 bap = ap + bs2*i + bs*cidx + ridx; 1002 if (is == ADD_VALUES) *bap += value; 1003 else *bap = value; 1004 goto noinsert1; 1005 } 1006 } 1007 if (nonew == 1) goto noinsert1; 1008 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 1009 if (nrow >= rmax) { 1010 /* there is no extra room in row, therefore enlarge */ 1011 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1012 MatScalar *new_a; 1013 1014 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 1015 1016 /* Malloc new storage space */ 1017 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1018 new_a = (MatScalar *) PetscMalloc( len ); CHKPTRQ(new_a); 1019 new_j = (int *) (new_a + bs2*new_nz); 1020 new_i = new_j + new_nz; 1021 1022 /* copy over old data into new slots */ 1023 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 1024 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1025 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 1026 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1027 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 1028 len*sizeof(int)); 1029 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar)); 1030 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar)); 1031 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 1032 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar)); 1033 /* free up old matrix storage */ 1034 PetscFree(a->a); 1035 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 1036 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1037 a->singlemalloc = 1; 1038 1039 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1040 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1041 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1042 a->maxnz += bs2*CHUNKSIZE; 1043 a->reallocs++; 1044 a->nz++; 1045 } 1046 N = nrow++ - 1; 1047 /* shift up all the later entries in this row */ 1048 for ( ii=N; ii>=i; ii-- ) { 1049 rp[ii+1] = rp[ii]; 1050 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar)); 1051 } 1052 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar)); 1053 rp[i] = bcol; 1054 ap[bs2*i + bs*cidx + ridx] = value; 1055 noinsert1:; 1056 low = i; 1057 } 1058 ailen[brow] = nrow; 1059 } 1060 PetscFunctionReturn(0); 1061 } 1062 1063 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1064 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1065 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1066 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 1067 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 1068 extern int MatMultTrans_SeqBAIJ(Mat,Vec,Vec); 1069 extern int MatMultTransAdd_SeqBAIJ(Mat,Vec,Vec,Vec); 1070 extern int MatScale_SeqBAIJ(Scalar*,Mat); 1071 extern int MatNorm_SeqBAIJ(Mat,NormType,double *); 1072 extern int MatEqual_SeqBAIJ(Mat,Mat, PetscTruth*); 1073 extern int MatGetDiagonal_SeqBAIJ(Mat,Vec); 1074 extern int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec); 1075 extern int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *); 1076 extern int MatZeroEntries_SeqBAIJ(Mat); 1077 1078 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1079 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1080 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1081 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1082 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1083 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1084 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 1085 1086 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1087 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1088 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1089 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1090 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1091 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1092 1093 extern int MatMult_SeqBAIJ_1(Mat,Vec,Vec); 1094 extern int MatMult_SeqBAIJ_2(Mat,Vec,Vec); 1095 extern int MatMult_SeqBAIJ_3(Mat,Vec,Vec); 1096 extern int MatMult_SeqBAIJ_4(Mat,Vec,Vec); 1097 extern int MatMult_SeqBAIJ_5(Mat,Vec,Vec); 1098 extern int MatMult_SeqBAIJ_7(Mat,Vec,Vec); 1099 extern int MatMult_SeqBAIJ_N(Mat,Vec,Vec); 1100 1101 extern int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec); 1102 extern int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec); 1103 extern int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec); 1104 extern int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec); 1105 extern int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec); 1106 extern int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec); 1107 extern int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec); 1108 1109 #undef __FUNC__ 1110 #define __FUNC__ "MatILUFactor_SeqBAIJ" 1111 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1112 { 1113 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1114 Mat outA; 1115 int ierr; 1116 PetscTruth row_identity, col_identity; 1117 1118 PetscFunctionBegin; 1119 if (info && info->fill != 0) SETERRQ(PETSC_ERR_SUP,0,"Only fill=0 supported"); 1120 ierr = ISIdentity(row,&row_identity); CHKERRQ(ierr); 1121 ierr = ISIdentity(col,&col_identity); CHKERRQ(ierr); 1122 if (!row_identity || !col_identity) { 1123 SETERRQ(1,1,"Row and column permutations must be identity"); 1124 } 1125 1126 outA = inA; 1127 inA->factor = FACTOR_LU; 1128 a->row = row; 1129 a->col = col; 1130 1131 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1132 ierr = ISInvertPermutation(col,&(a->icol)); CHKERRQ(ierr); 1133 PLogObjectParent(inA,a->icol); 1134 1135 if (!a->solve_work) { 1136 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1137 PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar)); 1138 } 1139 1140 if (!a->diag) { 1141 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 1142 } 1143 /* 1144 Blocksize 4 and 5 have a special faster factorization/solver for ILU(0) factorization 1145 with natural ordering 1146 */ 1147 if (a->bs == 4) { 1148 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 1149 inA->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering; 1150 PLogInfo(inA,"Using special in-place natural ordering factor and solve BS=4\n"); 1151 } else if (a->bs == 5) { 1152 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 1153 inA->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering; 1154 PLogInfo(inA,"Using special in-place natural ordering factor and solve BS=5\n"); 1155 } 1156 1157 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 1158 1159 1160 PetscFunctionReturn(0); 1161 } 1162 #undef __FUNC__ 1163 #define __FUNC__ "MatPrintHelp_SeqBAIJ" 1164 int MatPrintHelp_SeqBAIJ(Mat A) 1165 { 1166 static int called = 0; 1167 MPI_Comm comm = A->comm; 1168 1169 PetscFunctionBegin; 1170 if (called) {PetscFunctionReturn(0);} else called = 1; 1171 (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n"); 1172 (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n"); 1173 PetscFunctionReturn(0); 1174 } 1175 1176 EXTERN_C_BEGIN 1177 #undef __FUNC__ 1178 #define __FUNC__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1179 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1180 { 1181 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1182 int i,nz,n; 1183 1184 PetscFunctionBegin; 1185 nz = baij->maxnz; 1186 n = baij->n; 1187 for (i=0; i<nz; i++) { 1188 baij->j[i] = indices[i]; 1189 } 1190 baij->nz = nz; 1191 for ( i=0; i<n; i++ ) { 1192 baij->ilen[i] = baij->imax[i]; 1193 } 1194 1195 PetscFunctionReturn(0); 1196 } 1197 EXTERN_C_END 1198 1199 #undef __FUNC__ 1200 #define __FUNC__ "MatSeqBAIJSetColumnIndices" 1201 /*@ 1202 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1203 in the matrix. 1204 1205 Input Parameters: 1206 + mat - the SeqBAIJ matrix 1207 - indices - the column indices 1208 1209 Notes: 1210 This can be called if you have precomputed the nonzero structure of the 1211 matrix and want to provide it to the matrix object to improve the performance 1212 of the MatSetValues() operation. 1213 1214 You MUST have set the correct numbers of nonzeros per row in the call to 1215 MatCreateSeqBAIJ(). 1216 1217 MUST be called before any calls to MatSetValues(); 1218 1219 @*/ 1220 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1221 { 1222 int ierr,(*f)(Mat,int *); 1223 1224 PetscFunctionBegin; 1225 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1226 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void **)&f); 1227 CHKERRQ(ierr); 1228 if (f) { 1229 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1230 } else { 1231 SETERRQ(1,1,"Wrong type of matrix to set column indices"); 1232 } 1233 PetscFunctionReturn(0); 1234 } 1235 1236 /* -------------------------------------------------------------------*/ 1237 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1238 MatGetRow_SeqBAIJ, 1239 MatRestoreRow_SeqBAIJ, 1240 MatMult_SeqBAIJ_N, 1241 MatMultAdd_SeqBAIJ_N, 1242 MatMultTrans_SeqBAIJ, 1243 MatMultTransAdd_SeqBAIJ, 1244 MatSolve_SeqBAIJ_N, 1245 0, 1246 0, 1247 0, 1248 MatLUFactor_SeqBAIJ, 1249 0, 1250 0, 1251 MatTranspose_SeqBAIJ, 1252 MatGetInfo_SeqBAIJ, 1253 MatEqual_SeqBAIJ, 1254 MatGetDiagonal_SeqBAIJ, 1255 MatDiagonalScale_SeqBAIJ, 1256 MatNorm_SeqBAIJ, 1257 0, 1258 MatAssemblyEnd_SeqBAIJ, 1259 0, 1260 MatSetOption_SeqBAIJ, 1261 MatZeroEntries_SeqBAIJ, 1262 MatZeroRows_SeqBAIJ, 1263 MatLUFactorSymbolic_SeqBAIJ, 1264 MatLUFactorNumeric_SeqBAIJ_N, 1265 0, 1266 0, 1267 MatGetSize_SeqBAIJ, 1268 MatGetSize_SeqBAIJ, 1269 MatGetOwnershipRange_SeqBAIJ, 1270 MatILUFactorSymbolic_SeqBAIJ, 1271 0, 1272 0, 1273 0, 1274 MatDuplicate_SeqBAIJ, 1275 0, 1276 0, 1277 MatILUFactor_SeqBAIJ, 1278 0, 1279 0, 1280 MatGetSubMatrices_SeqBAIJ, 1281 MatIncreaseOverlap_SeqBAIJ, 1282 MatGetValues_SeqBAIJ, 1283 0, 1284 MatPrintHelp_SeqBAIJ, 1285 MatScale_SeqBAIJ, 1286 0, 1287 0, 1288 0, 1289 MatGetBlockSize_SeqBAIJ, 1290 MatGetRowIJ_SeqBAIJ, 1291 MatRestoreRowIJ_SeqBAIJ, 1292 0, 1293 0, 1294 0, 1295 0, 1296 0, 1297 0, 1298 MatSetValuesBlocked_SeqBAIJ, 1299 MatGetSubMatrix_SeqBAIJ, 1300 0, 1301 0, 1302 MatGetMaps_Petsc}; 1303 1304 #undef __FUNC__ 1305 #define __FUNC__ "MatCreateSeqBAIJ" 1306 /*@C 1307 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1308 compressed row) format. For good matrix assembly performance the 1309 user should preallocate the matrix storage by setting the parameter nz 1310 (or the array nzz). By setting these parameters accurately, performance 1311 during matrix assembly can be increased by more than a factor of 50. 1312 1313 Collective on MPI_Comm 1314 1315 Input Parameters: 1316 + comm - MPI communicator, set to PETSC_COMM_SELF 1317 . bs - size of block 1318 . m - number of rows 1319 . n - number of columns 1320 . nz - number of block nonzeros per block row (same for all rows) 1321 - nzz - array containing the number of block nonzeros in the various block rows 1322 (possibly different for each block row) or PETSC_NULL 1323 1324 Output Parameter: 1325 . A - the matrix 1326 1327 Options Database Keys: 1328 . -mat_no_unroll - uses code that does not unroll the loops in the 1329 block calculations (much slower) 1330 . -mat_block_size - size of the blocks to use 1331 1332 Notes: 1333 The block AIJ format is fully compatible with standard Fortran 77 1334 storage. That is, the stored row and column indices can begin at 1335 either one (as in Fortran) or zero. See the users' manual for details. 1336 1337 Specify the preallocated storage with either nz or nnz (not both). 1338 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1339 allocation. For additional details, see the users manual chapter on 1340 matrices. 1341 1342 Level: intermediate 1343 1344 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1345 @*/ 1346 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 1347 { 1348 Mat B; 1349 Mat_SeqBAIJ *b; 1350 int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size; 1351 1352 PetscFunctionBegin; 1353 MPI_Comm_size(comm,&size); 1354 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"Comm must be of size 1"); 1355 1356 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 1357 1358 if (mbs*bs!=m || nbs*bs!=n) { 1359 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number rows, cols must be divisible by blocksize"); 1360 } 1361 1362 *A = 0; 1363 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",comm,MatDestroy,MatView); 1364 PLogObjectCreate(B); 1365 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 1366 PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 1367 PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)); 1368 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 1369 if (!flg) { 1370 switch (bs) { 1371 case 1: 1372 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1373 B->ops->solve = MatSolve_SeqBAIJ_1; 1374 B->ops->mult = MatMult_SeqBAIJ_1; 1375 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1376 break; 1377 case 2: 1378 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1379 B->ops->solve = MatSolve_SeqBAIJ_2; 1380 B->ops->mult = MatMult_SeqBAIJ_2; 1381 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1382 break; 1383 case 3: 1384 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1385 B->ops->solve = MatSolve_SeqBAIJ_3; 1386 B->ops->mult = MatMult_SeqBAIJ_3; 1387 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1388 break; 1389 case 4: 1390 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1391 B->ops->solve = MatSolve_SeqBAIJ_4; 1392 B->ops->mult = MatMult_SeqBAIJ_4; 1393 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1394 break; 1395 case 5: 1396 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1397 B->ops->solve = MatSolve_SeqBAIJ_5; 1398 B->ops->mult = MatMult_SeqBAIJ_5; 1399 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1400 break; 1401 case 7: 1402 B->ops->mult = MatMult_SeqBAIJ_7; 1403 B->ops->solve = MatSolve_SeqBAIJ_7; 1404 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 1405 break; 1406 } 1407 } 1408 B->ops->destroy = MatDestroy_SeqBAIJ; 1409 B->ops->view = MatView_SeqBAIJ; 1410 B->factor = 0; 1411 B->lupivotthreshold = 1.0; 1412 B->mapping = 0; 1413 b->row = 0; 1414 b->col = 0; 1415 b->icol = 0; 1416 b->reallocs = 0; 1417 1418 b->m = m; B->m = m; B->M = m; 1419 b->n = n; B->n = n; B->N = n; 1420 1421 ierr = MapCreateMPI(comm,m,m,&B->rmap);CHKERRQ(ierr); 1422 ierr = MapCreateMPI(comm,n,n,&B->cmap);CHKERRQ(ierr); 1423 1424 b->mbs = mbs; 1425 b->nbs = nbs; 1426 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 1427 if (nnz == PETSC_NULL) { 1428 if (nz == PETSC_DEFAULT) nz = 5; 1429 else if (nz <= 0) nz = 1; 1430 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1431 nz = nz*mbs; 1432 } else { 1433 nz = 0; 1434 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 1435 } 1436 1437 /* allocate the matrix space */ 1438 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (b->m+1)*sizeof(int); 1439 b->a = (MatScalar *) PetscMalloc( len ); CHKPTRQ(b->a); 1440 PetscMemzero(b->a,nz*bs2*sizeof(MatScalar)); 1441 b->j = (int *) (b->a + nz*bs2); 1442 PetscMemzero(b->j,nz*sizeof(int)); 1443 b->i = b->j + nz; 1444 b->singlemalloc = 1; 1445 1446 b->i[0] = 0; 1447 for (i=1; i<mbs+1; i++) { 1448 b->i[i] = b->i[i-1] + b->imax[i-1]; 1449 } 1450 1451 /* b->ilen will count nonzeros in each block row so far. */ 1452 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1453 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1454 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 1455 1456 b->bs = bs; 1457 b->bs2 = bs2; 1458 b->mbs = mbs; 1459 b->nz = 0; 1460 b->maxnz = nz*bs2; 1461 b->sorted = 0; 1462 b->roworiented = 1; 1463 b->nonew = 0; 1464 b->diag = 0; 1465 b->solve_work = 0; 1466 b->mult_work = 0; 1467 b->spptr = 0; 1468 B->info.nz_unneeded = (double)b->maxnz; 1469 1470 *A = B; 1471 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 1472 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 1473 1474 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1475 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1476 (void*)MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1477 PetscFunctionReturn(0); 1478 } 1479 1480 #undef __FUNC__ 1481 #define __FUNC__ "MatDuplicate_SeqBAIJ" 1482 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1483 { 1484 Mat C; 1485 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 1486 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1487 1488 PetscFunctionBegin; 1489 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,0,"Corrupt matrix"); 1490 1491 *B = 0; 1492 PetscHeaderCreate(C,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",A->comm,MatDestroy,MatView); 1493 PLogObjectCreate(C); 1494 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 1495 PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps)); 1496 C->ops->destroy = MatDestroy_SeqBAIJ; 1497 C->ops->view = MatView_SeqBAIJ; 1498 C->factor = A->factor; 1499 c->row = 0; 1500 c->col = 0; 1501 C->assembled = PETSC_TRUE; 1502 1503 c->m = C->m = a->m; 1504 c->n = C->n = a->n; 1505 C->M = a->m; 1506 C->N = a->n; 1507 1508 c->bs = a->bs; 1509 c->bs2 = a->bs2; 1510 c->mbs = a->mbs; 1511 c->nbs = a->nbs; 1512 1513 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 1514 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 1515 for ( i=0; i<mbs; i++ ) { 1516 c->imax[i] = a->imax[i]; 1517 c->ilen[i] = a->ilen[i]; 1518 } 1519 1520 /* allocate the matrix space */ 1521 c->singlemalloc = 1; 1522 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1523 c->a = (MatScalar *) PetscMalloc( len ); CHKPTRQ(c->a); 1524 c->j = (int *) (c->a + nz*bs2); 1525 c->i = c->j + nz; 1526 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 1527 if (mbs > 0) { 1528 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 1529 if (cpvalues == MAT_COPY_VALUES) { 1530 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar)); 1531 } else { 1532 PetscMemzero(c->a,bs2*nz*sizeof(MatScalar)); 1533 } 1534 } 1535 1536 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1537 c->sorted = a->sorted; 1538 c->roworiented = a->roworiented; 1539 c->nonew = a->nonew; 1540 1541 if (a->diag) { 1542 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 1543 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1544 for ( i=0; i<mbs; i++ ) { 1545 c->diag[i] = a->diag[i]; 1546 } 1547 } 1548 else c->diag = 0; 1549 c->nz = a->nz; 1550 c->maxnz = a->maxnz; 1551 c->solve_work = 0; 1552 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1553 c->mult_work = 0; 1554 *B = C; 1555 ierr = FListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1556 PetscFunctionReturn(0); 1557 } 1558 1559 #undef __FUNC__ 1560 #define __FUNC__ "MatLoad_SeqBAIJ" 1561 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 1562 { 1563 Mat_SeqBAIJ *a; 1564 Mat B; 1565 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 1566 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1567 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1568 int *masked, nmask,tmp,bs2,ishift; 1569 Scalar *aa; 1570 MPI_Comm comm = ((PetscObject) viewer)->comm; 1571 1572 PetscFunctionBegin; 1573 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 1574 bs2 = bs*bs; 1575 1576 MPI_Comm_size(comm,&size); 1577 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"view must have one processor"); 1578 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1579 ierr = PetscBinaryRead(fd,header,4,PETSC_INT); CHKERRQ(ierr); 1580 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not Mat object"); 1581 M = header[1]; N = header[2]; nz = header[3]; 1582 1583 if (header[3] < 0) { 1584 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as SeqBAIJ"); 1585 } 1586 1587 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 1588 1589 /* 1590 This code adds extra rows to make sure the number of rows is 1591 divisible by the blocksize 1592 */ 1593 mbs = M/bs; 1594 extra_rows = bs - M + bs*(mbs); 1595 if (extra_rows == bs) extra_rows = 0; 1596 else mbs++; 1597 if (extra_rows) { 1598 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1599 } 1600 1601 /* read in row lengths */ 1602 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 1603 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 1604 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 1605 1606 /* read in column indices */ 1607 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 1608 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT); CHKERRQ(ierr); 1609 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 1610 1611 /* loop over row lengths determining block row lengths */ 1612 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1613 PetscMemzero(browlengths,mbs*sizeof(int)); 1614 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 1615 PetscMemzero(mask,mbs*sizeof(int)); 1616 masked = mask + mbs; 1617 rowcount = 0; nzcount = 0; 1618 for ( i=0; i<mbs; i++ ) { 1619 nmask = 0; 1620 for ( j=0; j<bs; j++ ) { 1621 kmax = rowlengths[rowcount]; 1622 for ( k=0; k<kmax; k++ ) { 1623 tmp = jj[nzcount++]/bs; 1624 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1625 } 1626 rowcount++; 1627 } 1628 browlengths[i] += nmask; 1629 /* zero out the mask elements we set */ 1630 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1631 } 1632 1633 /* create our matrix */ 1634 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1635 B = *A; 1636 a = (Mat_SeqBAIJ *) B->data; 1637 1638 /* set matrix "i" values */ 1639 a->i[0] = 0; 1640 for ( i=1; i<= mbs; i++ ) { 1641 a->i[i] = a->i[i-1] + browlengths[i-1]; 1642 a->ilen[i-1] = browlengths[i-1]; 1643 } 1644 a->nz = 0; 1645 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 1646 1647 /* read in nonzero values */ 1648 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 1649 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR); CHKERRQ(ierr); 1650 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 1651 1652 /* set "a" and "j" values into matrix */ 1653 nzcount = 0; jcount = 0; 1654 for ( i=0; i<mbs; i++ ) { 1655 nzcountb = nzcount; 1656 nmask = 0; 1657 for ( j=0; j<bs; j++ ) { 1658 kmax = rowlengths[i*bs+j]; 1659 for ( k=0; k<kmax; k++ ) { 1660 tmp = jj[nzcount++]/bs; 1661 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1662 } 1663 rowcount++; 1664 } 1665 /* sort the masked values */ 1666 PetscSortInt(nmask,masked); 1667 1668 /* set "j" values into matrix */ 1669 maskcount = 1; 1670 for ( j=0; j<nmask; j++ ) { 1671 a->j[jcount++] = masked[j]; 1672 mask[masked[j]] = maskcount++; 1673 } 1674 /* set "a" values into matrix */ 1675 ishift = bs2*a->i[i]; 1676 for ( j=0; j<bs; j++ ) { 1677 kmax = rowlengths[i*bs+j]; 1678 for ( k=0; k<kmax; k++ ) { 1679 tmp = jj[nzcountb]/bs ; 1680 block = mask[tmp] - 1; 1681 point = jj[nzcountb] - bs*tmp; 1682 idx = ishift + bs2*block + j + bs*point; 1683 a->a[idx] = aa[nzcountb++]; 1684 } 1685 } 1686 /* zero out the mask elements we set */ 1687 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1688 } 1689 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"Bad binary matrix"); 1690 1691 PetscFree(rowlengths); 1692 PetscFree(browlengths); 1693 PetscFree(aa); 1694 PetscFree(jj); 1695 PetscFree(mask); 1696 1697 B->assembled = PETSC_TRUE; 1698 1699 ierr = MatView_Private(B); CHKERRQ(ierr); 1700 PetscFunctionReturn(0); 1701 } 1702 1703 1704 1705