1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "src/mat/impls/sbaij/seq/sbaij.h" 11 12 #define CHUNKSIZE 10 13 14 /* 15 Checks for missing diagonals 16 */ 17 #undef __FUNCT__ 18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 19 int MatMissingDiagonal_SeqSBAIJ(Mat A) 20 { 21 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 22 int *diag,*jj = a->j,i,ierr; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 int MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 int i,mbs = a->mbs,ierr; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr); 44 PetscLogObjectMemory(A,(mbs+1)*sizeof(int)); 45 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 50 51 #undef __FUNCT__ 52 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 53 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 54 { 55 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 56 57 PetscFunctionBegin; 58 if (ia) { 59 SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering"); 60 } 61 *nn = a->mbs; 62 PetscFunctionReturn(0); 63 } 64 65 #undef __FUNCT__ 66 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 67 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 68 { 69 PetscFunctionBegin; 70 if (!ia) PetscFunctionReturn(0); 71 SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering"); 72 /* PetscFunctionReturn(0); */ 73 } 74 75 #undef __FUNCT__ 76 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ" 77 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs) 78 { 79 Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data; 80 81 PetscFunctionBegin; 82 *bs = sbaij->bs; 83 PetscFunctionReturn(0); 84 } 85 86 #undef __FUNCT__ 87 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 88 int MatDestroy_SeqSBAIJ(Mat A) 89 { 90 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 91 int ierr; 92 93 PetscFunctionBegin; 94 #if defined(PETSC_USE_LOG) 95 PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz); 96 #endif 97 ierr = PetscFree(a->a);CHKERRQ(ierr); 98 if (!a->singlemalloc) { 99 ierr = PetscFree(a->i);CHKERRQ(ierr); 100 ierr = PetscFree(a->j);CHKERRQ(ierr); 101 } 102 if (a->row) { 103 ierr = ISDestroy(a->row);CHKERRQ(ierr); 104 } 105 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 106 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 107 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 108 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 109 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 110 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 111 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 112 113 if (a->inew){ 114 ierr = PetscFree(a->inew);CHKERRQ(ierr); 115 a->inew = 0; 116 } 117 ierr = PetscFree(a);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 123 int MatSetOption_SeqSBAIJ(Mat A,MatOption op) 124 { 125 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 126 127 PetscFunctionBegin; 128 switch (op) { 129 case MAT_ROW_ORIENTED: 130 a->roworiented = PETSC_TRUE; 131 break; 132 case MAT_COLUMN_ORIENTED: 133 a->roworiented = PETSC_FALSE; 134 break; 135 case MAT_COLUMNS_SORTED: 136 a->sorted = PETSC_TRUE; 137 break; 138 case MAT_COLUMNS_UNSORTED: 139 a->sorted = PETSC_FALSE; 140 break; 141 case MAT_KEEP_ZEROED_ROWS: 142 a->keepzeroedrows = PETSC_TRUE; 143 break; 144 case MAT_NO_NEW_NONZERO_LOCATIONS: 145 a->nonew = 1; 146 break; 147 case MAT_NEW_NONZERO_LOCATION_ERR: 148 a->nonew = -1; 149 break; 150 case MAT_NEW_NONZERO_ALLOCATION_ERR: 151 a->nonew = -2; 152 break; 153 case MAT_YES_NEW_NONZERO_LOCATIONS: 154 a->nonew = 0; 155 break; 156 case MAT_ROWS_SORTED: 157 case MAT_ROWS_UNSORTED: 158 case MAT_YES_NEW_DIAGONALS: 159 case MAT_IGNORE_OFF_PROC_ENTRIES: 160 case MAT_USE_HASH_TABLE: 161 case MAT_USE_SINGLE_PRECISION_SOLVES: 162 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 163 break; 164 case MAT_NO_NEW_DIAGONALS: 165 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 166 default: 167 SETERRQ(PETSC_ERR_SUP,"unknown option"); 168 } 169 PetscFunctionReturn(0); 170 } 171 172 #undef __FUNCT__ 173 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 174 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v) 175 { 176 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 177 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr; 178 MatScalar *aa,*aa_i; 179 PetscScalar *v_i; 180 181 PetscFunctionBegin; 182 bs = a->bs; 183 ai = a->i; 184 aj = a->j; 185 aa = a->a; 186 bs2 = a->bs2; 187 188 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 189 190 bn = row/bs; /* Block number */ 191 bp = row % bs; /* Block position */ 192 M = ai[bn+1] - ai[bn]; 193 *ncols = bs*M; 194 195 if (v) { 196 *v = 0; 197 if (*ncols) { 198 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 199 for (i=0; i<M; i++) { /* for each block in the block row */ 200 v_i = *v + i*bs; 201 aa_i = aa + bs2*(ai[bn] + i); 202 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 203 } 204 } 205 } 206 207 if (cols) { 208 *cols = 0; 209 if (*ncols) { 210 ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr); 211 for (i=0; i<M; i++) { /* for each block in the block row */ 212 cols_i = *cols + i*bs; 213 itmp = bs*aj[ai[bn] + i]; 214 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 215 } 216 } 217 } 218 219 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 220 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 221 #ifdef column_search 222 v_i = *v + M*bs; 223 cols_i = *cols + M*bs; 224 for (i=0; i<bn; i++){ /* for each block row */ 225 M = ai[i+1] - ai[i]; 226 for (j=0; j<M; j++){ 227 itmp = aj[ai[i] + j]; /* block column value */ 228 if (itmp == bn){ 229 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 230 for (k=0; k<bs; k++) { 231 *cols_i++ = i*bs+k; 232 *v_i++ = aa_i[k]; 233 } 234 *ncols += bs; 235 break; 236 } 237 } 238 } 239 #endif 240 241 PetscFunctionReturn(0); 242 } 243 244 #undef __FUNCT__ 245 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 246 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 247 { 248 int ierr; 249 250 PetscFunctionBegin; 251 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 252 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 253 PetscFunctionReturn(0); 254 } 255 256 #undef __FUNCT__ 257 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 258 int MatTranspose_SeqSBAIJ(Mat A,Mat *B) 259 { 260 PetscFunctionBegin; 261 SETERRQ(1,"Matrix is symmetric. MatTranspose() should not be called"); 262 /* PetscFunctionReturn(0); */ 263 } 264 265 #undef __FUNCT__ 266 #define __FUNCT__ "MatView_SeqSBAIJ_Binary" 267 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer) 268 { 269 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 270 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 271 PetscScalar *aa; 272 FILE *file; 273 274 PetscFunctionBegin; 275 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 276 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 277 col_lens[0] = MAT_FILE_COOKIE; 278 279 col_lens[1] = A->m; 280 col_lens[2] = A->m; 281 col_lens[3] = a->s_nz*bs2; 282 283 /* store lengths of each row and write (including header) to file */ 284 count = 0; 285 for (i=0; i<a->mbs; i++) { 286 for (j=0; j<bs; j++) { 287 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 288 } 289 } 290 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 291 ierr = PetscFree(col_lens);CHKERRQ(ierr); 292 293 /* store column indices (zero start index) */ 294 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 295 count = 0; 296 for (i=0; i<a->mbs; i++) { 297 for (j=0; j<bs; j++) { 298 for (k=a->i[i]; k<a->i[i+1]; k++) { 299 for (l=0; l<bs; l++) { 300 jj[count++] = bs*a->j[k] + l; 301 } 302 } 303 } 304 } 305 ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr); 306 ierr = PetscFree(jj);CHKERRQ(ierr); 307 308 /* store nonzero values */ 309 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 310 count = 0; 311 for (i=0; i<a->mbs; i++) { 312 for (j=0; j<bs; j++) { 313 for (k=a->i[i]; k<a->i[i+1]; k++) { 314 for (l=0; l<bs; l++) { 315 aa[count++] = a->a[bs2*k + l*bs + j]; 316 } 317 } 318 } 319 } 320 ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr); 321 ierr = PetscFree(aa);CHKERRQ(ierr); 322 323 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 324 if (file) { 325 fprintf(file,"-matload_block_size %d\n",a->bs); 326 } 327 PetscFunctionReturn(0); 328 } 329 330 #undef __FUNCT__ 331 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 332 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 333 { 334 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 335 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 336 char *name; 337 PetscViewerFormat format; 338 339 PetscFunctionBegin; 340 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 341 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 342 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) { 343 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 344 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 345 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 346 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 347 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 348 for (i=0; i<a->mbs; i++) { 349 for (j=0; j<bs; j++) { 350 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 351 for (k=a->i[i]; k<a->i[i+1]; k++) { 352 for (l=0; l<bs; l++) { 353 #if defined(PETSC_USE_COMPLEX) 354 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 355 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 356 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 357 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 358 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 359 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 360 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 361 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 362 } 363 #else 364 if (a->a[bs2*k + l*bs + j] != 0.0) { 365 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 366 } 367 #endif 368 } 369 } 370 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 371 } 372 } 373 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 374 } else { 375 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 376 for (i=0; i<a->mbs; i++) { 377 for (j=0; j<bs; j++) { 378 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 379 for (k=a->i[i]; k<a->i[i+1]; k++) { 380 for (l=0; l<bs; l++) { 381 #if defined(PETSC_USE_COMPLEX) 382 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 383 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 384 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 385 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 386 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 387 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 388 } else { 389 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 390 } 391 #else 392 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 393 #endif 394 } 395 } 396 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 397 } 398 } 399 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 400 } 401 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 402 PetscFunctionReturn(0); 403 } 404 405 #undef __FUNCT__ 406 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 407 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 408 { 409 Mat A = (Mat) Aa; 410 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 411 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 412 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 413 MatScalar *aa; 414 MPI_Comm comm; 415 PetscViewer viewer; 416 417 PetscFunctionBegin; 418 /* 419 This is nasty. If this is called from an originally parallel matrix 420 then all processes call this,but only the first has the matrix so the 421 rest should return immediately. 422 */ 423 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 424 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 425 if (rank) PetscFunctionReturn(0); 426 427 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 428 429 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 430 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 431 432 /* loop over matrix elements drawing boxes */ 433 color = PETSC_DRAW_BLUE; 434 for (i=0,row=0; i<mbs; i++,row+=bs) { 435 for (j=a->i[i]; j<a->i[i+1]; j++) { 436 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 437 x_l = a->j[j]*bs; x_r = x_l + 1.0; 438 aa = a->a + j*bs2; 439 for (k=0; k<bs; k++) { 440 for (l=0; l<bs; l++) { 441 if (PetscRealPart(*aa++) >= 0.) continue; 442 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 443 } 444 } 445 } 446 } 447 color = PETSC_DRAW_CYAN; 448 for (i=0,row=0; i<mbs; i++,row+=bs) { 449 for (j=a->i[i]; j<a->i[i+1]; j++) { 450 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 451 x_l = a->j[j]*bs; x_r = x_l + 1.0; 452 aa = a->a + j*bs2; 453 for (k=0; k<bs; k++) { 454 for (l=0; l<bs; l++) { 455 if (PetscRealPart(*aa++) != 0.) continue; 456 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 457 } 458 } 459 } 460 } 461 462 color = PETSC_DRAW_RED; 463 for (i=0,row=0; i<mbs; i++,row+=bs) { 464 for (j=a->i[i]; j<a->i[i+1]; j++) { 465 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 466 x_l = a->j[j]*bs; x_r = x_l + 1.0; 467 aa = a->a + j*bs2; 468 for (k=0; k<bs; k++) { 469 for (l=0; l<bs; l++) { 470 if (PetscRealPart(*aa++) <= 0.) continue; 471 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 472 } 473 } 474 } 475 } 476 PetscFunctionReturn(0); 477 } 478 479 #undef __FUNCT__ 480 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 481 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 482 { 483 int ierr; 484 PetscReal xl,yl,xr,yr,w,h; 485 PetscDraw draw; 486 PetscTruth isnull; 487 488 PetscFunctionBegin; 489 490 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 491 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 492 493 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 494 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 495 xr += w; yr += h; xl = -w; yl = -h; 496 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 497 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 498 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 499 PetscFunctionReturn(0); 500 } 501 502 #undef __FUNCT__ 503 #define __FUNCT__ "MatView_SeqSBAIJ" 504 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 505 { 506 int ierr; 507 PetscTruth issocket,isascii,isbinary,isdraw; 508 509 PetscFunctionBegin; 510 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 511 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 512 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 513 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 514 if (issocket) { 515 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 516 } else if (isascii){ 517 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 518 } else if (isbinary) { 519 ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr); 520 } else if (isdraw) { 521 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 522 } else { 523 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 524 } 525 PetscFunctionReturn(0); 526 } 527 528 529 #undef __FUNCT__ 530 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 531 int MatGetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 532 { 533 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 534 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 535 int *ai = a->i,*ailen = a->ilen; 536 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 537 MatScalar *ap,*aa = a->a,zero = 0.0; 538 539 PetscFunctionBegin; 540 for (k=0; k<m; k++) { /* loop over rows */ 541 row = im[k]; brow = row/bs; 542 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 543 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 544 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 545 nrow = ailen[brow]; 546 for (l=0; l<n; l++) { /* loop over columns */ 547 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 548 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 549 col = in[l] ; 550 bcol = col/bs; 551 cidx = col%bs; 552 ridx = row%bs; 553 high = nrow; 554 low = 0; /* assume unsorted */ 555 while (high-low > 5) { 556 t = (low+high)/2; 557 if (rp[t] > bcol) high = t; 558 else low = t; 559 } 560 for (i=low; i<high; i++) { 561 if (rp[i] > bcol) break; 562 if (rp[i] == bcol) { 563 *v++ = ap[bs2*i+bs*cidx+ridx]; 564 goto finished; 565 } 566 } 567 *v++ = zero; 568 finished:; 569 } 570 } 571 PetscFunctionReturn(0); 572 } 573 574 575 #undef __FUNCT__ 576 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 577 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 578 { 579 PetscFunctionBegin; 580 SETERRQ(1,"Function not yet written for SBAIJ format"); 581 } 582 583 #undef __FUNCT__ 584 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 585 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 586 { 587 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 588 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 589 int m = A->m,*ip,N,*ailen = a->ilen; 590 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 591 MatScalar *aa = a->a,*ap; 592 593 PetscFunctionBegin; 594 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 595 596 if (m) rmax = ailen[0]; 597 for (i=1; i<mbs; i++) { 598 /* move each row back by the amount of empty slots (fshift) before it*/ 599 fshift += imax[i-1] - ailen[i-1]; 600 rmax = PetscMax(rmax,ailen[i]); 601 if (fshift) { 602 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 603 N = ailen[i]; 604 for (j=0; j<N; j++) { 605 ip[j-fshift] = ip[j]; 606 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 607 } 608 } 609 ai[i] = ai[i-1] + ailen[i-1]; 610 } 611 if (mbs) { 612 fshift += imax[mbs-1] - ailen[mbs-1]; 613 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 614 } 615 /* reset ilen and imax for each row */ 616 for (i=0; i<mbs; i++) { 617 ailen[i] = imax[i] = ai[i+1] - ai[i]; 618 } 619 a->s_nz = ai[mbs]; 620 621 /* diagonals may have moved, reset it */ 622 if (a->diag) { 623 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr); 624 } 625 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 626 m,A->m,a->bs,fshift*bs2,a->s_nz*bs2); 627 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n", 628 a->reallocs); 629 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 630 a->reallocs = 0; 631 A->info.nz_unneeded = (PetscReal)fshift*bs2; 632 633 PetscFunctionReturn(0); 634 } 635 636 /* 637 This function returns an array of flags which indicate the locations of contiguous 638 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 639 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 640 Assume: sizes should be long enough to hold all the values. 641 */ 642 #undef __FUNCT__ 643 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 644 static int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 645 { 646 int i,j,k,row; 647 PetscTruth flg; 648 649 PetscFunctionBegin; 650 for (i=0,j=0; i<n; j++) { 651 row = idx[i]; 652 if (row%bs!=0) { /* Not the begining of a block */ 653 sizes[j] = 1; 654 i++; 655 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 656 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 657 i++; 658 } else { /* Begining of the block, so check if the complete block exists */ 659 flg = PETSC_TRUE; 660 for (k=1; k<bs; k++) { 661 if (row+k != idx[i+k]) { /* break in the block */ 662 flg = PETSC_FALSE; 663 break; 664 } 665 } 666 if (flg == PETSC_TRUE) { /* No break in the bs */ 667 sizes[j] = bs; 668 i+= bs; 669 } else { 670 sizes[j] = 1; 671 i++; 672 } 673 } 674 } 675 *bs_max = j; 676 PetscFunctionReturn(0); 677 } 678 679 #undef __FUNCT__ 680 #define __FUNCT__ "MatZeroRows_SeqSBAIJ" 681 int MatZeroRows_SeqSBAIJ(Mat A,IS is,PetscScalar *diag) 682 { 683 Mat_SeqSBAIJ *sbaij=(Mat_SeqSBAIJ*)A->data; 684 int ierr,i,j,k,count,is_n,*is_idx,*rows; 685 int bs=sbaij->bs,bs2=sbaij->bs2,*sizes,row,bs_max; 686 PetscScalar zero = 0.0; 687 MatScalar *aa; 688 689 PetscFunctionBegin; 690 /* Make a copy of the IS and sort it */ 691 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 692 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 693 694 /* allocate memory for rows,sizes */ 695 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 696 sizes = rows + is_n; 697 698 /* initialize copy IS values to rows, and sort them */ 699 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 700 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 701 if (sbaij->keepzeroedrows) { /* do not change nonzero structure */ 702 for (i=0; i<is_n; i++) { sizes[i] = 1; } /* sizes: size of blocks, = 1 or bs */ 703 bs_max = is_n; /* bs_max: num. of contiguous block row in the row */ 704 } else { 705 ierr = MatZeroRows_SeqSBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 706 } 707 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 708 709 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 710 row = rows[j]; /* row to be zeroed */ 711 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 712 count = (sbaij->i[row/bs +1] - sbaij->i[row/bs])*bs; /* num. of elements in the row */ 713 aa = sbaij->a + sbaij->i[row/bs]*bs2 + (row%bs); 714 if (sizes[i] == bs && !sbaij->keepzeroedrows) { 715 if (diag) { 716 if (sbaij->ilen[row/bs] > 0) { 717 sbaij->ilen[row/bs] = 1; 718 sbaij->j[sbaij->i[row/bs]] = row/bs; 719 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 720 } 721 /* Now insert all the diagoanl values for this bs */ 722 for (k=0; k<bs; k++) { 723 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 724 } 725 } else { /* (!diag) */ 726 sbaij->ilen[row/bs] = 0; 727 } /* end (!diag) */ 728 } else { /* (sizes[i] != bs), broken block */ 729 #if defined (PETSC_USE_DEBUG) 730 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 731 #endif 732 for (k=0; k<count; k++) { 733 aa[0] = zero; 734 aa+=bs; 735 } 736 if (diag) { 737 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 738 } 739 } 740 } 741 742 ierr = PetscFree(rows);CHKERRQ(ierr); 743 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 744 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 745 PetscFunctionReturn(0); 746 } 747 748 /* Only add/insert a(i,j) with i<=j (blocks). 749 Any a(i,j) with i>j input by user is ingored. 750 */ 751 752 #undef __FUNCT__ 753 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 754 int MatSetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 755 { 756 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 757 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 758 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 759 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 760 int ridx,cidx,bs2=a->bs2,ierr; 761 MatScalar *ap,value,*aa=a->a,*bap; 762 763 PetscFunctionBegin; 764 765 for (k=0; k<m; k++) { /* loop over added rows */ 766 row = im[k]; /* row number */ 767 brow = row/bs; /* block row number */ 768 if (row < 0) continue; 769 #if defined(PETSC_USE_BOPT_g) 770 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 771 #endif 772 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 773 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 774 rmax = imax[brow]; /* maximum space allocated for this row */ 775 nrow = ailen[brow]; /* actual length of this row */ 776 low = 0; 777 778 for (l=0; l<n; l++) { /* loop over added columns */ 779 if (in[l] < 0) continue; 780 #if defined(PETSC_USE_BOPT_g) 781 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m); 782 #endif 783 col = in[l]; 784 bcol = col/bs; /* block col number */ 785 786 if (brow <= bcol){ 787 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 788 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 789 /* element value a(k,l) */ 790 if (roworiented) { 791 value = v[l + k*n]; 792 } else { 793 value = v[k + l*m]; 794 } 795 796 /* move pointer bap to a(k,l) quickly and add/insert value */ 797 if (!sorted) low = 0; high = nrow; 798 while (high-low > 7) { 799 t = (low+high)/2; 800 if (rp[t] > bcol) high = t; 801 else low = t; 802 } 803 for (i=low; i<high; i++) { 804 /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */ 805 if (rp[i] > bcol) break; 806 if (rp[i] == bcol) { 807 bap = ap + bs2*i + bs*cidx + ridx; 808 if (is == ADD_VALUES) *bap += value; 809 else *bap = value; 810 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 811 if (brow == bcol && ridx < cidx){ 812 bap = ap + bs2*i + bs*ridx + cidx; 813 if (is == ADD_VALUES) *bap += value; 814 else *bap = value; 815 } 816 goto noinsert1; 817 } 818 } 819 820 if (nonew == 1) goto noinsert1; 821 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 822 if (nrow >= rmax) { 823 /* there is no extra room in row, therefore enlarge */ 824 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 825 MatScalar *new_a; 826 827 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 828 829 /* Malloc new storage space */ 830 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 831 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 832 new_j = (int*)(new_a + bs2*new_nz); 833 new_i = new_j + new_nz; 834 835 /* copy over old data into new slots */ 836 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 837 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 838 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 839 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 840 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 841 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 842 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 843 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 844 /* free up old matrix storage */ 845 ierr = PetscFree(a->a);CHKERRQ(ierr); 846 if (!a->singlemalloc) { 847 ierr = PetscFree(a->i);CHKERRQ(ierr); 848 ierr = PetscFree(a->j);CHKERRQ(ierr); 849 } 850 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 851 a->singlemalloc = PETSC_TRUE; 852 853 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 854 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 855 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 856 a->s_maxnz += bs2*CHUNKSIZE; 857 a->reallocs++; 858 a->s_nz++; 859 } 860 861 N = nrow++ - 1; 862 /* shift up all the later entries in this row */ 863 for (ii=N; ii>=i; ii--) { 864 rp[ii+1] = rp[ii]; 865 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 866 } 867 if (N>=i) { 868 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 869 } 870 rp[i] = bcol; 871 ap[bs2*i + bs*cidx + ridx] = value; 872 noinsert1:; 873 low = i; 874 /* } */ 875 } 876 } /* end of if .. if.. */ 877 } /* end of loop over added columns */ 878 ailen[brow] = nrow; 879 } /* end of loop over added rows */ 880 881 PetscFunctionReturn(0); 882 } 883 884 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,PetscReal,Mat*); 885 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,PetscReal); 886 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int); 887 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 888 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 889 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec); 890 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec); 891 extern int MatScale_SeqSBAIJ(PetscScalar*,Mat); 892 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 893 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 894 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec); 895 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 896 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 897 extern int MatZeroEntries_SeqSBAIJ(Mat); 898 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec); 899 900 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 901 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 902 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 903 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 904 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 905 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 906 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 907 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 908 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec); 909 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec); 910 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec); 911 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec); 912 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec); 913 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec); 914 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec); 915 916 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 917 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 918 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 919 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 920 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 921 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 922 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 923 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 924 925 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 926 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 927 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 928 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 929 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 930 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 931 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 932 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*); 933 934 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 935 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 936 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 937 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 938 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 939 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 940 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 941 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 942 943 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 944 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 945 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 946 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 947 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 948 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 949 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 950 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 951 952 #ifdef HAVE_MatICCFactor 953 /* modefied from MatILUFactor_SeqSBAIJ, needs further work! */ 954 #undef __FUNCT__ 955 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 956 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,PetscReal fill,int level) 957 { 958 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 959 Mat outA; 960 int ierr; 961 PetscTruth row_identity,col_identity; 962 963 PetscFunctionBegin; 964 /* 965 if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 966 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 967 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 968 if (!row_identity || !col_identity) { 969 SETERRQ(1,"Row and column permutations must be identity for in-place ICC"); 970 } 971 */ 972 973 outA = inA; 974 inA->factor = FACTOR_CHOLESKY; 975 976 if (!a->diag) { 977 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 978 } 979 /* 980 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 981 for ILU(0) factorization with natural ordering 982 */ 983 switch (a->bs) { 984 case 1: 985 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 986 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 987 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 988 case 2: 989 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 990 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 991 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 992 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 993 break; 994 case 3: 995 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 996 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 997 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 998 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 999 break; 1000 case 4: 1001 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1002 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1003 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1004 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1005 break; 1006 case 5: 1007 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1008 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1009 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1010 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1011 break; 1012 case 6: 1013 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1014 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1015 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1016 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1017 break; 1018 case 7: 1019 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1020 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1021 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1022 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1023 break; 1024 default: 1025 a->row = row; 1026 a->icol = col; 1027 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1028 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1029 1030 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1031 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1032 PetscLogObjectParent(inA,a->icol); 1033 1034 if (!a->solve_work) { 1035 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1036 PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar)); 1037 } 1038 } 1039 1040 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1041 1042 PetscFunctionReturn(0); 1043 } 1044 #endif 1045 1046 #undef __FUNCT__ 1047 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1048 int MatPrintHelp_SeqSBAIJ(Mat A) 1049 { 1050 static PetscTruth called = PETSC_FALSE; 1051 MPI_Comm comm = A->comm; 1052 int ierr; 1053 1054 PetscFunctionBegin; 1055 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1056 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1057 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1058 PetscFunctionReturn(0); 1059 } 1060 1061 EXTERN_C_BEGIN 1062 #undef __FUNCT__ 1063 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1064 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices) 1065 { 1066 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1067 int i,nz,n; 1068 1069 PetscFunctionBegin; 1070 nz = baij->s_maxnz; 1071 n = mat->n; 1072 for (i=0; i<nz; i++) { 1073 baij->j[i] = indices[i]; 1074 } 1075 baij->s_nz = nz; 1076 for (i=0; i<n; i++) { 1077 baij->ilen[i] = baij->imax[i]; 1078 } 1079 1080 PetscFunctionReturn(0); 1081 } 1082 EXTERN_C_END 1083 1084 #undef __FUNCT__ 1085 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1086 /*@ 1087 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1088 in the matrix. 1089 1090 Input Parameters: 1091 + mat - the SeqSBAIJ matrix 1092 - indices - the column indices 1093 1094 Level: advanced 1095 1096 Notes: 1097 This can be called if you have precomputed the nonzero structure of the 1098 matrix and want to provide it to the matrix object to improve the performance 1099 of the MatSetValues() operation. 1100 1101 You MUST have set the correct numbers of nonzeros per row in the call to 1102 MatCreateSeqSBAIJ(). 1103 1104 MUST be called before any calls to MatSetValues() 1105 1106 .seealso: MatCreateSeqSBAIJ 1107 @*/ 1108 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices) 1109 { 1110 int ierr,(*f)(Mat,int *); 1111 1112 PetscFunctionBegin; 1113 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1114 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1115 if (f) { 1116 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1117 } else { 1118 SETERRQ(1,"Wrong type of matrix to set column indices"); 1119 } 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1125 int MatSetUpPreallocation_SeqSBAIJ(Mat A) 1126 { 1127 int ierr; 1128 1129 PetscFunctionBegin; 1130 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1131 PetscFunctionReturn(0); 1132 } 1133 1134 /* -------------------------------------------------------------------*/ 1135 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1136 MatGetRow_SeqSBAIJ, 1137 MatRestoreRow_SeqSBAIJ, 1138 MatMult_SeqSBAIJ_N, 1139 MatMultAdd_SeqSBAIJ_N, 1140 MatMultTranspose_SeqSBAIJ, 1141 MatMultTransposeAdd_SeqSBAIJ, 1142 MatSolve_SeqSBAIJ_N, 1143 0, 1144 0, 1145 0, 1146 0, 1147 MatCholeskyFactor_SeqSBAIJ, 1148 MatRelax_SeqSBAIJ, 1149 MatTranspose_SeqSBAIJ, 1150 MatGetInfo_SeqSBAIJ, 1151 MatEqual_SeqSBAIJ, 1152 MatGetDiagonal_SeqSBAIJ, 1153 MatDiagonalScale_SeqSBAIJ, 1154 MatNorm_SeqSBAIJ, 1155 0, 1156 MatAssemblyEnd_SeqSBAIJ, 1157 0, 1158 MatSetOption_SeqSBAIJ, 1159 MatZeroEntries_SeqSBAIJ, 1160 MatZeroRows_SeqSBAIJ, 1161 0, 1162 0, 1163 MatCholeskyFactorSymbolic_SeqSBAIJ, 1164 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1165 MatSetUpPreallocation_SeqSBAIJ, 1166 0, 1167 MatICCFactorSymbolic_SeqSBAIJ, 1168 0, 1169 0, 1170 MatDuplicate_SeqSBAIJ, 1171 0, 1172 0, 1173 0, 1174 0, 1175 0, 1176 MatGetSubMatrices_SeqSBAIJ, 1177 MatIncreaseOverlap_SeqSBAIJ, 1178 MatGetValues_SeqSBAIJ, 1179 0, 1180 MatPrintHelp_SeqSBAIJ, 1181 MatScale_SeqSBAIJ, 1182 0, 1183 0, 1184 0, 1185 MatGetBlockSize_SeqSBAIJ, 1186 MatGetRowIJ_SeqSBAIJ, 1187 MatRestoreRowIJ_SeqSBAIJ, 1188 0, 1189 0, 1190 0, 1191 0, 1192 0, 1193 0, 1194 MatSetValuesBlocked_SeqSBAIJ, 1195 MatGetSubMatrix_SeqSBAIJ, 1196 0, 1197 0, 1198 MatGetPetscMaps_Petsc, 1199 0, 1200 0, 1201 0, 1202 0, 1203 0, 1204 0, 1205 MatGetRowMax_SeqSBAIJ}; 1206 1207 EXTERN_C_BEGIN 1208 #undef __FUNCT__ 1209 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1210 int MatStoreValues_SeqSBAIJ(Mat mat) 1211 { 1212 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1213 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1214 int ierr; 1215 1216 PetscFunctionBegin; 1217 if (aij->nonew != 1) { 1218 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1219 } 1220 1221 /* allocate space for values if not already there */ 1222 if (!aij->saved_values) { 1223 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1224 } 1225 1226 /* copy values over */ 1227 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1228 PetscFunctionReturn(0); 1229 } 1230 EXTERN_C_END 1231 1232 EXTERN_C_BEGIN 1233 #undef __FUNCT__ 1234 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1235 int MatRetrieveValues_SeqSBAIJ(Mat mat) 1236 { 1237 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1238 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1239 1240 PetscFunctionBegin; 1241 if (aij->nonew != 1) { 1242 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1243 } 1244 if (!aij->saved_values) { 1245 SETERRQ(1,"Must call MatStoreValues(A);first"); 1246 } 1247 1248 /* copy values over */ 1249 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1250 PetscFunctionReturn(0); 1251 } 1252 EXTERN_C_END 1253 1254 EXTERN_C_BEGIN 1255 #undef __FUNCT__ 1256 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1257 int MatCreate_SeqSBAIJ(Mat B) 1258 { 1259 Mat_SeqSBAIJ *b; 1260 int ierr,size; 1261 1262 PetscFunctionBegin; 1263 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1264 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1265 B->m = B->M = PetscMax(B->m,B->M); 1266 B->n = B->N = PetscMax(B->n,B->N); 1267 1268 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1269 B->data = (void*)b; 1270 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1271 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1272 B->ops->destroy = MatDestroy_SeqSBAIJ; 1273 B->ops->view = MatView_SeqSBAIJ; 1274 B->factor = 0; 1275 B->lupivotthreshold = 1.0; 1276 B->mapping = 0; 1277 b->row = 0; 1278 b->icol = 0; 1279 b->reallocs = 0; 1280 b->saved_values = 0; 1281 1282 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1283 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1284 1285 b->sorted = PETSC_FALSE; 1286 b->roworiented = PETSC_TRUE; 1287 b->nonew = 0; 1288 b->diag = 0; 1289 b->solve_work = 0; 1290 b->mult_work = 0; 1291 B->spptr = 0; 1292 b->keepzeroedrows = PETSC_FALSE; 1293 1294 b->inew = 0; 1295 b->jnew = 0; 1296 b->anew = 0; 1297 b->a2anew = 0; 1298 b->permute = PETSC_FALSE; 1299 1300 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1301 "MatStoreValues_SeqSBAIJ", 1302 (void*)MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1304 "MatRetrieveValues_SeqSBAIJ", 1305 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1306 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1307 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1308 (void*)MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1309 PetscFunctionReturn(0); 1310 } 1311 EXTERN_C_END 1312 1313 #undef __FUNCT__ 1314 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1315 /*@C 1316 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1317 compressed row) format. For good matrix assembly performance the 1318 user should preallocate the matrix storage by setting the parameter nz 1319 (or the array nnz). By setting these parameters accurately, performance 1320 during matrix assembly can be increased by more than a factor of 50. 1321 1322 Collective on Mat 1323 1324 Input Parameters: 1325 + A - the symmetric matrix 1326 . bs - size of block 1327 . nz - number of block nonzeros per block row (same for all rows) 1328 - nnz - array containing the number of block nonzeros in the various block rows 1329 (possibly different for each block row) or PETSC_NULL 1330 1331 Options Database Keys: 1332 . -mat_no_unroll - uses code that does not unroll the loops in the 1333 block calculations (much slower) 1334 . -mat_block_size - size of the blocks to use 1335 1336 Level: intermediate 1337 1338 Notes: 1339 The block AIJ format is compatible with standard Fortran 77 1340 storage. That is, the stored row and column indices can begin at 1341 either one (as in Fortran) or zero. See the users' manual for details. 1342 1343 Specify the preallocated storage with either nz or nnz (not both). 1344 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1345 allocation. For additional details, see the users manual chapter on 1346 matrices. 1347 1348 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1349 @*/ 1350 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1351 { 1352 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1353 int i,len,ierr,mbs,bs2; 1354 PetscTruth flg; 1355 int s_nz; 1356 1357 PetscFunctionBegin; 1358 B->preallocated = PETSC_TRUE; 1359 ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1360 mbs = B->m/bs; 1361 bs2 = bs*bs; 1362 1363 if (mbs*bs != B->m) { 1364 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1365 } 1366 1367 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1368 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1369 if (nnz) { 1370 for (i=0; i<mbs; i++) { 1371 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1372 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs); 1373 } 1374 } 1375 1376 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1377 if (!flg) { 1378 switch (bs) { 1379 case 1: 1380 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1381 B->ops->solve = MatSolve_SeqSBAIJ_1; 1382 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_1; 1383 B->ops->mult = MatMult_SeqSBAIJ_1; 1384 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1385 break; 1386 case 2: 1387 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1388 B->ops->solve = MatSolve_SeqSBAIJ_2; 1389 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_2; 1390 B->ops->mult = MatMult_SeqSBAIJ_2; 1391 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1392 break; 1393 case 3: 1394 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1395 B->ops->solve = MatSolve_SeqSBAIJ_3; 1396 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_3; 1397 B->ops->mult = MatMult_SeqSBAIJ_3; 1398 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1399 break; 1400 case 4: 1401 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1402 B->ops->solve = MatSolve_SeqSBAIJ_4; 1403 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_4; 1404 B->ops->mult = MatMult_SeqSBAIJ_4; 1405 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1406 break; 1407 case 5: 1408 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1409 B->ops->solve = MatSolve_SeqSBAIJ_5; 1410 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_5; 1411 B->ops->mult = MatMult_SeqSBAIJ_5; 1412 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1413 break; 1414 case 6: 1415 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1416 B->ops->solve = MatSolve_SeqSBAIJ_6; 1417 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_6; 1418 B->ops->mult = MatMult_SeqSBAIJ_6; 1419 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1420 break; 1421 case 7: 1422 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1423 B->ops->solve = MatSolve_SeqSBAIJ_7; 1424 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_7; 1425 B->ops->mult = MatMult_SeqSBAIJ_7; 1426 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1427 break; 1428 } 1429 } 1430 1431 b->mbs = mbs; 1432 b->nbs = mbs; 1433 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1434 if (!nnz) { 1435 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1436 else if (nz <= 0) nz = 1; 1437 for (i=0; i<mbs; i++) { 1438 b->imax[i] = nz; 1439 } 1440 nz = nz*mbs; /* total nz */ 1441 } else { 1442 nz = 0; 1443 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1444 } 1445 /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1446 s_nz = nz; 1447 1448 /* allocate the matrix space */ 1449 len = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1450 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1451 ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1452 b->j = (int*)(b->a + s_nz*bs2); 1453 ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr); 1454 b->i = b->j + s_nz; 1455 b->singlemalloc = PETSC_TRUE; 1456 1457 /* pointer to beginning of each row */ 1458 b->i[0] = 0; 1459 for (i=1; i<mbs+1; i++) { 1460 b->i[i] = b->i[i-1] + b->imax[i-1]; 1461 } 1462 1463 /* b->ilen will count nonzeros in each block row so far. */ 1464 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1465 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1466 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1467 1468 b->bs = bs; 1469 b->bs2 = bs2; 1470 b->s_nz = 0; 1471 b->s_maxnz = s_nz*bs2; 1472 1473 b->inew = 0; 1474 b->jnew = 0; 1475 b->anew = 0; 1476 b->a2anew = 0; 1477 b->permute = PETSC_FALSE; 1478 PetscFunctionReturn(0); 1479 } 1480 1481 1482 #undef __FUNCT__ 1483 #define __FUNCT__ "MatCreateSeqSBAIJ" 1484 /*@C 1485 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1486 compressed row) format. For good matrix assembly performance the 1487 user should preallocate the matrix storage by setting the parameter nz 1488 (or the array nnz). By setting these parameters accurately, performance 1489 during matrix assembly can be increased by more than a factor of 50. 1490 1491 Collective on MPI_Comm 1492 1493 Input Parameters: 1494 + comm - MPI communicator, set to PETSC_COMM_SELF 1495 . bs - size of block 1496 . m - number of rows, or number of columns 1497 . nz - number of block nonzeros per block row (same for all rows) 1498 - nnz - array containing the number of block nonzeros in the various block rows 1499 (possibly different for each block row) or PETSC_NULL 1500 1501 Output Parameter: 1502 . A - the symmetric matrix 1503 1504 Options Database Keys: 1505 . -mat_no_unroll - uses code that does not unroll the loops in the 1506 block calculations (much slower) 1507 . -mat_block_size - size of the blocks to use 1508 1509 Level: intermediate 1510 1511 Notes: 1512 The block AIJ format is compatible with standard Fortran 77 1513 storage. That is, the stored row and column indices can begin at 1514 either one (as in Fortran) or zero. See the users' manual for details. 1515 1516 Specify the preallocated storage with either nz or nnz (not both). 1517 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1518 allocation. For additional details, see the users manual chapter on 1519 matrices. 1520 1521 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1522 @*/ 1523 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1524 { 1525 int ierr; 1526 1527 PetscFunctionBegin; 1528 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1529 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1530 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1531 PetscFunctionReturn(0); 1532 } 1533 1534 #undef __FUNCT__ 1535 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1536 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1537 { 1538 Mat C; 1539 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1540 int i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr; 1541 1542 PetscFunctionBegin; 1543 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1544 1545 *B = 0; 1546 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1547 ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr); 1548 c = (Mat_SeqSBAIJ*)C->data; 1549 1550 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1551 C->preallocated = PETSC_TRUE; 1552 C->factor = A->factor; 1553 c->row = 0; 1554 c->icol = 0; 1555 c->saved_values = 0; 1556 c->keepzeroedrows = a->keepzeroedrows; 1557 C->assembled = PETSC_TRUE; 1558 1559 c->bs = a->bs; 1560 c->bs2 = a->bs2; 1561 c->mbs = a->mbs; 1562 c->nbs = a->nbs; 1563 1564 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1565 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1566 for (i=0; i<mbs; i++) { 1567 c->imax[i] = a->imax[i]; 1568 c->ilen[i] = a->ilen[i]; 1569 } 1570 1571 /* allocate the matrix space */ 1572 c->singlemalloc = PETSC_TRUE; 1573 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1574 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1575 c->j = (int*)(c->a + nz*bs2); 1576 c->i = c->j + nz; 1577 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1578 if (mbs > 0) { 1579 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1580 if (cpvalues == MAT_COPY_VALUES) { 1581 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1582 } else { 1583 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1584 } 1585 } 1586 1587 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1588 c->sorted = a->sorted; 1589 c->roworiented = a->roworiented; 1590 c->nonew = a->nonew; 1591 1592 if (a->diag) { 1593 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1594 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1595 for (i=0; i<mbs; i++) { 1596 c->diag[i] = a->diag[i]; 1597 } 1598 } else c->diag = 0; 1599 c->s_nz = a->s_nz; 1600 c->s_maxnz = a->s_maxnz; 1601 c->solve_work = 0; 1602 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1603 c->mult_work = 0; 1604 *B = C; 1605 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1606 PetscFunctionReturn(0); 1607 } 1608 1609 EXTERN_C_BEGIN 1610 #undef __FUNCT__ 1611 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1612 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A) 1613 { 1614 Mat_SeqSBAIJ *a; 1615 Mat B; 1616 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1617 int *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1618 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1619 int *masked,nmask,tmp,bs2,ishift; 1620 PetscScalar *aa; 1621 MPI_Comm comm = ((PetscObject)viewer)->comm; 1622 1623 PetscFunctionBegin; 1624 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1625 bs2 = bs*bs; 1626 1627 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1628 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1629 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1630 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1631 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1632 M = header[1]; N = header[2]; nz = header[3]; 1633 1634 if (header[3] < 0) { 1635 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1636 } 1637 1638 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1639 1640 /* 1641 This code adds extra rows to make sure the number of rows is 1642 divisible by the blocksize 1643 */ 1644 mbs = M/bs; 1645 extra_rows = bs - M + bs*(mbs); 1646 if (extra_rows == bs) extra_rows = 0; 1647 else mbs++; 1648 if (extra_rows) { 1649 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1650 } 1651 1652 /* read in row lengths */ 1653 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1654 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1655 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1656 1657 /* read in column indices */ 1658 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1659 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1660 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1661 1662 /* loop over row lengths determining block row lengths */ 1663 ierr = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr); 1664 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1665 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1666 masked = mask + mbs; 1667 rowcount = 0; nzcount = 0; 1668 for (i=0; i<mbs; i++) { 1669 nmask = 0; 1670 for (j=0; j<bs; j++) { 1671 kmax = rowlengths[rowcount]; 1672 for (k=0; k<kmax; k++) { 1673 tmp = jj[nzcount++]/bs; /* block col. index */ 1674 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1675 } 1676 rowcount++; 1677 } 1678 s_browlengths[i] += nmask; 1679 1680 /* zero out the mask elements we set */ 1681 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1682 } 1683 1684 /* create our matrix */ 1685 ierr = MatCreateSeqSBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,s_browlengths,A);CHKERRQ(ierr); 1686 B = *A; 1687 a = (Mat_SeqSBAIJ*)B->data; 1688 1689 /* set matrix "i" values */ 1690 a->i[0] = 0; 1691 for (i=1; i<= mbs; i++) { 1692 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1693 a->ilen[i-1] = s_browlengths[i-1]; 1694 } 1695 a->s_nz = a->i[mbs]; 1696 1697 /* read in nonzero values */ 1698 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1699 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1700 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1701 1702 /* set "a" and "j" values into matrix */ 1703 nzcount = 0; jcount = 0; 1704 for (i=0; i<mbs; i++) { 1705 nzcountb = nzcount; 1706 nmask = 0; 1707 for (j=0; j<bs; j++) { 1708 kmax = rowlengths[i*bs+j]; 1709 for (k=0; k<kmax; k++) { 1710 tmp = jj[nzcount++]/bs; /* block col. index */ 1711 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1712 } 1713 } 1714 /* sort the masked values */ 1715 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1716 1717 /* set "j" values into matrix */ 1718 maskcount = 1; 1719 for (j=0; j<nmask; j++) { 1720 a->j[jcount++] = masked[j]; 1721 mask[masked[j]] = maskcount++; 1722 } 1723 1724 /* set "a" values into matrix */ 1725 ishift = bs2*a->i[i]; 1726 for (j=0; j<bs; j++) { 1727 kmax = rowlengths[i*bs+j]; 1728 for (k=0; k<kmax; k++) { 1729 tmp = jj[nzcountb]/bs ; /* block col. index */ 1730 if (tmp >= i){ 1731 block = mask[tmp] - 1; 1732 point = jj[nzcountb] - bs*tmp; 1733 idx = ishift + bs2*block + j + bs*point; 1734 a->a[idx] = aa[nzcountb]; 1735 } 1736 nzcountb++; 1737 } 1738 } 1739 /* zero out the mask elements we set */ 1740 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1741 } 1742 if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1743 1744 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1745 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1746 ierr = PetscFree(aa);CHKERRQ(ierr); 1747 ierr = PetscFree(jj);CHKERRQ(ierr); 1748 ierr = PetscFree(mask);CHKERRQ(ierr); 1749 1750 B->assembled = PETSC_TRUE; 1751 ierr = MatView_Private(B);CHKERRQ(ierr); 1752 PetscFunctionReturn(0); 1753 } 1754 EXTERN_C_END 1755 1756 #undef __FUNCT__ 1757 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1758 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1759 { 1760 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1761 MatScalar *aa=a->a,*v,*v1; 1762 PetscScalar *x,*b,*t,sum,d; 1763 int m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr; 1764 int nz,nz1,*vj,*vj1,i; 1765 1766 PetscFunctionBegin; 1767 its = its*lits; 1768 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1769 1770 if (bs > 1) 1771 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1772 1773 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1774 if (xx != bb) { 1775 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1776 } else { 1777 b = x; 1778 } 1779 1780 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1781 1782 if (flag & SOR_ZERO_INITIAL_GUESS) { 1783 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1784 for (i=0; i<m; i++) 1785 t[i] = b[i]; 1786 1787 for (i=0; i<m; i++){ 1788 d = *(aa + ai[i]); /* diag[i] */ 1789 v = aa + ai[i] + 1; 1790 vj = aj + ai[i] + 1; 1791 nz = ai[i+1] - ai[i] - 1; 1792 x[i] = omega*t[i]/d; 1793 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1794 PetscLogFlops(2*nz-1); 1795 } 1796 } 1797 1798 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1799 for (i=0; i<m; i++) 1800 t[i] = b[i]; 1801 1802 for (i=0; i<m-1; i++){ /* update rhs */ 1803 v = aa + ai[i] + 1; 1804 vj = aj + ai[i] + 1; 1805 nz = ai[i+1] - ai[i] - 1; 1806 while (nz--) t[*vj++] -= x[i]*(*v++); 1807 PetscLogFlops(2*nz-1); 1808 } 1809 for (i=m-1; i>=0; i--){ 1810 d = *(aa + ai[i]); 1811 v = aa + ai[i] + 1; 1812 vj = aj + ai[i] + 1; 1813 nz = ai[i+1] - ai[i] - 1; 1814 sum = t[i]; 1815 while (nz--) sum -= x[*vj++]*(*v++); 1816 PetscLogFlops(2*nz-1); 1817 x[i] = (1-omega)*x[i] + omega*sum/d; 1818 } 1819 } 1820 its--; 1821 } 1822 1823 while (its--) { 1824 /* 1825 forward sweep: 1826 for i=0,...,m-1: 1827 sum[i] = (b[i] - U(i,:)x )/d[i]; 1828 x[i] = (1-omega)x[i] + omega*sum[i]; 1829 b = b - x[i]*U^T(i,:); 1830 1831 */ 1832 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1833 for (i=0; i<m; i++) 1834 t[i] = b[i]; 1835 1836 for (i=0; i<m; i++){ 1837 d = *(aa + ai[i]); /* diag[i] */ 1838 v = aa + ai[i] + 1; v1=v; 1839 vj = aj + ai[i] + 1; vj1=vj; 1840 nz = ai[i+1] - ai[i] - 1; nz1=nz; 1841 sum = t[i]; 1842 while (nz1--) sum -= (*v1++)*x[*vj1++]; 1843 x[i] = (1-omega)*x[i] + omega*sum/d; 1844 while (nz--) t[*vj++] -= x[i]*(*v++); 1845 PetscLogFlops(4*nz-2); 1846 } 1847 } 1848 1849 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1850 /* 1851 backward sweep: 1852 b = b - x[i]*U^T(i,:), i=0,...,n-2 1853 for i=m-1,...,0: 1854 sum[i] = (b[i] - U(i,:)x )/d[i]; 1855 x[i] = (1-omega)x[i] + omega*sum[i]; 1856 */ 1857 for (i=0; i<m; i++) 1858 t[i] = b[i]; 1859 1860 for (i=0; i<m-1; i++){ /* update rhs */ 1861 v = aa + ai[i] + 1; 1862 vj = aj + ai[i] + 1; 1863 nz = ai[i+1] - ai[i] - 1; 1864 while (nz--) t[*vj++] -= x[i]*(*v++); 1865 PetscLogFlops(2*nz-1); 1866 } 1867 for (i=m-1; i>=0; i--){ 1868 d = *(aa + ai[i]); 1869 v = aa + ai[i] + 1; 1870 vj = aj + ai[i] + 1; 1871 nz = ai[i+1] - ai[i] - 1; 1872 sum = t[i]; 1873 while (nz--) sum -= x[*vj++]*(*v++); 1874 PetscLogFlops(2*nz-1); 1875 x[i] = (1-omega)*x[i] + omega*sum/d; 1876 } 1877 } 1878 } 1879 1880 ierr = PetscFree(t); CHKERRQ(ierr); 1881 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1882 if (bb != xx) { 1883 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1884 } 1885 1886 PetscFunctionReturn(0); 1887 } 1888 1889 1890 1891 1892 1893 1894