1 2 /* 3 Defines the basic matrix operations for the SBAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 7 #include "src/inline/spops.h" 8 #include "src/mat/impls/sbaij/seq/sbaij.h" 9 10 #define CHUNKSIZE 10 11 12 /* 13 Checks for missing diagonals 14 */ 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 17 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 18 { 19 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 20 PetscErrorCode ierr; 21 int *diag,*jj = a->j,i; 22 23 PetscFunctionBegin; 24 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 25 diag = a->diag; 26 for (i=0; i<a->mbs; i++) { 27 if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i); 28 } 29 PetscFunctionReturn(0); 30 } 31 32 #undef __FUNCT__ 33 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 34 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 35 { 36 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 37 PetscErrorCode ierr; 38 int i,mbs = a->mbs; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr); 44 PetscLogObjectMemory(A,(mbs+1)*sizeof(int)); 45 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 int n = a->mbs,i; 55 56 PetscFunctionBegin; 57 *nn = n; 58 if (!ia) PetscFunctionReturn(0); 59 60 if (oshift == 1) { 61 /* temporarily add 1 to i and j indices */ 62 int nz = a->i[n]; 63 for (i=0; i<nz; i++) a->j[i]++; 64 for (i=0; i<n+1; i++) a->i[i]++; 65 *ia = a->i; *ja = a->j; 66 } else { 67 *ia = a->i; *ja = a->j; 68 } 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 74 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 75 { 76 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 77 int i,n = a->mbs; 78 79 PetscFunctionBegin; 80 if (!ia) PetscFunctionReturn(0); 81 82 if (oshift == 1) { 83 int nz = a->i[n]-1; 84 for (i=0; i<nz; i++) a->j[i]--; 85 for (i=0; i<n+1; i++) a->i[i]--; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ" 92 PetscErrorCode MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs) 93 { 94 Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data; 95 96 PetscFunctionBegin; 97 *bs = sbaij->bs; 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 103 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 104 { 105 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 106 PetscErrorCode ierr; 107 108 PetscFunctionBegin; 109 #if defined(PETSC_USE_LOG) 110 PetscLogObjectState((PetscObject)A,"Rows=%d, NZ=%d",A->m,a->nz); 111 #endif 112 ierr = PetscFree(a->a);CHKERRQ(ierr); 113 if (!a->singlemalloc) { 114 ierr = PetscFree(a->i);CHKERRQ(ierr); 115 ierr = PetscFree(a->j);CHKERRQ(ierr); 116 } 117 if (a->row) { 118 ierr = ISDestroy(a->row);CHKERRQ(ierr); 119 } 120 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 121 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 122 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 123 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 124 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 125 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 126 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 127 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 128 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 129 130 if (a->inew){ 131 ierr = PetscFree(a->inew);CHKERRQ(ierr); 132 a->inew = 0; 133 } 134 ierr = PetscFree(a);CHKERRQ(ierr); 135 PetscFunctionReturn(0); 136 } 137 138 #undef __FUNCT__ 139 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 140 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op) 141 { 142 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 143 144 PetscFunctionBegin; 145 switch (op) { 146 case MAT_ROW_ORIENTED: 147 a->roworiented = PETSC_TRUE; 148 break; 149 case MAT_COLUMN_ORIENTED: 150 a->roworiented = PETSC_FALSE; 151 break; 152 case MAT_COLUMNS_SORTED: 153 a->sorted = PETSC_TRUE; 154 break; 155 case MAT_COLUMNS_UNSORTED: 156 a->sorted = PETSC_FALSE; 157 break; 158 case MAT_KEEP_ZEROED_ROWS: 159 a->keepzeroedrows = PETSC_TRUE; 160 break; 161 case MAT_NO_NEW_NONZERO_LOCATIONS: 162 a->nonew = 1; 163 break; 164 case MAT_NEW_NONZERO_LOCATION_ERR: 165 a->nonew = -1; 166 break; 167 case MAT_NEW_NONZERO_ALLOCATION_ERR: 168 a->nonew = -2; 169 break; 170 case MAT_YES_NEW_NONZERO_LOCATIONS: 171 a->nonew = 0; 172 break; 173 case MAT_ROWS_SORTED: 174 case MAT_ROWS_UNSORTED: 175 case MAT_YES_NEW_DIAGONALS: 176 case MAT_IGNORE_OFF_PROC_ENTRIES: 177 case MAT_USE_HASH_TABLE: 178 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 179 break; 180 case MAT_NO_NEW_DIAGONALS: 181 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 182 case MAT_NOT_SYMMETRIC: 183 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 184 case MAT_HERMITIAN: 185 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 186 case MAT_SYMMETRIC: 187 case MAT_STRUCTURALLY_SYMMETRIC: 188 case MAT_NOT_HERMITIAN: 189 case MAT_SYMMETRY_ETERNAL: 190 case MAT_NOT_SYMMETRY_ETERNAL: 191 break; 192 default: 193 SETERRQ(PETSC_ERR_SUP,"unknown option"); 194 } 195 PetscFunctionReturn(0); 196 } 197 198 #undef __FUNCT__ 199 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 200 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v) 201 { 202 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 203 PetscErrorCode ierr; 204 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 205 MatScalar *aa,*aa_i; 206 PetscScalar *v_i; 207 208 PetscFunctionBegin; 209 bs = a->bs; 210 ai = a->i; 211 aj = a->j; 212 aa = a->a; 213 bs2 = a->bs2; 214 215 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %d out of range", row); 216 217 bn = row/bs; /* Block number */ 218 bp = row % bs; /* Block position */ 219 M = ai[bn+1] - ai[bn]; 220 *ncols = bs*M; 221 222 if (v) { 223 *v = 0; 224 if (*ncols) { 225 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 226 for (i=0; i<M; i++) { /* for each block in the block row */ 227 v_i = *v + i*bs; 228 aa_i = aa + bs2*(ai[bn] + i); 229 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 230 } 231 } 232 } 233 234 if (cols) { 235 *cols = 0; 236 if (*ncols) { 237 ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr); 238 for (i=0; i<M; i++) { /* for each block in the block row */ 239 cols_i = *cols + i*bs; 240 itmp = bs*aj[ai[bn] + i]; 241 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 242 } 243 } 244 } 245 246 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 247 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 248 #ifdef column_search 249 v_i = *v + M*bs; 250 cols_i = *cols + M*bs; 251 for (i=0; i<bn; i++){ /* for each block row */ 252 M = ai[i+1] - ai[i]; 253 for (j=0; j<M; j++){ 254 itmp = aj[ai[i] + j]; /* block column value */ 255 if (itmp == bn){ 256 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 257 for (k=0; k<bs; k++) { 258 *cols_i++ = i*bs+k; 259 *v_i++ = aa_i[k]; 260 } 261 *ncols += bs; 262 break; 263 } 264 } 265 } 266 #endif 267 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 273 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 274 { 275 PetscErrorCode ierr; 276 277 PetscFunctionBegin; 278 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 279 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 280 PetscFunctionReturn(0); 281 } 282 283 #undef __FUNCT__ 284 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 285 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 286 { 287 PetscErrorCode ierr; 288 PetscFunctionBegin; 289 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 290 PetscFunctionReturn(0); 291 } 292 293 #undef __FUNCT__ 294 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 295 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 296 { 297 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 298 PetscErrorCode ierr; 299 int i,j,bs = a->bs,k,l,bs2=a->bs2; 300 char *name; 301 PetscViewerFormat format; 302 303 PetscFunctionBegin; 304 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 305 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 306 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 307 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 308 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 309 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 310 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 311 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 312 for (i=0; i<a->mbs; i++) { 313 for (j=0; j<bs; j++) { 314 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 315 for (k=a->i[i]; k<a->i[i+1]; k++) { 316 for (l=0; l<bs; l++) { 317 #if defined(PETSC_USE_COMPLEX) 318 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 319 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 320 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 321 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 322 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 323 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 324 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 325 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 326 } 327 #else 328 if (a->a[bs2*k + l*bs + j] != 0.0) { 329 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 330 } 331 #endif 332 } 333 } 334 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 335 } 336 } 337 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 338 } else { 339 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 340 for (i=0; i<a->mbs; i++) { 341 for (j=0; j<bs; j++) { 342 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 343 for (k=a->i[i]; k<a->i[i+1]; k++) { 344 for (l=0; l<bs; l++) { 345 #if defined(PETSC_USE_COMPLEX) 346 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 347 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 348 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 349 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 350 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 351 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 352 } else { 353 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 354 } 355 #else 356 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 357 #endif 358 } 359 } 360 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 361 } 362 } 363 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 364 } 365 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 366 PetscFunctionReturn(0); 367 } 368 369 #undef __FUNCT__ 370 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 371 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 372 { 373 Mat A = (Mat) Aa; 374 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 375 PetscErrorCode ierr; 376 int row,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 377 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 378 MatScalar *aa; 379 MPI_Comm comm; 380 PetscViewer viewer; 381 382 PetscFunctionBegin; 383 /* 384 This is nasty. If this is called from an originally parallel matrix 385 then all processes call this,but only the first has the matrix so the 386 rest should return immediately. 387 */ 388 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 389 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 390 if (rank) PetscFunctionReturn(0); 391 392 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 393 394 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 395 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 396 397 /* loop over matrix elements drawing boxes */ 398 color = PETSC_DRAW_BLUE; 399 for (i=0,row=0; i<mbs; i++,row+=bs) { 400 for (j=a->i[i]; j<a->i[i+1]; j++) { 401 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 402 x_l = a->j[j]*bs; x_r = x_l + 1.0; 403 aa = a->a + j*bs2; 404 for (k=0; k<bs; k++) { 405 for (l=0; l<bs; l++) { 406 if (PetscRealPart(*aa++) >= 0.) continue; 407 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 408 } 409 } 410 } 411 } 412 color = PETSC_DRAW_CYAN; 413 for (i=0,row=0; i<mbs; i++,row+=bs) { 414 for (j=a->i[i]; j<a->i[i+1]; j++) { 415 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 416 x_l = a->j[j]*bs; x_r = x_l + 1.0; 417 aa = a->a + j*bs2; 418 for (k=0; k<bs; k++) { 419 for (l=0; l<bs; l++) { 420 if (PetscRealPart(*aa++) != 0.) continue; 421 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 422 } 423 } 424 } 425 } 426 427 color = PETSC_DRAW_RED; 428 for (i=0,row=0; i<mbs; i++,row+=bs) { 429 for (j=a->i[i]; j<a->i[i+1]; j++) { 430 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 431 x_l = a->j[j]*bs; x_r = x_l + 1.0; 432 aa = a->a + j*bs2; 433 for (k=0; k<bs; k++) { 434 for (l=0; l<bs; l++) { 435 if (PetscRealPart(*aa++) <= 0.) continue; 436 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 437 } 438 } 439 } 440 } 441 PetscFunctionReturn(0); 442 } 443 444 #undef __FUNCT__ 445 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 446 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 447 { 448 PetscErrorCode ierr; 449 PetscReal xl,yl,xr,yr,w,h; 450 PetscDraw draw; 451 PetscTruth isnull; 452 453 PetscFunctionBegin; 454 455 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 456 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 457 458 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 459 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 460 xr += w; yr += h; xl = -w; yl = -h; 461 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 462 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 463 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 464 PetscFunctionReturn(0); 465 } 466 467 #undef __FUNCT__ 468 #define __FUNCT__ "MatView_SeqSBAIJ" 469 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 470 { 471 PetscErrorCode ierr; 472 PetscTruth iascii,isdraw; 473 474 PetscFunctionBegin; 475 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 476 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 477 if (iascii){ 478 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 479 } else if (isdraw) { 480 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 481 } else { 482 Mat B; 483 ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr); 484 ierr = MatView(B,viewer);CHKERRQ(ierr); 485 ierr = MatDestroy(B);CHKERRQ(ierr); 486 } 487 PetscFunctionReturn(0); 488 } 489 490 491 #undef __FUNCT__ 492 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 493 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[]) 494 { 495 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 496 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 497 int *ai = a->i,*ailen = a->ilen; 498 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 499 MatScalar *ap,*aa = a->a,zero = 0.0; 500 501 PetscFunctionBegin; 502 for (k=0; k<m; k++) { /* loop over rows */ 503 row = im[k]; brow = row/bs; 504 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row); 505 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 506 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 507 nrow = ailen[brow]; 508 for (l=0; l<n; l++) { /* loop over columns */ 509 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",in[l]); 510 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1); 511 col = in[l] ; 512 bcol = col/bs; 513 cidx = col%bs; 514 ridx = row%bs; 515 high = nrow; 516 low = 0; /* assume unsorted */ 517 while (high-low > 5) { 518 t = (low+high)/2; 519 if (rp[t] > bcol) high = t; 520 else low = t; 521 } 522 for (i=low; i<high; i++) { 523 if (rp[i] > bcol) break; 524 if (rp[i] == bcol) { 525 *v++ = ap[bs2*i+bs*cidx+ridx]; 526 goto finished; 527 } 528 } 529 *v++ = zero; 530 finished:; 531 } 532 } 533 PetscFunctionReturn(0); 534 } 535 536 537 #undef __FUNCT__ 538 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 539 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 540 { 541 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 542 PetscErrorCode ierr; 543 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 544 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 545 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval; 546 PetscTruth roworiented=a->roworiented; 547 const MatScalar *value = v; 548 MatScalar *ap,*aa = a->a,*bap; 549 550 PetscFunctionBegin; 551 if (roworiented) { 552 stepval = (n-1)*bs; 553 } else { 554 stepval = (m-1)*bs; 555 } 556 for (k=0; k<m; k++) { /* loop over added rows */ 557 row = im[k]; 558 if (row < 0) continue; 559 #if defined(PETSC_USE_BOPT_g) 560 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1); 561 #endif 562 rp = aj + ai[row]; 563 ap = aa + bs2*ai[row]; 564 rmax = imax[row]; 565 nrow = ailen[row]; 566 low = 0; 567 for (l=0; l<n; l++) { /* loop over added columns */ 568 if (in[l] < 0) continue; 569 col = in[l]; 570 #if defined(PETSC_USE_BOPT_g) 571 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",col,a->nbs-1); 572 #endif 573 if (col < row) continue; /* ignore lower triangular block */ 574 if (roworiented) { 575 value = v + k*(stepval+bs)*bs + l*bs; 576 } else { 577 value = v + l*(stepval+bs)*bs + k*bs; 578 } 579 if (!sorted) low = 0; high = nrow; 580 while (high-low > 7) { 581 t = (low+high)/2; 582 if (rp[t] > col) high = t; 583 else low = t; 584 } 585 for (i=low; i<high; i++) { 586 if (rp[i] > col) break; 587 if (rp[i] == col) { 588 bap = ap + bs2*i; 589 if (roworiented) { 590 if (is == ADD_VALUES) { 591 for (ii=0; ii<bs; ii++,value+=stepval) { 592 for (jj=ii; jj<bs2; jj+=bs) { 593 bap[jj] += *value++; 594 } 595 } 596 } else { 597 for (ii=0; ii<bs; ii++,value+=stepval) { 598 for (jj=ii; jj<bs2; jj+=bs) { 599 bap[jj] = *value++; 600 } 601 } 602 } 603 } else { 604 if (is == ADD_VALUES) { 605 for (ii=0; ii<bs; ii++,value+=stepval) { 606 for (jj=0; jj<bs; jj++) { 607 *bap++ += *value++; 608 } 609 } 610 } else { 611 for (ii=0; ii<bs; ii++,value+=stepval) { 612 for (jj=0; jj<bs; jj++) { 613 *bap++ = *value++; 614 } 615 } 616 } 617 } 618 goto noinsert2; 619 } 620 } 621 if (nonew == 1) goto noinsert2; 622 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 623 if (nrow >= rmax) { 624 /* there is no extra room in row, therefore enlarge */ 625 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 626 MatScalar *new_a; 627 628 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 629 630 /* malloc new storage space */ 631 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 632 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 633 new_j = (int*)(new_a + bs2*new_nz); 634 new_i = new_j + new_nz; 635 636 /* copy over old data into new slots */ 637 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 638 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 639 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 640 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 641 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 642 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 643 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 644 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 645 /* free up old matrix storage */ 646 ierr = PetscFree(a->a);CHKERRQ(ierr); 647 if (!a->singlemalloc) { 648 ierr = PetscFree(a->i);CHKERRQ(ierr); 649 ierr = PetscFree(a->j);CHKERRQ(ierr); 650 } 651 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 652 a->singlemalloc = PETSC_TRUE; 653 654 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 655 rmax = imax[row] = imax[row] + CHUNKSIZE; 656 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 657 a->maxnz += bs2*CHUNKSIZE; 658 a->reallocs++; 659 a->nz++; 660 } 661 N = nrow++ - 1; 662 /* shift up all the later entries in this row */ 663 for (ii=N; ii>=i; ii--) { 664 rp[ii+1] = rp[ii]; 665 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 666 } 667 if (N >= i) { 668 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 669 } 670 rp[i] = col; 671 bap = ap + bs2*i; 672 if (roworiented) { 673 for (ii=0; ii<bs; ii++,value+=stepval) { 674 for (jj=ii; jj<bs2; jj+=bs) { 675 bap[jj] = *value++; 676 } 677 } 678 } else { 679 for (ii=0; ii<bs; ii++,value+=stepval) { 680 for (jj=0; jj<bs; jj++) { 681 *bap++ = *value++; 682 } 683 } 684 } 685 noinsert2:; 686 low = i; 687 } 688 ailen[row] = nrow; 689 } 690 PetscFunctionReturn(0); 691 } 692 693 #undef __FUNCT__ 694 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 695 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 696 { 697 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 698 PetscErrorCode ierr; 699 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 700 int m = A->m,*ip,N,*ailen = a->ilen; 701 int mbs = a->mbs,bs2 = a->bs2,rmax = 0; 702 MatScalar *aa = a->a,*ap; 703 704 PetscFunctionBegin; 705 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 706 707 if (m) rmax = ailen[0]; 708 for (i=1; i<mbs; i++) { 709 /* move each row back by the amount of empty slots (fshift) before it*/ 710 fshift += imax[i-1] - ailen[i-1]; 711 rmax = PetscMax(rmax,ailen[i]); 712 if (fshift) { 713 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 714 N = ailen[i]; 715 for (j=0; j<N; j++) { 716 ip[j-fshift] = ip[j]; 717 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 718 } 719 } 720 ai[i] = ai[i-1] + ailen[i-1]; 721 } 722 if (mbs) { 723 fshift += imax[mbs-1] - ailen[mbs-1]; 724 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 725 } 726 /* reset ilen and imax for each row */ 727 for (i=0; i<mbs; i++) { 728 ailen[i] = imax[i] = ai[i+1] - ai[i]; 729 } 730 a->nz = ai[mbs]; 731 732 /* diagonals may have moved, reset it */ 733 if (a->diag) { 734 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr); 735 } 736 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 737 m,A->m,a->bs,fshift*bs2,a->nz*bs2); 738 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n", 739 a->reallocs); 740 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 741 a->reallocs = 0; 742 A->info.nz_unneeded = (PetscReal)fshift*bs2; 743 744 PetscFunctionReturn(0); 745 } 746 747 /* 748 This function returns an array of flags which indicate the locations of contiguous 749 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 750 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 751 Assume: sizes should be long enough to hold all the values. 752 */ 753 #undef __FUNCT__ 754 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 755 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 756 { 757 int i,j,k,row; 758 PetscTruth flg; 759 760 PetscFunctionBegin; 761 for (i=0,j=0; i<n; j++) { 762 row = idx[i]; 763 if (row%bs!=0) { /* Not the begining of a block */ 764 sizes[j] = 1; 765 i++; 766 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 767 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 768 i++; 769 } else { /* Begining of the block, so check if the complete block exists */ 770 flg = PETSC_TRUE; 771 for (k=1; k<bs; k++) { 772 if (row+k != idx[i+k]) { /* break in the block */ 773 flg = PETSC_FALSE; 774 break; 775 } 776 } 777 if (flg == PETSC_TRUE) { /* No break in the bs */ 778 sizes[j] = bs; 779 i+= bs; 780 } else { 781 sizes[j] = 1; 782 i++; 783 } 784 } 785 } 786 *bs_max = j; 787 PetscFunctionReturn(0); 788 } 789 790 #undef __FUNCT__ 791 #define __FUNCT__ "MatZeroRows_SeqSBAIJ" 792 PetscErrorCode MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag) 793 { 794 PetscFunctionBegin; 795 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 796 } 797 798 /* Only add/insert a(i,j) with i<=j (blocks). 799 Any a(i,j) with i>j input by user is ingored. 800 */ 801 802 #undef __FUNCT__ 803 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 804 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 805 { 806 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 807 PetscErrorCode ierr; 808 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 809 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 810 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 811 int ridx,cidx,bs2=a->bs2; 812 MatScalar *ap,value,*aa=a->a,*bap; 813 814 PetscFunctionBegin; 815 816 for (k=0; k<m; k++) { /* loop over added rows */ 817 row = im[k]; /* row number */ 818 brow = row/bs; /* block row number */ 819 if (row < 0) continue; 820 #if defined(PETSC_USE_BOPT_g) 821 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 822 #endif 823 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 824 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 825 rmax = imax[brow]; /* maximum space allocated for this row */ 826 nrow = ailen[brow]; /* actual length of this row */ 827 low = 0; 828 829 for (l=0; l<n; l++) { /* loop over added columns */ 830 if (in[l] < 0) continue; 831 #if defined(PETSC_USE_BOPT_g) 832 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m-1); 833 #endif 834 col = in[l]; 835 bcol = col/bs; /* block col number */ 836 837 if (brow <= bcol){ 838 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 839 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 840 /* element value a(k,l) */ 841 if (roworiented) { 842 value = v[l + k*n]; 843 } else { 844 value = v[k + l*m]; 845 } 846 847 /* move pointer bap to a(k,l) quickly and add/insert value */ 848 if (!sorted) low = 0; high = nrow; 849 while (high-low > 7) { 850 t = (low+high)/2; 851 if (rp[t] > bcol) high = t; 852 else low = t; 853 } 854 for (i=low; i<high; i++) { 855 /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */ 856 if (rp[i] > bcol) break; 857 if (rp[i] == bcol) { 858 bap = ap + bs2*i + bs*cidx + ridx; 859 if (is == ADD_VALUES) *bap += value; 860 else *bap = value; 861 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 862 if (brow == bcol && ridx < cidx){ 863 bap = ap + bs2*i + bs*ridx + cidx; 864 if (is == ADD_VALUES) *bap += value; 865 else *bap = value; 866 } 867 goto noinsert1; 868 } 869 } 870 871 if (nonew == 1) goto noinsert1; 872 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 873 if (nrow >= rmax) { 874 /* there is no extra room in row, therefore enlarge */ 875 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 876 MatScalar *new_a; 877 878 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 879 880 /* Malloc new storage space */ 881 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 882 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 883 new_j = (int*)(new_a + bs2*new_nz); 884 new_i = new_j + new_nz; 885 886 /* copy over old data into new slots */ 887 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 888 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 889 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 890 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 891 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 892 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 893 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 894 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 895 /* free up old matrix storage */ 896 ierr = PetscFree(a->a);CHKERRQ(ierr); 897 if (!a->singlemalloc) { 898 ierr = PetscFree(a->i);CHKERRQ(ierr); 899 ierr = PetscFree(a->j);CHKERRQ(ierr); 900 } 901 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 902 a->singlemalloc = PETSC_TRUE; 903 904 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 905 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 906 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 907 a->maxnz += bs2*CHUNKSIZE; 908 a->reallocs++; 909 a->nz++; 910 } 911 912 N = nrow++ - 1; 913 /* shift up all the later entries in this row */ 914 for (ii=N; ii>=i; ii--) { 915 rp[ii+1] = rp[ii]; 916 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 917 } 918 if (N>=i) { 919 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 920 } 921 rp[i] = bcol; 922 ap[bs2*i + bs*cidx + ridx] = value; 923 noinsert1:; 924 low = i; 925 /* } */ 926 } 927 } /* end of if .. if.. */ 928 } /* end of loop over added columns */ 929 ailen[brow] = nrow; 930 } /* end of loop over added rows */ 931 932 PetscFunctionReturn(0); 933 } 934 935 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 936 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 937 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS[],int); 938 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 939 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]); 940 EXTERN PetscErrorCode MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec); 941 EXTERN PetscErrorCode MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec); 942 EXTERN PetscErrorCode MatScale_SeqSBAIJ(const PetscScalar*,Mat); 943 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 944 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 945 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec); 946 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 947 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 948 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat); 949 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec); 950 951 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 952 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 953 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 954 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 955 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 956 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 957 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 958 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 959 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec); 960 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec); 961 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec); 962 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec); 963 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec); 964 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec); 965 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec); 966 967 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 968 969 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 970 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 971 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 972 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 973 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 974 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 975 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 976 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 977 978 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 979 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 980 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 981 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 982 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 983 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 984 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 985 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*); 986 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*); 987 988 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 989 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 990 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 991 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 992 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 993 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 994 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 995 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 996 997 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 998 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 999 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 1000 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 1001 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 1002 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 1003 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 1004 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 1005 1006 #ifdef HAVE_MatICCFactor 1007 /* modified from MatILUFactor_SeqSBAIJ, needs further work! */ 1008 #undef __FUNCT__ 1009 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 1010 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 1011 { 1012 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1013 Mat outA; 1014 PetscErrorCode ierr; 1015 PetscTruth row_identity,col_identity; 1016 1017 PetscFunctionBegin; 1018 /* 1019 if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1020 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1021 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1022 if (!row_identity || !col_identity) { 1023 SETERRQ(1,"Row and column permutations must be identity for in-place ICC"); 1024 } 1025 */ 1026 1027 outA = inA; 1028 inA->factor = FACTOR_CHOLESKY; 1029 1030 if (!a->diag) { 1031 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1032 } 1033 /* 1034 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1035 for ILU(0) factorization with natural ordering 1036 */ 1037 switch (a->bs) { 1038 case 1: 1039 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1040 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1041 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1042 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1043 case 2: 1044 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1045 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1046 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1047 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1048 break; 1049 case 3: 1050 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1051 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1052 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1053 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1054 break; 1055 case 4: 1056 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1057 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1058 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1059 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1060 break; 1061 case 5: 1062 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1063 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1064 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1065 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1066 break; 1067 case 6: 1068 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1069 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1070 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1071 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1072 break; 1073 case 7: 1074 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1075 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1076 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1077 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1078 break; 1079 default: 1080 a->row = row; 1081 a->icol = col; 1082 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1083 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1084 1085 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1086 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1087 PetscLogObjectParent(inA,a->icol); 1088 1089 if (!a->solve_work) { 1090 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1091 PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar)); 1092 } 1093 } 1094 1095 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1096 1097 PetscFunctionReturn(0); 1098 } 1099 #endif 1100 1101 #undef __FUNCT__ 1102 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1103 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A) 1104 { 1105 static PetscTruth called = PETSC_FALSE; 1106 MPI_Comm comm = A->comm; 1107 PetscErrorCode ierr; 1108 1109 PetscFunctionBegin; 1110 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1111 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1112 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1113 PetscFunctionReturn(0); 1114 } 1115 1116 EXTERN_C_BEGIN 1117 #undef __FUNCT__ 1118 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1119 PetscErrorCode MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices) 1120 { 1121 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1122 int i,nz,n; 1123 1124 PetscFunctionBegin; 1125 nz = baij->maxnz; 1126 n = mat->n; 1127 for (i=0; i<nz; i++) { 1128 baij->j[i] = indices[i]; 1129 } 1130 baij->nz = nz; 1131 for (i=0; i<n; i++) { 1132 baij->ilen[i] = baij->imax[i]; 1133 } 1134 1135 PetscFunctionReturn(0); 1136 } 1137 EXTERN_C_END 1138 1139 #undef __FUNCT__ 1140 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1141 /*@ 1142 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1143 in the matrix. 1144 1145 Input Parameters: 1146 + mat - the SeqSBAIJ matrix 1147 - indices - the column indices 1148 1149 Level: advanced 1150 1151 Notes: 1152 This can be called if you have precomputed the nonzero structure of the 1153 matrix and want to provide it to the matrix object to improve the performance 1154 of the MatSetValues() operation. 1155 1156 You MUST have set the correct numbers of nonzeros per row in the call to 1157 MatCreateSeqSBAIJ(). 1158 1159 MUST be called before any calls to MatSetValues() 1160 1161 .seealso: MatCreateSeqSBAIJ 1162 @*/ 1163 PetscErrorCode MatSeqSBAIJSetColumnIndices(Mat mat,int *indices) 1164 { 1165 PetscErrorCode ierr,(*f)(Mat,int *); 1166 1167 PetscFunctionBegin; 1168 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1169 PetscValidPointer(indices,2); 1170 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1171 if (f) { 1172 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1173 } else { 1174 SETERRQ(1,"Wrong type of matrix to set column indices"); 1175 } 1176 PetscFunctionReturn(0); 1177 } 1178 1179 #undef __FUNCT__ 1180 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1181 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1182 { 1183 PetscErrorCode ierr; 1184 1185 PetscFunctionBegin; 1186 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1187 PetscFunctionReturn(0); 1188 } 1189 1190 #undef __FUNCT__ 1191 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1192 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1193 { 1194 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1195 PetscFunctionBegin; 1196 *array = a->a; 1197 PetscFunctionReturn(0); 1198 } 1199 1200 #undef __FUNCT__ 1201 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1202 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1203 { 1204 PetscFunctionBegin; 1205 PetscFunctionReturn(0); 1206 } 1207 1208 #include "petscblaslapack.h" 1209 #undef __FUNCT__ 1210 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1211 PetscErrorCode MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1212 { 1213 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1214 PetscErrorCode ierr; 1215 int one=1,i,bs=y->bs,bs2,j; 1216 1217 PetscFunctionBegin; 1218 if (str == SAME_NONZERO_PATTERN) { 1219 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1220 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1221 if (y->xtoy && y->XtoY != X) { 1222 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1223 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1224 } 1225 if (!y->xtoy) { /* get xtoy */ 1226 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1227 y->XtoY = X; 1228 } 1229 bs2 = bs*bs; 1230 for (i=0; i<x->nz; i++) { 1231 j = 0; 1232 while (j < bs2){ 1233 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1234 j++; 1235 } 1236 } 1237 PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 1238 } else { 1239 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1240 } 1241 PetscFunctionReturn(0); 1242 } 1243 1244 #undef __FUNCT__ 1245 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1246 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1247 { 1248 PetscFunctionBegin; 1249 *flg = PETSC_TRUE; 1250 PetscFunctionReturn(0); 1251 } 1252 1253 #undef __FUNCT__ 1254 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1255 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1256 { 1257 PetscFunctionBegin; 1258 *flg = PETSC_TRUE; 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #undef __FUNCT__ 1263 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1264 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1265 { 1266 PetscFunctionBegin; 1267 *flg = PETSC_FALSE; 1268 PetscFunctionReturn(0); 1269 } 1270 1271 /* -------------------------------------------------------------------*/ 1272 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1273 MatGetRow_SeqSBAIJ, 1274 MatRestoreRow_SeqSBAIJ, 1275 MatMult_SeqSBAIJ_N, 1276 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1277 MatMultTranspose_SeqSBAIJ, 1278 MatMultTransposeAdd_SeqSBAIJ, 1279 MatSolve_SeqSBAIJ_N, 1280 0, 1281 0, 1282 /*10*/ 0, 1283 0, 1284 MatCholeskyFactor_SeqSBAIJ, 1285 MatRelax_SeqSBAIJ, 1286 MatTranspose_SeqSBAIJ, 1287 /*15*/ MatGetInfo_SeqSBAIJ, 1288 MatEqual_SeqSBAIJ, 1289 MatGetDiagonal_SeqSBAIJ, 1290 MatDiagonalScale_SeqSBAIJ, 1291 MatNorm_SeqSBAIJ, 1292 /*20*/ 0, 1293 MatAssemblyEnd_SeqSBAIJ, 1294 0, 1295 MatSetOption_SeqSBAIJ, 1296 MatZeroEntries_SeqSBAIJ, 1297 /*25*/ MatZeroRows_SeqSBAIJ, 1298 0, 1299 0, 1300 MatCholeskyFactorSymbolic_SeqSBAIJ, 1301 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1302 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1303 0, 1304 MatICCFactorSymbolic_SeqSBAIJ, 1305 MatGetArray_SeqSBAIJ, 1306 MatRestoreArray_SeqSBAIJ, 1307 /*35*/ MatDuplicate_SeqSBAIJ, 1308 0, 1309 0, 1310 0, 1311 0, 1312 /*40*/ MatAXPY_SeqSBAIJ, 1313 MatGetSubMatrices_SeqSBAIJ, 1314 MatIncreaseOverlap_SeqSBAIJ, 1315 MatGetValues_SeqSBAIJ, 1316 0, 1317 /*45*/ MatPrintHelp_SeqSBAIJ, 1318 MatScale_SeqSBAIJ, 1319 0, 1320 0, 1321 0, 1322 /*50*/ MatGetBlockSize_SeqSBAIJ, 1323 MatGetRowIJ_SeqSBAIJ, 1324 MatRestoreRowIJ_SeqSBAIJ, 1325 0, 1326 0, 1327 /*55*/ 0, 1328 0, 1329 0, 1330 0, 1331 MatSetValuesBlocked_SeqSBAIJ, 1332 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1333 0, 1334 0, 1335 MatGetPetscMaps_Petsc, 1336 0, 1337 /*65*/ 0, 1338 0, 1339 0, 1340 0, 1341 0, 1342 /*70*/ MatGetRowMax_SeqSBAIJ, 1343 0, 1344 0, 1345 0, 1346 0, 1347 /*75*/ 0, 1348 0, 1349 0, 1350 0, 1351 0, 1352 /*80*/ 0, 1353 0, 1354 0, 1355 #if !defined(PETSC_USE_COMPLEX) 1356 MatGetInertia_SeqSBAIJ, 1357 #else 1358 0, 1359 #endif 1360 /*85*/ MatLoad_SeqSBAIJ, 1361 MatIsSymmetric_SeqSBAIJ, 1362 MatIsStructurallySymmetric_SeqSBAIJ, 1363 MatIsHermitian_SeqSBAIJ 1364 }; 1365 1366 EXTERN_C_BEGIN 1367 #undef __FUNCT__ 1368 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1369 PetscErrorCode MatStoreValues_SeqSBAIJ(Mat mat) 1370 { 1371 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1372 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1373 PetscErrorCode ierr; 1374 1375 PetscFunctionBegin; 1376 if (aij->nonew != 1) { 1377 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1378 } 1379 1380 /* allocate space for values if not already there */ 1381 if (!aij->saved_values) { 1382 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1383 } 1384 1385 /* copy values over */ 1386 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1387 PetscFunctionReturn(0); 1388 } 1389 EXTERN_C_END 1390 1391 EXTERN_C_BEGIN 1392 #undef __FUNCT__ 1393 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1394 PetscErrorCode MatRetrieveValues_SeqSBAIJ(Mat mat) 1395 { 1396 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1397 PetscErrorCode ierr; 1398 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1399 1400 PetscFunctionBegin; 1401 if (aij->nonew != 1) { 1402 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1403 } 1404 if (!aij->saved_values) { 1405 SETERRQ(1,"Must call MatStoreValues(A);first"); 1406 } 1407 1408 /* copy values over */ 1409 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1410 PetscFunctionReturn(0); 1411 } 1412 EXTERN_C_END 1413 1414 EXTERN_C_BEGIN 1415 #undef __FUNCT__ 1416 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1417 PetscErrorCode MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz) 1418 { 1419 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1420 PetscErrorCode ierr; 1421 int i,len,mbs,bs2; 1422 PetscTruth flg; 1423 1424 PetscFunctionBegin; 1425 B->preallocated = PETSC_TRUE; 1426 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1427 mbs = B->m/bs; 1428 bs2 = bs*bs; 1429 1430 if (mbs*bs != B->m) { 1431 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1432 } 1433 1434 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1435 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1436 if (nnz) { 1437 for (i=0; i<mbs; i++) { 1438 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1439 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs); 1440 } 1441 } 1442 1443 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1444 if (!flg) { 1445 switch (bs) { 1446 case 1: 1447 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1448 B->ops->solve = MatSolve_SeqSBAIJ_1; 1449 B->ops->solves = MatSolves_SeqSBAIJ_1; 1450 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_1; 1451 B->ops->mult = MatMult_SeqSBAIJ_1; 1452 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1453 break; 1454 case 2: 1455 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1456 B->ops->solve = MatSolve_SeqSBAIJ_2; 1457 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_2; 1458 B->ops->mult = MatMult_SeqSBAIJ_2; 1459 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1460 break; 1461 case 3: 1462 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1463 B->ops->solve = MatSolve_SeqSBAIJ_3; 1464 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_3; 1465 B->ops->mult = MatMult_SeqSBAIJ_3; 1466 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1467 break; 1468 case 4: 1469 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1470 B->ops->solve = MatSolve_SeqSBAIJ_4; 1471 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_4; 1472 B->ops->mult = MatMult_SeqSBAIJ_4; 1473 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1474 break; 1475 case 5: 1476 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1477 B->ops->solve = MatSolve_SeqSBAIJ_5; 1478 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_5; 1479 B->ops->mult = MatMult_SeqSBAIJ_5; 1480 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1481 break; 1482 case 6: 1483 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1484 B->ops->solve = MatSolve_SeqSBAIJ_6; 1485 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_6; 1486 B->ops->mult = MatMult_SeqSBAIJ_6; 1487 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1488 break; 1489 case 7: 1490 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1491 B->ops->solve = MatSolve_SeqSBAIJ_7; 1492 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_7; 1493 B->ops->mult = MatMult_SeqSBAIJ_7; 1494 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1495 break; 1496 } 1497 } 1498 1499 b->mbs = mbs; 1500 b->nbs = mbs; 1501 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1502 if (!nnz) { 1503 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1504 else if (nz <= 0) nz = 1; 1505 for (i=0; i<mbs; i++) { 1506 b->imax[i] = nz; 1507 } 1508 nz = nz*mbs; /* total nz */ 1509 } else { 1510 nz = 0; 1511 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1512 } 1513 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1514 1515 /* allocate the matrix space */ 1516 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1517 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1518 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1519 b->j = (int*)(b->a + nz*bs2); 1520 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1521 b->i = b->j + nz; 1522 b->singlemalloc = PETSC_TRUE; 1523 1524 /* pointer to beginning of each row */ 1525 b->i[0] = 0; 1526 for (i=1; i<mbs+1; i++) { 1527 b->i[i] = b->i[i-1] + b->imax[i-1]; 1528 } 1529 1530 /* b->ilen will count nonzeros in each block row so far. */ 1531 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1532 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1533 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1534 1535 b->bs = bs; 1536 b->bs2 = bs2; 1537 b->nz = 0; 1538 b->maxnz = nz*bs2; 1539 1540 b->inew = 0; 1541 b->jnew = 0; 1542 b->anew = 0; 1543 b->a2anew = 0; 1544 b->permute = PETSC_FALSE; 1545 PetscFunctionReturn(0); 1546 } 1547 EXTERN_C_END 1548 1549 EXTERN_C_BEGIN 1550 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,Mat*); 1551 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqBAIJ(Mat,const MatType,Mat*); 1552 EXTERN_C_END 1553 1554 /*MC 1555 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1556 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1557 1558 Options Database Keys: 1559 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1560 1561 Level: beginner 1562 1563 .seealso: MatCreateSeqSBAIJ 1564 M*/ 1565 1566 EXTERN_C_BEGIN 1567 #undef __FUNCT__ 1568 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1569 PetscErrorCode MatCreate_SeqSBAIJ(Mat B) 1570 { 1571 Mat_SeqSBAIJ *b; 1572 PetscErrorCode ierr; 1573 int size; 1574 1575 PetscFunctionBegin; 1576 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1577 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1578 B->m = B->M = PetscMax(B->m,B->M); 1579 B->n = B->N = PetscMax(B->n,B->N); 1580 1581 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1582 B->data = (void*)b; 1583 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1584 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1585 B->ops->destroy = MatDestroy_SeqSBAIJ; 1586 B->ops->view = MatView_SeqSBAIJ; 1587 B->factor = 0; 1588 B->lupivotthreshold = 1.0; 1589 B->mapping = 0; 1590 b->row = 0; 1591 b->icol = 0; 1592 b->reallocs = 0; 1593 b->saved_values = 0; 1594 1595 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1596 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1597 1598 b->sorted = PETSC_FALSE; 1599 b->roworiented = PETSC_TRUE; 1600 b->nonew = 0; 1601 b->diag = 0; 1602 b->solve_work = 0; 1603 b->mult_work = 0; 1604 B->spptr = 0; 1605 b->keepzeroedrows = PETSC_FALSE; 1606 b->xtoy = 0; 1607 b->XtoY = 0; 1608 1609 b->inew = 0; 1610 b->jnew = 0; 1611 b->anew = 0; 1612 b->a2anew = 0; 1613 b->permute = PETSC_FALSE; 1614 1615 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1616 "MatStoreValues_SeqSBAIJ", 1617 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1618 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1619 "MatRetrieveValues_SeqSBAIJ", 1620 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1621 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1622 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1623 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1624 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1625 "MatConvert_SeqSBAIJ_SeqAIJ", 1626 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1627 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1628 "MatConvert_SeqSBAIJ_SeqBAIJ", 1629 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1630 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1631 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1632 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1633 1634 B->symmetric = PETSC_TRUE; 1635 B->structurally_symmetric = PETSC_TRUE; 1636 B->symmetric_set = PETSC_TRUE; 1637 B->structurally_symmetric_set = PETSC_TRUE; 1638 PetscFunctionReturn(0); 1639 } 1640 EXTERN_C_END 1641 1642 #undef __FUNCT__ 1643 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1644 /*@C 1645 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1646 compressed row) format. For good matrix assembly performance the 1647 user should preallocate the matrix storage by setting the parameter nz 1648 (or the array nnz). By setting these parameters accurately, performance 1649 during matrix assembly can be increased by more than a factor of 50. 1650 1651 Collective on Mat 1652 1653 Input Parameters: 1654 + A - the symmetric matrix 1655 . bs - size of block 1656 . nz - number of block nonzeros per block row (same for all rows) 1657 - nnz - array containing the number of block nonzeros in the upper triangular plus 1658 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1659 1660 Options Database Keys: 1661 . -mat_no_unroll - uses code that does not unroll the loops in the 1662 block calculations (much slower) 1663 . -mat_block_size - size of the blocks to use 1664 1665 Level: intermediate 1666 1667 Notes: 1668 Specify the preallocated storage with either nz or nnz (not both). 1669 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1670 allocation. For additional details, see the users manual chapter on 1671 matrices. 1672 1673 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1674 @*/ 1675 PetscErrorCode MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) { 1676 PetscErrorCode ierr,(*f)(Mat,int,int,const int[]); 1677 1678 PetscFunctionBegin; 1679 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1680 if (f) { 1681 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1682 } 1683 PetscFunctionReturn(0); 1684 } 1685 1686 #undef __FUNCT__ 1687 #define __FUNCT__ "MatCreateSeqSBAIJ" 1688 /*@C 1689 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1690 compressed row) format. For good matrix assembly performance the 1691 user should preallocate the matrix storage by setting the parameter nz 1692 (or the array nnz). By setting these parameters accurately, performance 1693 during matrix assembly can be increased by more than a factor of 50. 1694 1695 Collective on MPI_Comm 1696 1697 Input Parameters: 1698 + comm - MPI communicator, set to PETSC_COMM_SELF 1699 . bs - size of block 1700 . m - number of rows, or number of columns 1701 . nz - number of block nonzeros per block row (same for all rows) 1702 - nnz - array containing the number of block nonzeros in the upper triangular plus 1703 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1704 1705 Output Parameter: 1706 . A - the symmetric matrix 1707 1708 Options Database Keys: 1709 . -mat_no_unroll - uses code that does not unroll the loops in the 1710 block calculations (much slower) 1711 . -mat_block_size - size of the blocks to use 1712 1713 Level: intermediate 1714 1715 Notes: 1716 1717 Specify the preallocated storage with either nz or nnz (not both). 1718 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1719 allocation. For additional details, see the users manual chapter on 1720 matrices. 1721 1722 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1723 @*/ 1724 PetscErrorCode MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A) 1725 { 1726 PetscErrorCode ierr; 1727 1728 PetscFunctionBegin; 1729 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1730 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1731 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1732 PetscFunctionReturn(0); 1733 } 1734 1735 #undef __FUNCT__ 1736 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1737 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1738 { 1739 Mat C; 1740 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1741 PetscErrorCode ierr; 1742 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1743 1744 PetscFunctionBegin; 1745 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1746 1747 *B = 0; 1748 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1749 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1750 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1751 c = (Mat_SeqSBAIJ*)C->data; 1752 1753 C->preallocated = PETSC_TRUE; 1754 C->factor = A->factor; 1755 c->row = 0; 1756 c->icol = 0; 1757 c->saved_values = 0; 1758 c->keepzeroedrows = a->keepzeroedrows; 1759 C->assembled = PETSC_TRUE; 1760 1761 C->M = A->M; 1762 C->N = A->N; 1763 c->bs = a->bs; 1764 c->bs2 = a->bs2; 1765 c->mbs = a->mbs; 1766 c->nbs = a->nbs; 1767 1768 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1769 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1770 for (i=0; i<mbs; i++) { 1771 c->imax[i] = a->imax[i]; 1772 c->ilen[i] = a->ilen[i]; 1773 } 1774 1775 /* allocate the matrix space */ 1776 c->singlemalloc = PETSC_TRUE; 1777 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1778 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1779 c->j = (int*)(c->a + nz*bs2); 1780 c->i = c->j + nz; 1781 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1782 if (mbs > 0) { 1783 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1784 if (cpvalues == MAT_COPY_VALUES) { 1785 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1786 } else { 1787 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1788 } 1789 } 1790 1791 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1792 c->sorted = a->sorted; 1793 c->roworiented = a->roworiented; 1794 c->nonew = a->nonew; 1795 1796 if (a->diag) { 1797 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1798 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1799 for (i=0; i<mbs; i++) { 1800 c->diag[i] = a->diag[i]; 1801 } 1802 } else c->diag = 0; 1803 c->nz = a->nz; 1804 c->maxnz = a->maxnz; 1805 c->solve_work = 0; 1806 c->mult_work = 0; 1807 *B = C; 1808 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1809 PetscFunctionReturn(0); 1810 } 1811 1812 #undef __FUNCT__ 1813 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1814 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A) 1815 { 1816 Mat_SeqSBAIJ *a; 1817 Mat B; 1818 PetscErrorCode ierr; 1819 int i,nz,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1820 int *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1821 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1822 int *masked,nmask,tmp,bs2,ishift; 1823 PetscScalar *aa; 1824 MPI_Comm comm = ((PetscObject)viewer)->comm; 1825 1826 PetscFunctionBegin; 1827 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1828 bs2 = bs*bs; 1829 1830 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1831 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1832 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1833 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1834 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1835 M = header[1]; N = header[2]; nz = header[3]; 1836 1837 if (header[3] < 0) { 1838 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1839 } 1840 1841 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1842 1843 /* 1844 This code adds extra rows to make sure the number of rows is 1845 divisible by the blocksize 1846 */ 1847 mbs = M/bs; 1848 extra_rows = bs - M + bs*(mbs); 1849 if (extra_rows == bs) extra_rows = 0; 1850 else mbs++; 1851 if (extra_rows) { 1852 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1853 } 1854 1855 /* read in row lengths */ 1856 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1857 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1858 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1859 1860 /* read in column indices */ 1861 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1862 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1863 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1864 1865 /* loop over row lengths determining block row lengths */ 1866 ierr = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr); 1867 ierr = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1868 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1869 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1870 masked = mask + mbs; 1871 rowcount = 0; nzcount = 0; 1872 for (i=0; i<mbs; i++) { 1873 nmask = 0; 1874 for (j=0; j<bs; j++) { 1875 kmax = rowlengths[rowcount]; 1876 for (k=0; k<kmax; k++) { 1877 tmp = jj[nzcount++]/bs; /* block col. index */ 1878 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1879 } 1880 rowcount++; 1881 } 1882 s_browlengths[i] += nmask; 1883 1884 /* zero out the mask elements we set */ 1885 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1886 } 1887 1888 /* create our matrix */ 1889 ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr); 1890 ierr = MatSetType(B,type);CHKERRQ(ierr); 1891 ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr); 1892 a = (Mat_SeqSBAIJ*)B->data; 1893 1894 /* set matrix "i" values */ 1895 a->i[0] = 0; 1896 for (i=1; i<= mbs; i++) { 1897 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1898 a->ilen[i-1] = s_browlengths[i-1]; 1899 } 1900 a->nz = a->i[mbs]; 1901 1902 /* read in nonzero values */ 1903 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1904 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1905 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1906 1907 /* set "a" and "j" values into matrix */ 1908 nzcount = 0; jcount = 0; 1909 for (i=0; i<mbs; i++) { 1910 nzcountb = nzcount; 1911 nmask = 0; 1912 for (j=0; j<bs; j++) { 1913 kmax = rowlengths[i*bs+j]; 1914 for (k=0; k<kmax; k++) { 1915 tmp = jj[nzcount++]/bs; /* block col. index */ 1916 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1917 } 1918 } 1919 /* sort the masked values */ 1920 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1921 1922 /* set "j" values into matrix */ 1923 maskcount = 1; 1924 for (j=0; j<nmask; j++) { 1925 a->j[jcount++] = masked[j]; 1926 mask[masked[j]] = maskcount++; 1927 } 1928 1929 /* set "a" values into matrix */ 1930 ishift = bs2*a->i[i]; 1931 for (j=0; j<bs; j++) { 1932 kmax = rowlengths[i*bs+j]; 1933 for (k=0; k<kmax; k++) { 1934 tmp = jj[nzcountb]/bs ; /* block col. index */ 1935 if (tmp >= i){ 1936 block = mask[tmp] - 1; 1937 point = jj[nzcountb] - bs*tmp; 1938 idx = ishift + bs2*block + j + bs*point; 1939 a->a[idx] = aa[nzcountb]; 1940 } 1941 nzcountb++; 1942 } 1943 } 1944 /* zero out the mask elements we set */ 1945 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1946 } 1947 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1948 1949 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1950 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1951 ierr = PetscFree(aa);CHKERRQ(ierr); 1952 ierr = PetscFree(jj);CHKERRQ(ierr); 1953 ierr = PetscFree(mask);CHKERRQ(ierr); 1954 1955 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1956 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1957 ierr = MatView_Private(B);CHKERRQ(ierr); 1958 *A = B; 1959 PetscFunctionReturn(0); 1960 } 1961 1962 #undef __FUNCT__ 1963 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1964 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1965 { 1966 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1967 MatScalar *aa=a->a,*v,*v1; 1968 PetscScalar *x,*b,*t,sum,d; 1969 PetscErrorCode ierr; 1970 int m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j; 1971 int nz,nz1,*vj,*vj1,i; 1972 1973 PetscFunctionBegin; 1974 its = its*lits; 1975 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1976 1977 if (bs > 1) 1978 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1979 1980 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1981 if (xx != bb) { 1982 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1983 } else { 1984 b = x; 1985 } 1986 1987 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1988 1989 if (flag & SOR_ZERO_INITIAL_GUESS) { 1990 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1991 for (i=0; i<m; i++) 1992 t[i] = b[i]; 1993 1994 for (i=0; i<m; i++){ 1995 d = *(aa + ai[i]); /* diag[i] */ 1996 v = aa + ai[i] + 1; 1997 vj = aj + ai[i] + 1; 1998 nz = ai[i+1] - ai[i] - 1; 1999 x[i] = omega*t[i]/d; 2000 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 2001 PetscLogFlops(2*nz-1); 2002 } 2003 } 2004 2005 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2006 for (i=0; i<m; i++) 2007 t[i] = b[i]; 2008 2009 for (i=0; i<m-1; i++){ /* update rhs */ 2010 v = aa + ai[i] + 1; 2011 vj = aj + ai[i] + 1; 2012 nz = ai[i+1] - ai[i] - 1; 2013 while (nz--) t[*vj++] -= x[i]*(*v++); 2014 PetscLogFlops(2*nz-1); 2015 } 2016 for (i=m-1; i>=0; i--){ 2017 d = *(aa + ai[i]); 2018 v = aa + ai[i] + 1; 2019 vj = aj + ai[i] + 1; 2020 nz = ai[i+1] - ai[i] - 1; 2021 sum = t[i]; 2022 while (nz--) sum -= x[*vj++]*(*v++); 2023 PetscLogFlops(2*nz-1); 2024 x[i] = (1-omega)*x[i] + omega*sum/d; 2025 } 2026 } 2027 its--; 2028 } 2029 2030 while (its--) { 2031 /* 2032 forward sweep: 2033 for i=0,...,m-1: 2034 sum[i] = (b[i] - U(i,:)x )/d[i]; 2035 x[i] = (1-omega)x[i] + omega*sum[i]; 2036 b = b - x[i]*U^T(i,:); 2037 2038 */ 2039 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2040 for (i=0; i<m; i++) 2041 t[i] = b[i]; 2042 2043 for (i=0; i<m; i++){ 2044 d = *(aa + ai[i]); /* diag[i] */ 2045 v = aa + ai[i] + 1; v1=v; 2046 vj = aj + ai[i] + 1; vj1=vj; 2047 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2048 sum = t[i]; 2049 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2050 x[i] = (1-omega)*x[i] + omega*sum/d; 2051 while (nz--) t[*vj++] -= x[i]*(*v++); 2052 PetscLogFlops(4*nz-2); 2053 } 2054 } 2055 2056 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2057 /* 2058 backward sweep: 2059 b = b - x[i]*U^T(i,:), i=0,...,n-2 2060 for i=m-1,...,0: 2061 sum[i] = (b[i] - U(i,:)x )/d[i]; 2062 x[i] = (1-omega)x[i] + omega*sum[i]; 2063 */ 2064 for (i=0; i<m; i++) 2065 t[i] = b[i]; 2066 2067 for (i=0; i<m-1; i++){ /* update rhs */ 2068 v = aa + ai[i] + 1; 2069 vj = aj + ai[i] + 1; 2070 nz = ai[i+1] - ai[i] - 1; 2071 while (nz--) t[*vj++] -= x[i]*(*v++); 2072 PetscLogFlops(2*nz-1); 2073 } 2074 for (i=m-1; i>=0; i--){ 2075 d = *(aa + ai[i]); 2076 v = aa + ai[i] + 1; 2077 vj = aj + ai[i] + 1; 2078 nz = ai[i+1] - ai[i] - 1; 2079 sum = t[i]; 2080 while (nz--) sum -= x[*vj++]*(*v++); 2081 PetscLogFlops(2*nz-1); 2082 x[i] = (1-omega)*x[i] + omega*sum/d; 2083 } 2084 } 2085 } 2086 2087 ierr = PetscFree(t);CHKERRQ(ierr); 2088 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2089 if (bb != xx) { 2090 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2091 } 2092 2093 PetscFunctionReturn(0); 2094 } 2095 2096 2097 2098 2099 2100 2101