1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/inline/spops.h" 9 #include "src/mat/impls/sbaij/seq/sbaij.h" 10 11 #define CHUNKSIZE 10 12 13 /* 14 Checks for missing diagonals 15 */ 16 #undef __FUNCT__ 17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 19 { 20 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 21 PetscErrorCode ierr; 22 PetscInt *diag,*jj = a->j,i; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 PetscErrorCode ierr; 39 PetscInt i,mbs = a->mbs; 40 41 PetscFunctionBegin; 42 if (a->diag) PetscFunctionReturn(0); 43 44 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 45 ierr = PetscLogObjectMemory(A,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 46 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 47 PetscFunctionReturn(0); 48 } 49 50 #undef __FUNCT__ 51 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 52 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 53 { 54 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 55 PetscInt n = a->mbs,i; 56 57 PetscFunctionBegin; 58 *nn = n; 59 if (!ia) PetscFunctionReturn(0); 60 61 if (oshift == 1) { 62 /* temporarily add 1 to i and j indices */ 63 PetscInt nz = a->i[n]; 64 for (i=0; i<nz; i++) a->j[i]++; 65 for (i=0; i<n+1; i++) a->i[i]++; 66 *ia = a->i; *ja = a->j; 67 } else { 68 *ia = a->i; *ja = a->j; 69 } 70 PetscFunctionReturn(0); 71 } 72 73 #undef __FUNCT__ 74 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 75 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 76 { 77 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 78 PetscInt i,n = a->mbs; 79 80 PetscFunctionBegin; 81 if (!ia) PetscFunctionReturn(0); 82 83 if (oshift == 1) { 84 PetscInt nz = a->i[n]-1; 85 for (i=0; i<nz; i++) a->j[i]--; 86 for (i=0; i<n+1; i++) a->i[i]--; 87 } 88 PetscFunctionReturn(0); 89 } 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 93 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 94 { 95 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 96 PetscErrorCode ierr; 97 98 PetscFunctionBegin; 99 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->m,a->nz); 100 ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,&a->a,&a->j,&a->i);CHKERRQ(ierr); 101 if (a->row) { 102 ierr = ISDestroy(a->row);CHKERRQ(ierr); 103 } 104 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 105 if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 106 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 107 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 108 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 109 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 110 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 111 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 112 113 if (a->inew){ 114 ierr = PetscFree(a->inew);CHKERRQ(ierr); 115 a->inew = 0; 116 } 117 ierr = PetscFree(a);CHKERRQ(ierr); 118 119 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 120 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 121 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 122 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 123 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 124 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 125 PetscFunctionReturn(0); 126 } 127 128 #undef __FUNCT__ 129 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 130 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op) 131 { 132 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 133 PetscErrorCode ierr; 134 135 PetscFunctionBegin; 136 switch (op) { 137 case MAT_ROW_ORIENTED: 138 a->roworiented = PETSC_TRUE; 139 break; 140 case MAT_COLUMN_ORIENTED: 141 a->roworiented = PETSC_FALSE; 142 break; 143 case MAT_COLUMNS_SORTED: 144 a->sorted = PETSC_TRUE; 145 break; 146 case MAT_COLUMNS_UNSORTED: 147 a->sorted = PETSC_FALSE; 148 break; 149 case MAT_KEEP_ZEROED_ROWS: 150 a->keepzeroedrows = PETSC_TRUE; 151 break; 152 case MAT_NO_NEW_NONZERO_LOCATIONS: 153 a->nonew = 1; 154 break; 155 case MAT_NEW_NONZERO_LOCATION_ERR: 156 a->nonew = -1; 157 break; 158 case MAT_NEW_NONZERO_ALLOCATION_ERR: 159 a->nonew = -2; 160 break; 161 case MAT_YES_NEW_NONZERO_LOCATIONS: 162 a->nonew = 0; 163 break; 164 case MAT_ROWS_SORTED: 165 case MAT_ROWS_UNSORTED: 166 case MAT_YES_NEW_DIAGONALS: 167 case MAT_IGNORE_OFF_PROC_ENTRIES: 168 case MAT_USE_HASH_TABLE: 169 ierr = PetscLogInfo((A,"MatSetOption_SeqSBAIJ:Option ignored\n"));CHKERRQ(ierr); 170 break; 171 case MAT_NO_NEW_DIAGONALS: 172 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 173 case MAT_NOT_SYMMETRIC: 174 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 175 case MAT_HERMITIAN: 176 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 177 case MAT_SYMMETRIC: 178 case MAT_STRUCTURALLY_SYMMETRIC: 179 case MAT_NOT_HERMITIAN: 180 case MAT_SYMMETRY_ETERNAL: 181 case MAT_NOT_SYMMETRY_ETERNAL: 182 case MAT_IGNORE_LOWER_TRIANGULAR: 183 a->ignore_ltriangular = PETSC_TRUE; 184 break; 185 case MAT_ERROR_LOWER_TRIANGULAR: 186 a->ignore_ltriangular = PETSC_FALSE; 187 break; 188 default: 189 SETERRQ(PETSC_ERR_SUP,"unknown option"); 190 } 191 PetscFunctionReturn(0); 192 } 193 194 #undef __FUNCT__ 195 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 196 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v) 197 { 198 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 199 PetscErrorCode ierr; 200 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 201 MatScalar *aa,*aa_i; 202 PetscScalar *v_i; 203 204 PetscFunctionBegin; 205 bs = A->bs; 206 ai = a->i; 207 aj = a->j; 208 aa = a->a; 209 bs2 = a->bs2; 210 211 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 212 213 bn = row/bs; /* Block number */ 214 bp = row % bs; /* Block position */ 215 M = ai[bn+1] - ai[bn]; 216 *ncols = bs*M; 217 218 if (v) { 219 *v = 0; 220 if (*ncols) { 221 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 222 for (i=0; i<M; i++) { /* for each block in the block row */ 223 v_i = *v + i*bs; 224 aa_i = aa + bs2*(ai[bn] + i); 225 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 226 } 227 } 228 } 229 230 if (cols) { 231 *cols = 0; 232 if (*ncols) { 233 ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr); 234 for (i=0; i<M; i++) { /* for each block in the block row */ 235 cols_i = *cols + i*bs; 236 itmp = bs*aj[ai[bn] + i]; 237 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 238 } 239 } 240 } 241 242 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 243 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 244 #ifdef column_search 245 v_i = *v + M*bs; 246 cols_i = *cols + M*bs; 247 for (i=0; i<bn; i++){ /* for each block row */ 248 M = ai[i+1] - ai[i]; 249 for (j=0; j<M; j++){ 250 itmp = aj[ai[i] + j]; /* block column value */ 251 if (itmp == bn){ 252 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 253 for (k=0; k<bs; k++) { 254 *cols_i++ = i*bs+k; 255 *v_i++ = aa_i[k]; 256 } 257 *ncols += bs; 258 break; 259 } 260 } 261 } 262 #endif 263 PetscFunctionReturn(0); 264 } 265 266 #undef __FUNCT__ 267 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 268 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 269 { 270 PetscErrorCode ierr; 271 272 PetscFunctionBegin; 273 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 274 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 280 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 281 { 282 PetscErrorCode ierr; 283 PetscFunctionBegin; 284 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 285 PetscFunctionReturn(0); 286 } 287 288 #undef __FUNCT__ 289 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 290 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 291 { 292 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 293 PetscErrorCode ierr; 294 PetscInt i,j,bs = A->bs,k,l,bs2=a->bs2; 295 const char *name; 296 PetscViewerFormat format; 297 298 PetscFunctionBegin; 299 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 300 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 301 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 302 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 303 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 304 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 305 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 306 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 307 for (i=0; i<a->mbs; i++) { 308 for (j=0; j<bs; j++) { 309 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 310 for (k=a->i[i]; k<a->i[i+1]; k++) { 311 for (l=0; l<bs; l++) { 312 #if defined(PETSC_USE_COMPLEX) 313 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 314 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 315 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 316 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 317 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 318 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 319 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 320 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 321 } 322 #else 323 if (a->a[bs2*k + l*bs + j] != 0.0) { 324 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 325 } 326 #endif 327 } 328 } 329 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 330 } 331 } 332 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 333 } else { 334 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 335 for (i=0; i<a->mbs; i++) { 336 for (j=0; j<bs; j++) { 337 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 338 for (k=a->i[i]; k<a->i[i+1]; k++) { 339 for (l=0; l<bs; l++) { 340 #if defined(PETSC_USE_COMPLEX) 341 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 342 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 343 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 344 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 345 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 346 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 347 } else { 348 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 349 } 350 #else 351 ierr = PetscViewerASCIIPrintf(viewer," %D %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 352 #endif 353 } 354 } 355 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 356 } 357 } 358 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 359 } 360 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 361 PetscFunctionReturn(0); 362 } 363 364 #undef __FUNCT__ 365 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 366 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 367 { 368 Mat A = (Mat) Aa; 369 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 370 PetscErrorCode ierr; 371 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2; 372 PetscMPIInt rank; 373 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 374 MatScalar *aa; 375 MPI_Comm comm; 376 PetscViewer viewer; 377 378 PetscFunctionBegin; 379 /* 380 This is nasty. If this is called from an originally parallel matrix 381 then all processes call this,but only the first has the matrix so the 382 rest should return immediately. 383 */ 384 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 385 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 386 if (rank) PetscFunctionReturn(0); 387 388 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 389 390 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 391 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 392 393 /* loop over matrix elements drawing boxes */ 394 color = PETSC_DRAW_BLUE; 395 for (i=0,row=0; i<mbs; i++,row+=bs) { 396 for (j=a->i[i]; j<a->i[i+1]; j++) { 397 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 398 x_l = a->j[j]*bs; x_r = x_l + 1.0; 399 aa = a->a + j*bs2; 400 for (k=0; k<bs; k++) { 401 for (l=0; l<bs; l++) { 402 if (PetscRealPart(*aa++) >= 0.) continue; 403 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 404 } 405 } 406 } 407 } 408 color = PETSC_DRAW_CYAN; 409 for (i=0,row=0; i<mbs; i++,row+=bs) { 410 for (j=a->i[i]; j<a->i[i+1]; j++) { 411 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 412 x_l = a->j[j]*bs; x_r = x_l + 1.0; 413 aa = a->a + j*bs2; 414 for (k=0; k<bs; k++) { 415 for (l=0; l<bs; l++) { 416 if (PetscRealPart(*aa++) != 0.) continue; 417 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 418 } 419 } 420 } 421 } 422 423 color = PETSC_DRAW_RED; 424 for (i=0,row=0; i<mbs; i++,row+=bs) { 425 for (j=a->i[i]; j<a->i[i+1]; j++) { 426 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 427 x_l = a->j[j]*bs; x_r = x_l + 1.0; 428 aa = a->a + j*bs2; 429 for (k=0; k<bs; k++) { 430 for (l=0; l<bs; l++) { 431 if (PetscRealPart(*aa++) <= 0.) continue; 432 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 433 } 434 } 435 } 436 } 437 PetscFunctionReturn(0); 438 } 439 440 #undef __FUNCT__ 441 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 442 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 443 { 444 PetscErrorCode ierr; 445 PetscReal xl,yl,xr,yr,w,h; 446 PetscDraw draw; 447 PetscTruth isnull; 448 449 PetscFunctionBegin; 450 451 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 452 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 453 454 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 455 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 456 xr += w; yr += h; xl = -w; yl = -h; 457 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 458 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 459 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 460 PetscFunctionReturn(0); 461 } 462 463 #undef __FUNCT__ 464 #define __FUNCT__ "MatView_SeqSBAIJ" 465 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 466 { 467 PetscErrorCode ierr; 468 PetscTruth iascii,isdraw; 469 470 PetscFunctionBegin; 471 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 472 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 473 if (iascii){ 474 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 475 } else if (isdraw) { 476 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 477 } else { 478 Mat B; 479 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 480 ierr = MatView(B,viewer);CHKERRQ(ierr); 481 ierr = MatDestroy(B);CHKERRQ(ierr); 482 } 483 PetscFunctionReturn(0); 484 } 485 486 487 #undef __FUNCT__ 488 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 489 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 490 { 491 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 492 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 493 PetscInt *ai = a->i,*ailen = a->ilen; 494 PetscInt brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2; 495 MatScalar *ap,*aa = a->a,zero = 0.0; 496 497 PetscFunctionBegin; 498 for (k=0; k<m; k++) { /* loop over rows */ 499 row = im[k]; brow = row/bs; 500 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); 501 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 502 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 503 nrow = ailen[brow]; 504 for (l=0; l<n; l++) { /* loop over columns */ 505 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); 506 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1); 507 col = in[l] ; 508 bcol = col/bs; 509 cidx = col%bs; 510 ridx = row%bs; 511 high = nrow; 512 low = 0; /* assume unsorted */ 513 while (high-low > 5) { 514 t = (low+high)/2; 515 if (rp[t] > bcol) high = t; 516 else low = t; 517 } 518 for (i=low; i<high; i++) { 519 if (rp[i] > bcol) break; 520 if (rp[i] == bcol) { 521 *v++ = ap[bs2*i+bs*cidx+ridx]; 522 goto finished; 523 } 524 } 525 *v++ = zero; 526 finished:; 527 } 528 } 529 PetscFunctionReturn(0); 530 } 531 532 533 #undef __FUNCT__ 534 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 535 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 536 { 537 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 538 PetscErrorCode ierr; 539 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 540 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 541 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval; 542 PetscTruth roworiented=a->roworiented; 543 const MatScalar *value = v; 544 MatScalar *ap,*aa = a->a,*bap; 545 546 PetscFunctionBegin; 547 if (roworiented) { 548 stepval = (n-1)*bs; 549 } else { 550 stepval = (m-1)*bs; 551 } 552 for (k=0; k<m; k++) { /* loop over added rows */ 553 row = im[k]; 554 if (row < 0) continue; 555 #if defined(PETSC_USE_DEBUG) 556 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 557 #endif 558 rp = aj + ai[row]; 559 ap = aa + bs2*ai[row]; 560 rmax = imax[row]; 561 nrow = ailen[row]; 562 low = 0; 563 high = nrow; 564 for (l=0; l<n; l++) { /* loop over added columns */ 565 if (in[l] < 0) continue; 566 col = in[l]; 567 #if defined(PETSC_USE_DEBUG) 568 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 569 #endif 570 if (col < row) continue; /* ignore lower triangular block */ 571 if (roworiented) { 572 value = v + k*(stepval+bs)*bs + l*bs; 573 } else { 574 value = v + l*(stepval+bs)*bs + k*bs; 575 } 576 if (col <= lastcol) low = 0; else high = nrow; 577 lastcol = col; 578 while (high-low > 7) { 579 t = (low+high)/2; 580 if (rp[t] > col) high = t; 581 else low = t; 582 } 583 for (i=low; i<high; i++) { 584 if (rp[i] > col) break; 585 if (rp[i] == col) { 586 bap = ap + bs2*i; 587 if (roworiented) { 588 if (is == ADD_VALUES) { 589 for (ii=0; ii<bs; ii++,value+=stepval) { 590 for (jj=ii; jj<bs2; jj+=bs) { 591 bap[jj] += *value++; 592 } 593 } 594 } else { 595 for (ii=0; ii<bs; ii++,value+=stepval) { 596 for (jj=ii; jj<bs2; jj+=bs) { 597 bap[jj] = *value++; 598 } 599 } 600 } 601 } else { 602 if (is == ADD_VALUES) { 603 for (ii=0; ii<bs; ii++,value+=stepval) { 604 for (jj=0; jj<bs; jj++) { 605 *bap++ += *value++; 606 } 607 } 608 } else { 609 for (ii=0; ii<bs; ii++,value+=stepval) { 610 for (jj=0; jj<bs; jj++) { 611 *bap++ = *value++; 612 } 613 } 614 } 615 } 616 goto noinsert2; 617 } 618 } 619 if (nonew == 1) goto noinsert2; 620 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 621 MatSeqXAIJReallocateAIJ(a,bs2,nrow,row,col,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew); 622 N = nrow++ - 1; 623 /* shift up all the later entries in this row */ 624 for (ii=N; ii>=i; ii--) { 625 rp[ii+1] = rp[ii]; 626 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 627 } 628 if (N >= i) { 629 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 630 } 631 rp[i] = col; 632 bap = ap + bs2*i; 633 if (roworiented) { 634 for (ii=0; ii<bs; ii++,value+=stepval) { 635 for (jj=ii; jj<bs2; jj+=bs) { 636 bap[jj] = *value++; 637 } 638 } 639 } else { 640 for (ii=0; ii<bs; ii++,value+=stepval) { 641 for (jj=0; jj<bs; jj++) { 642 *bap++ = *value++; 643 } 644 } 645 } 646 noinsert2:; 647 low = i; 648 } 649 ailen[row] = nrow; 650 } 651 PetscFunctionReturn(0); 652 } 653 654 #undef __FUNCT__ 655 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 656 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 657 { 658 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 659 PetscErrorCode ierr; 660 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 661 PetscInt m = A->m,*ip,N,*ailen = a->ilen; 662 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 663 MatScalar *aa = a->a,*ap; 664 665 PetscFunctionBegin; 666 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 667 668 if (m) rmax = ailen[0]; 669 for (i=1; i<mbs; i++) { 670 /* move each row back by the amount of empty slots (fshift) before it*/ 671 fshift += imax[i-1] - ailen[i-1]; 672 rmax = PetscMax(rmax,ailen[i]); 673 if (fshift) { 674 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 675 N = ailen[i]; 676 for (j=0; j<N; j++) { 677 ip[j-fshift] = ip[j]; 678 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 679 } 680 } 681 ai[i] = ai[i-1] + ailen[i-1]; 682 } 683 if (mbs) { 684 fshift += imax[mbs-1] - ailen[mbs-1]; 685 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 686 } 687 /* reset ilen and imax for each row */ 688 for (i=0; i<mbs; i++) { 689 ailen[i] = imax[i] = ai[i+1] - ai[i]; 690 } 691 a->nz = ai[mbs]; 692 693 /* diagonals may have moved, reset it */ 694 if (a->diag) { 695 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 696 } 697 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n", 698 m,A->m,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr); 699 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr); 700 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr); 701 a->reallocs = 0; 702 A->info.nz_unneeded = (PetscReal)fshift*bs2; 703 PetscFunctionReturn(0); 704 } 705 706 /* 707 This function returns an array of flags which indicate the locations of contiguous 708 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 709 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 710 Assume: sizes should be long enough to hold all the values. 711 */ 712 #undef __FUNCT__ 713 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 714 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 715 { 716 PetscInt i,j,k,row; 717 PetscTruth flg; 718 719 PetscFunctionBegin; 720 for (i=0,j=0; i<n; j++) { 721 row = idx[i]; 722 if (row%bs!=0) { /* Not the begining of a block */ 723 sizes[j] = 1; 724 i++; 725 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 726 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 727 i++; 728 } else { /* Begining of the block, so check if the complete block exists */ 729 flg = PETSC_TRUE; 730 for (k=1; k<bs; k++) { 731 if (row+k != idx[i+k]) { /* break in the block */ 732 flg = PETSC_FALSE; 733 break; 734 } 735 } 736 if (flg) { /* No break in the bs */ 737 sizes[j] = bs; 738 i+= bs; 739 } else { 740 sizes[j] = 1; 741 i++; 742 } 743 } 744 } 745 *bs_max = j; 746 PetscFunctionReturn(0); 747 } 748 749 750 /* Only add/insert a(i,j) with i<=j (blocks). 751 Any a(i,j) with i>j input by user is ingored. 752 */ 753 754 #undef __FUNCT__ 755 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 756 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 757 { 758 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 759 PetscErrorCode ierr; 760 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 761 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 762 PetscInt *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol; 763 PetscInt ridx,cidx,bs2=a->bs2; 764 MatScalar *ap,value,*aa=a->a,*bap; 765 766 PetscFunctionBegin; 767 for (k=0; k<m; k++) { /* loop over added rows */ 768 row = im[k]; /* row number */ 769 brow = row/bs; /* block row number */ 770 if (row < 0) continue; 771 #if defined(PETSC_USE_DEBUG) 772 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 773 #endif 774 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 775 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 776 rmax = imax[brow]; /* maximum space allocated for this row */ 777 nrow = ailen[brow]; /* actual length of this row */ 778 low = 0; 779 780 for (l=0; l<n; l++) { /* loop over added columns */ 781 if (in[l] < 0) continue; 782 #if defined(PETSC_USE_DEBUG) 783 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->m-1); 784 #endif 785 col = in[l]; 786 bcol = col/bs; /* block col number */ 787 788 if (brow > bcol) { 789 if (a->ignore_ltriangular){ 790 continue; /* ignore lower triangular values */ 791 } else { 792 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)"); 793 } 794 } 795 796 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 797 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 798 /* element value a(k,l) */ 799 if (roworiented) { 800 value = v[l + k*n]; 801 } else { 802 value = v[k + l*m]; 803 } 804 805 /* move pointer bap to a(k,l) quickly and add/insert value */ 806 if (col <= lastcol) low = 0; high = nrow; 807 lastcol = col; 808 while (high-low > 7) { 809 t = (low+high)/2; 810 if (rp[t] > bcol) high = t; 811 else low = t; 812 } 813 for (i=low; i<high; i++) { 814 if (rp[i] > bcol) break; 815 if (rp[i] == bcol) { 816 bap = ap + bs2*i + bs*cidx + ridx; 817 if (is == ADD_VALUES) *bap += value; 818 else *bap = value; 819 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 820 if (brow == bcol && ridx < cidx){ 821 bap = ap + bs2*i + bs*ridx + cidx; 822 if (is == ADD_VALUES) *bap += value; 823 else *bap = value; 824 } 825 goto noinsert1; 826 } 827 } 828 829 if (nonew == 1) goto noinsert1; 830 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 831 MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew); 832 833 N = nrow++ - 1; 834 /* shift up all the later entries in this row */ 835 for (ii=N; ii>=i; ii--) { 836 rp[ii+1] = rp[ii]; 837 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 838 } 839 if (N>=i) { 840 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 841 } 842 rp[i] = bcol; 843 ap[bs2*i + bs*cidx + ridx] = value; 844 noinsert1:; 845 low = i; 846 } 847 } /* end of loop over added columns */ 848 ailen[brow] = nrow; 849 } /* end of loop over added rows */ 850 PetscFunctionReturn(0); 851 } 852 853 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 854 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 855 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,PetscInt,IS[],PetscInt); 856 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,PetscInt,MatReuse,Mat*); 857 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]); 858 EXTERN PetscErrorCode MatScale_SeqSBAIJ(Mat,PetscScalar); 859 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 860 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 861 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec); 862 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 863 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 864 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat); 865 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec); 866 867 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 868 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 869 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 870 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 871 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 872 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 873 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 874 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 875 876 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 877 878 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 879 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 880 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 881 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 882 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 883 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 884 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 885 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 886 887 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,MatFactorInfo*,Mat*); 888 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,MatFactorInfo*,Mat*); 889 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,MatFactorInfo*,Mat*); 890 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,MatFactorInfo*,Mat*); 891 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,MatFactorInfo*,Mat*); 892 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,MatFactorInfo*,Mat*); 893 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,MatFactorInfo*,Mat*); 894 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,MatFactorInfo*,Mat*); 895 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,PetscInt*,PetscInt*,PetscInt*); 896 897 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 898 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 899 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 900 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 901 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 902 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 903 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 904 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 905 906 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 907 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 908 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 909 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 910 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 911 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 912 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 913 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 914 915 #ifdef HAVE_MatICCFactor 916 /* modified from MatILUFactor_SeqSBAIJ, needs further work! */ 917 #undef __FUNCT__ 918 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 919 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 920 { 921 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 922 Mat outA; 923 PetscErrorCode ierr; 924 PetscTruth row_identity,col_identity; 925 926 PetscFunctionBegin; 927 outA = inA; 928 inA->factor = FACTOR_CHOLESKY; 929 930 if (!a->diag) { 931 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 932 } 933 /* 934 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 935 for ILU(0) factorization with natural ordering 936 */ 937 switch (a->bs) { 938 case 1: 939 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 940 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 941 inA->ops->solves = MatSolves_SeqSBAIJ_1; 942 ierr = PetscLoginfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"));CHKERRQ(ierr); 943 case 2: 944 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 945 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 946 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 947 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"));CHKERRQ(ierr); 948 break; 949 case 3: 950 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 951 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 952 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 953 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"));CHKERRQ(ierr); 954 break; 955 case 4: 956 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 957 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 958 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 959 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"));CHKERRQ(ierr); 960 break; 961 case 5: 962 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 963 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 964 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 965 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"));CHKERRQ(ierr); 966 break; 967 case 6: 968 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 969 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 970 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 971 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"));CHKERRQ(ierr); 972 break; 973 case 7: 974 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 975 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 976 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 977 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"));CHKERRQ(ierr); 978 break; 979 default: 980 a->row = row; 981 a->icol = col; 982 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 983 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 984 985 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 986 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 987 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 988 989 if (!a->solve_work) { 990 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 991 ierr = PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 992 } 993 } 994 995 ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 996 PetscFunctionReturn(0); 997 } 998 #endif 999 1000 #undef __FUNCT__ 1001 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1002 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A) 1003 { 1004 static PetscTruth called = PETSC_FALSE; 1005 MPI_Comm comm = A->comm; 1006 PetscErrorCode ierr; 1007 1008 PetscFunctionBegin; 1009 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1010 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1011 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1012 ierr = (*PetscHelpPrintf)(comm," -mat_ignore_ltriangular: Ignore lower triangular values set by user\n");CHKERRQ(ierr); 1013 PetscFunctionReturn(0); 1014 } 1015 1016 EXTERN_C_BEGIN 1017 #undef __FUNCT__ 1018 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1019 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices) 1020 { 1021 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1022 PetscInt i,nz,n; 1023 1024 PetscFunctionBegin; 1025 nz = baij->maxnz; 1026 n = mat->n; 1027 for (i=0; i<nz; i++) { 1028 baij->j[i] = indices[i]; 1029 } 1030 baij->nz = nz; 1031 for (i=0; i<n; i++) { 1032 baij->ilen[i] = baij->imax[i]; 1033 } 1034 PetscFunctionReturn(0); 1035 } 1036 EXTERN_C_END 1037 1038 #undef __FUNCT__ 1039 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1040 /*@ 1041 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1042 in the matrix. 1043 1044 Input Parameters: 1045 + mat - the SeqSBAIJ matrix 1046 - indices - the column indices 1047 1048 Level: advanced 1049 1050 Notes: 1051 This can be called if you have precomputed the nonzero structure of the 1052 matrix and want to provide it to the matrix object to improve the performance 1053 of the MatSetValues() operation. 1054 1055 You MUST have set the correct numbers of nonzeros per row in the call to 1056 MatCreateSeqSBAIJ(), and the columns indices MUST be sorted. 1057 1058 MUST be called before any calls to MatSetValues() 1059 1060 .seealso: MatCreateSeqSBAIJ 1061 @*/ 1062 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1063 { 1064 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1065 1066 PetscFunctionBegin; 1067 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1068 PetscValidPointer(indices,2); 1069 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1070 if (f) { 1071 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1072 } else { 1073 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1074 } 1075 PetscFunctionReturn(0); 1076 } 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1080 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1081 { 1082 PetscErrorCode ierr; 1083 1084 PetscFunctionBegin; 1085 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1086 PetscFunctionReturn(0); 1087 } 1088 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1091 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1092 { 1093 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1094 PetscFunctionBegin; 1095 *array = a->a; 1096 PetscFunctionReturn(0); 1097 } 1098 1099 #undef __FUNCT__ 1100 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1101 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1102 { 1103 PetscFunctionBegin; 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #include "petscblaslapack.h" 1108 #undef __FUNCT__ 1109 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1110 PetscErrorCode MatAXPY_SeqSBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1111 { 1112 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1113 PetscErrorCode ierr; 1114 PetscInt i,bs=Y->bs,bs2,j; 1115 PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1; 1116 1117 PetscFunctionBegin; 1118 if (str == SAME_NONZERO_PATTERN) { 1119 PetscScalar alpha = a; 1120 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1121 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1122 if (y->xtoy && y->XtoY != X) { 1123 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1124 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1125 } 1126 if (!y->xtoy) { /* get xtoy */ 1127 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1128 y->XtoY = X; 1129 } 1130 bs2 = bs*bs; 1131 for (i=0; i<x->nz; i++) { 1132 j = 0; 1133 while (j < bs2){ 1134 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 1135 j++; 1136 } 1137 } 1138 ierr = PetscLogInfo((0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr); 1139 } else { 1140 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1141 } 1142 PetscFunctionReturn(0); 1143 } 1144 1145 #undef __FUNCT__ 1146 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1147 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1148 { 1149 PetscFunctionBegin; 1150 *flg = PETSC_TRUE; 1151 PetscFunctionReturn(0); 1152 } 1153 1154 #undef __FUNCT__ 1155 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1156 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1157 { 1158 PetscFunctionBegin; 1159 *flg = PETSC_TRUE; 1160 PetscFunctionReturn(0); 1161 } 1162 1163 #undef __FUNCT__ 1164 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1165 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1166 { 1167 PetscFunctionBegin; 1168 *flg = PETSC_FALSE; 1169 PetscFunctionReturn(0); 1170 } 1171 1172 /* -------------------------------------------------------------------*/ 1173 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1174 MatGetRow_SeqSBAIJ, 1175 MatRestoreRow_SeqSBAIJ, 1176 MatMult_SeqSBAIJ_N, 1177 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1178 MatMult_SeqSBAIJ_N, /* transpose versions are same as non-transpose versions */ 1179 MatMultAdd_SeqSBAIJ_N, 1180 MatSolve_SeqSBAIJ_N, 1181 0, 1182 0, 1183 /*10*/ 0, 1184 0, 1185 MatCholeskyFactor_SeqSBAIJ, 1186 MatRelax_SeqSBAIJ, 1187 MatTranspose_SeqSBAIJ, 1188 /*15*/ MatGetInfo_SeqSBAIJ, 1189 MatEqual_SeqSBAIJ, 1190 MatGetDiagonal_SeqSBAIJ, 1191 MatDiagonalScale_SeqSBAIJ, 1192 MatNorm_SeqSBAIJ, 1193 /*20*/ 0, 1194 MatAssemblyEnd_SeqSBAIJ, 1195 0, 1196 MatSetOption_SeqSBAIJ, 1197 MatZeroEntries_SeqSBAIJ, 1198 /*25*/ 0, 1199 0, 1200 0, 1201 MatCholeskyFactorSymbolic_SeqSBAIJ, 1202 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1203 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1204 0, 1205 MatICCFactorSymbolic_SeqSBAIJ, 1206 MatGetArray_SeqSBAIJ, 1207 MatRestoreArray_SeqSBAIJ, 1208 /*35*/ MatDuplicate_SeqSBAIJ, 1209 0, 1210 0, 1211 0, 1212 0, 1213 /*40*/ MatAXPY_SeqSBAIJ, 1214 MatGetSubMatrices_SeqSBAIJ, 1215 MatIncreaseOverlap_SeqSBAIJ, 1216 MatGetValues_SeqSBAIJ, 1217 0, 1218 /*45*/ MatPrintHelp_SeqSBAIJ, 1219 MatScale_SeqSBAIJ, 1220 0, 1221 0, 1222 0, 1223 /*50*/ 0, 1224 MatGetRowIJ_SeqSBAIJ, 1225 MatRestoreRowIJ_SeqSBAIJ, 1226 0, 1227 0, 1228 /*55*/ 0, 1229 0, 1230 0, 1231 0, 1232 MatSetValuesBlocked_SeqSBAIJ, 1233 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1234 0, 1235 0, 1236 MatGetPetscMaps_Petsc, 1237 0, 1238 /*65*/ 0, 1239 0, 1240 0, 1241 0, 1242 0, 1243 /*70*/ MatGetRowMax_SeqSBAIJ, 1244 0, 1245 0, 1246 0, 1247 0, 1248 /*75*/ 0, 1249 0, 1250 0, 1251 0, 1252 0, 1253 /*80*/ 0, 1254 0, 1255 0, 1256 #if !defined(PETSC_USE_COMPLEX) 1257 MatGetInertia_SeqSBAIJ, 1258 #else 1259 0, 1260 #endif 1261 MatLoad_SeqSBAIJ, 1262 /*85*/ MatIsSymmetric_SeqSBAIJ, 1263 MatIsHermitian_SeqSBAIJ, 1264 MatIsStructurallySymmetric_SeqSBAIJ, 1265 0, 1266 0, 1267 /*90*/ 0, 1268 0, 1269 0, 1270 0, 1271 0, 1272 /*95*/ 0, 1273 0, 1274 0, 1275 0}; 1276 1277 EXTERN_C_BEGIN 1278 #undef __FUNCT__ 1279 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1280 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat) 1281 { 1282 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1283 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1284 PetscErrorCode ierr; 1285 1286 PetscFunctionBegin; 1287 if (aij->nonew != 1) { 1288 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1289 } 1290 1291 /* allocate space for values if not already there */ 1292 if (!aij->saved_values) { 1293 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1294 } 1295 1296 /* copy values over */ 1297 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1298 PetscFunctionReturn(0); 1299 } 1300 EXTERN_C_END 1301 1302 EXTERN_C_BEGIN 1303 #undef __FUNCT__ 1304 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1305 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat) 1306 { 1307 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1308 PetscErrorCode ierr; 1309 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1310 1311 PetscFunctionBegin; 1312 if (aij->nonew != 1) { 1313 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1314 } 1315 if (!aij->saved_values) { 1316 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1317 } 1318 1319 /* copy values over */ 1320 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1321 PetscFunctionReturn(0); 1322 } 1323 EXTERN_C_END 1324 1325 EXTERN_C_BEGIN 1326 #undef __FUNCT__ 1327 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1328 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 1329 { 1330 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1331 PetscErrorCode ierr; 1332 PetscInt i,mbs,bs2; 1333 PetscTruth skipallocation = PETSC_FALSE,flg; 1334 1335 PetscFunctionBegin; 1336 B->preallocated = PETSC_TRUE; 1337 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1338 mbs = B->m/bs; 1339 bs2 = bs*bs; 1340 1341 if (mbs*bs != B->m) { 1342 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1343 } 1344 1345 if (nz == MAT_SKIP_ALLOCATION) { 1346 skipallocation = PETSC_TRUE; 1347 nz = 0; 1348 } 1349 1350 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1351 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1352 if (nnz) { 1353 for (i=0; i<mbs; i++) { 1354 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1355 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1356 } 1357 } 1358 1359 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1360 if (!flg) { 1361 switch (bs) { 1362 case 1: 1363 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1364 B->ops->solve = MatSolve_SeqSBAIJ_1; 1365 B->ops->solves = MatSolves_SeqSBAIJ_1; 1366 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1367 B->ops->mult = MatMult_SeqSBAIJ_1; 1368 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1369 B->ops->multtranspose = MatMult_SeqSBAIJ_1; 1370 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_1; 1371 break; 1372 case 2: 1373 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1374 B->ops->solve = MatSolve_SeqSBAIJ_2; 1375 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1376 B->ops->mult = MatMult_SeqSBAIJ_2; 1377 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1378 B->ops->multtranspose = MatMult_SeqSBAIJ_2; 1379 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_2; 1380 break; 1381 case 3: 1382 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1383 B->ops->solve = MatSolve_SeqSBAIJ_3; 1384 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1385 B->ops->mult = MatMult_SeqSBAIJ_3; 1386 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1387 B->ops->multtranspose = MatMult_SeqSBAIJ_3; 1388 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_3; 1389 break; 1390 case 4: 1391 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1392 B->ops->solve = MatSolve_SeqSBAIJ_4; 1393 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1394 B->ops->mult = MatMult_SeqSBAIJ_4; 1395 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1396 B->ops->multtranspose = MatMult_SeqSBAIJ_4; 1397 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_4; 1398 break; 1399 case 5: 1400 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1401 B->ops->solve = MatSolve_SeqSBAIJ_5; 1402 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1403 B->ops->mult = MatMult_SeqSBAIJ_5; 1404 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1405 B->ops->multtranspose = MatMult_SeqSBAIJ_5; 1406 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_5; 1407 break; 1408 case 6: 1409 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1410 B->ops->solve = MatSolve_SeqSBAIJ_6; 1411 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1412 B->ops->mult = MatMult_SeqSBAIJ_6; 1413 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1414 B->ops->multtranspose = MatMult_SeqSBAIJ_6; 1415 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_6; 1416 break; 1417 case 7: 1418 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1419 B->ops->solve = MatSolve_SeqSBAIJ_7; 1420 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1421 B->ops->mult = MatMult_SeqSBAIJ_7; 1422 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1423 B->ops->multtranspose = MatMult_SeqSBAIJ_7; 1424 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_7; 1425 break; 1426 } 1427 } 1428 1429 b->mbs = mbs; 1430 b->nbs = mbs; 1431 if (!skipallocation) { 1432 /* b->ilen will count nonzeros in each block row so far. */ 1433 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 1434 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1435 if (!nnz) { 1436 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1437 else if (nz <= 0) nz = 1; 1438 for (i=0; i<mbs; i++) { 1439 b->imax[i] = nz; 1440 } 1441 nz = nz*mbs; /* total nz */ 1442 } else { 1443 nz = 0; 1444 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1445 } 1446 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1447 1448 /* allocate the matrix space */ 1449 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->m+1,PetscInt,&b->i);CHKERRQ(ierr); 1450 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1451 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1452 b->singlemalloc = PETSC_TRUE; 1453 1454 /* pointer to beginning of each row */ 1455 b->i[0] = 0; 1456 for (i=1; i<mbs+1; i++) { 1457 b->i[i] = b->i[i-1] + b->imax[i-1]; 1458 } 1459 } 1460 1461 B->bs = bs; 1462 b->bs2 = bs2; 1463 b->nz = 0; 1464 b->maxnz = nz*bs2; 1465 1466 b->inew = 0; 1467 b->jnew = 0; 1468 b->anew = 0; 1469 b->a2anew = 0; 1470 b->permute = PETSC_FALSE; 1471 PetscFunctionReturn(0); 1472 } 1473 EXTERN_C_END 1474 1475 EXTERN_C_BEGIN 1476 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 1477 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat, MatType,MatReuse,Mat*); 1478 EXTERN_C_END 1479 1480 /*MC 1481 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1482 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1483 1484 Options Database Keys: 1485 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1486 1487 Level: beginner 1488 1489 .seealso: MatCreateSeqSBAIJ 1490 M*/ 1491 1492 EXTERN_C_BEGIN 1493 #undef __FUNCT__ 1494 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1495 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B) 1496 { 1497 Mat_SeqSBAIJ *b; 1498 PetscErrorCode ierr; 1499 PetscMPIInt size; 1500 PetscTruth flg; 1501 1502 PetscFunctionBegin; 1503 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1504 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1505 B->m = B->M = PetscMax(B->m,B->M); 1506 B->n = B->N = PetscMax(B->n,B->N); 1507 1508 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1509 B->data = (void*)b; 1510 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1511 B->ops->destroy = MatDestroy_SeqSBAIJ; 1512 B->ops->view = MatView_SeqSBAIJ; 1513 B->factor = 0; 1514 B->mapping = 0; 1515 b->row = 0; 1516 b->icol = 0; 1517 b->reallocs = 0; 1518 b->saved_values = 0; 1519 1520 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1521 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1522 1523 b->sorted = PETSC_FALSE; 1524 b->roworiented = PETSC_TRUE; 1525 b->nonew = 0; 1526 b->diag = 0; 1527 b->solve_work = 0; 1528 b->mult_work = 0; 1529 B->spptr = 0; 1530 b->keepzeroedrows = PETSC_FALSE; 1531 b->xtoy = 0; 1532 b->XtoY = 0; 1533 1534 b->inew = 0; 1535 b->jnew = 0; 1536 b->anew = 0; 1537 b->a2anew = 0; 1538 b->permute = PETSC_FALSE; 1539 1540 b->ignore_ltriangular = PETSC_FALSE; 1541 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr); 1542 if (flg) b->ignore_ltriangular = PETSC_TRUE; 1543 1544 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1545 "MatStoreValues_SeqSBAIJ", 1546 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1547 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1548 "MatRetrieveValues_SeqSBAIJ", 1549 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1550 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1551 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1552 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1553 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1554 "MatConvert_SeqSBAIJ_SeqAIJ", 1555 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1556 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1557 "MatConvert_SeqSBAIJ_SeqBAIJ", 1558 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1559 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1560 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1561 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1562 1563 B->symmetric = PETSC_TRUE; 1564 B->structurally_symmetric = PETSC_TRUE; 1565 B->symmetric_set = PETSC_TRUE; 1566 B->structurally_symmetric_set = PETSC_TRUE; 1567 PetscFunctionReturn(0); 1568 } 1569 EXTERN_C_END 1570 1571 #undef __FUNCT__ 1572 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1573 /*@C 1574 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1575 compressed row) format. For good matrix assembly performance the 1576 user should preallocate the matrix storage by setting the parameter nz 1577 (or the array nnz). By setting these parameters accurately, performance 1578 during matrix assembly can be increased by more than a factor of 50. 1579 1580 Collective on Mat 1581 1582 Input Parameters: 1583 + A - the symmetric matrix 1584 . bs - size of block 1585 . nz - number of block nonzeros per block row (same for all rows) 1586 - nnz - array containing the number of block nonzeros in the upper triangular plus 1587 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1588 1589 Options Database Keys: 1590 . -mat_no_unroll - uses code that does not unroll the loops in the 1591 block calculations (much slower) 1592 . -mat_block_size - size of the blocks to use 1593 1594 Level: intermediate 1595 1596 Notes: 1597 Specify the preallocated storage with either nz or nnz (not both). 1598 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1599 allocation. For additional details, see the users manual chapter on 1600 matrices. 1601 1602 If the nnz parameter is given then the nz parameter is ignored 1603 1604 1605 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1606 @*/ 1607 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 1608 { 1609 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 1610 1611 PetscFunctionBegin; 1612 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1613 if (f) { 1614 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1615 } 1616 PetscFunctionReturn(0); 1617 } 1618 1619 #undef __FUNCT__ 1620 #define __FUNCT__ "MatCreateSeqSBAIJ" 1621 /*@C 1622 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1623 compressed row) format. For good matrix assembly performance the 1624 user should preallocate the matrix storage by setting the parameter nz 1625 (or the array nnz). By setting these parameters accurately, performance 1626 during matrix assembly can be increased by more than a factor of 50. 1627 1628 Collective on MPI_Comm 1629 1630 Input Parameters: 1631 + comm - MPI communicator, set to PETSC_COMM_SELF 1632 . bs - size of block 1633 . m - number of rows, or number of columns 1634 . nz - number of block nonzeros per block row (same for all rows) 1635 - nnz - array containing the number of block nonzeros in the upper triangular plus 1636 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1637 1638 Output Parameter: 1639 . A - the symmetric matrix 1640 1641 Options Database Keys: 1642 . -mat_no_unroll - uses code that does not unroll the loops in the 1643 block calculations (much slower) 1644 . -mat_block_size - size of the blocks to use 1645 1646 Level: intermediate 1647 1648 Notes: 1649 1650 Specify the preallocated storage with either nz or nnz (not both). 1651 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1652 allocation. For additional details, see the users manual chapter on 1653 matrices. 1654 1655 If the nnz parameter is given then the nz parameter is ignored 1656 1657 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1658 @*/ 1659 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1660 { 1661 PetscErrorCode ierr; 1662 1663 PetscFunctionBegin; 1664 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1665 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1666 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1667 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 1668 PetscFunctionReturn(0); 1669 } 1670 1671 #undef __FUNCT__ 1672 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1673 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1674 { 1675 Mat C; 1676 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1677 PetscErrorCode ierr; 1678 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1679 1680 PetscFunctionBegin; 1681 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1682 1683 *B = 0; 1684 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 1685 ierr = MatSetSizes(C,A->m,A->n,A->m,A->n);CHKERRQ(ierr); 1686 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1687 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1688 c = (Mat_SeqSBAIJ*)C->data; 1689 1690 C->preallocated = PETSC_TRUE; 1691 C->factor = A->factor; 1692 c->row = 0; 1693 c->icol = 0; 1694 c->saved_values = 0; 1695 c->keepzeroedrows = a->keepzeroedrows; 1696 C->assembled = PETSC_TRUE; 1697 1698 C->M = A->M; 1699 C->N = A->N; 1700 C->bs = A->bs; 1701 c->bs2 = a->bs2; 1702 c->mbs = a->mbs; 1703 c->nbs = a->nbs; 1704 1705 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr); 1706 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr); 1707 for (i=0; i<mbs; i++) { 1708 c->imax[i] = a->imax[i]; 1709 c->ilen[i] = a->ilen[i]; 1710 } 1711 ierr = PetscLogObjectMemory(C,2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1712 1713 /* allocate the matrix space */ 1714 ierr = PetscMalloc3(bs2*nz,MatScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 1715 c->singlemalloc = PETSC_TRUE; 1716 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1717 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)));CHKERRQ(ierr); 1718 if (mbs > 0) { 1719 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1720 if (cpvalues == MAT_COPY_VALUES) { 1721 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1722 } else { 1723 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1724 } 1725 } 1726 1727 c->sorted = a->sorted; 1728 c->roworiented = a->roworiented; 1729 c->nonew = a->nonew; 1730 1731 if (a->diag) { 1732 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 1733 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1734 for (i=0; i<mbs; i++) { 1735 c->diag[i] = a->diag[i]; 1736 } 1737 } else c->diag = 0; 1738 c->nz = a->nz; 1739 c->maxnz = a->maxnz; 1740 c->solve_work = 0; 1741 c->mult_work = 0; 1742 *B = C; 1743 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1744 PetscFunctionReturn(0); 1745 } 1746 1747 #undef __FUNCT__ 1748 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1749 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer, MatType type,Mat *A) 1750 { 1751 Mat_SeqSBAIJ *a; 1752 Mat B; 1753 PetscErrorCode ierr; 1754 int fd; 1755 PetscMPIInt size; 1756 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 1757 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1758 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 1759 PetscInt *masked,nmask,tmp,bs2,ishift; 1760 PetscScalar *aa; 1761 MPI_Comm comm = ((PetscObject)viewer)->comm; 1762 1763 PetscFunctionBegin; 1764 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1765 bs2 = bs*bs; 1766 1767 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1768 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1769 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1770 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1771 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1772 M = header[1]; N = header[2]; nz = header[3]; 1773 1774 if (header[3] < 0) { 1775 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1776 } 1777 1778 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1779 1780 /* 1781 This code adds extra rows to make sure the number of rows is 1782 divisible by the blocksize 1783 */ 1784 mbs = M/bs; 1785 extra_rows = bs - M + bs*(mbs); 1786 if (extra_rows == bs) extra_rows = 0; 1787 else mbs++; 1788 if (extra_rows) { 1789 ierr = PetscLogInfo((0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 1790 } 1791 1792 /* read in row lengths */ 1793 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1794 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1795 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1796 1797 /* read in column indices */ 1798 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1799 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1800 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1801 1802 /* loop over row lengths determining block row lengths */ 1803 ierr = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr); 1804 ierr = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1805 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 1806 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1807 masked = mask + mbs; 1808 rowcount = 0; nzcount = 0; 1809 for (i=0; i<mbs; i++) { 1810 nmask = 0; 1811 for (j=0; j<bs; j++) { 1812 kmax = rowlengths[rowcount]; 1813 for (k=0; k<kmax; k++) { 1814 tmp = jj[nzcount++]/bs; /* block col. index */ 1815 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1816 } 1817 rowcount++; 1818 } 1819 s_browlengths[i] += nmask; 1820 1821 /* zero out the mask elements we set */ 1822 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1823 } 1824 1825 /* create our matrix */ 1826 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 1827 ierr = MatSetSizes(B,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 1828 ierr = MatSetType(B,type);CHKERRQ(ierr); 1829 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr); 1830 a = (Mat_SeqSBAIJ*)B->data; 1831 1832 /* set matrix "i" values */ 1833 a->i[0] = 0; 1834 for (i=1; i<= mbs; i++) { 1835 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1836 a->ilen[i-1] = s_browlengths[i-1]; 1837 } 1838 a->nz = a->i[mbs]; 1839 1840 /* read in nonzero values */ 1841 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1842 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1843 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1844 1845 /* set "a" and "j" values into matrix */ 1846 nzcount = 0; jcount = 0; 1847 for (i=0; i<mbs; i++) { 1848 nzcountb = nzcount; 1849 nmask = 0; 1850 for (j=0; j<bs; j++) { 1851 kmax = rowlengths[i*bs+j]; 1852 for (k=0; k<kmax; k++) { 1853 tmp = jj[nzcount++]/bs; /* block col. index */ 1854 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1855 } 1856 } 1857 /* sort the masked values */ 1858 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1859 1860 /* set "j" values into matrix */ 1861 maskcount = 1; 1862 for (j=0; j<nmask; j++) { 1863 a->j[jcount++] = masked[j]; 1864 mask[masked[j]] = maskcount++; 1865 } 1866 1867 /* set "a" values into matrix */ 1868 ishift = bs2*a->i[i]; 1869 for (j=0; j<bs; j++) { 1870 kmax = rowlengths[i*bs+j]; 1871 for (k=0; k<kmax; k++) { 1872 tmp = jj[nzcountb]/bs ; /* block col. index */ 1873 if (tmp >= i){ 1874 block = mask[tmp] - 1; 1875 point = jj[nzcountb] - bs*tmp; 1876 idx = ishift + bs2*block + j + bs*point; 1877 a->a[idx] = aa[nzcountb]; 1878 } 1879 nzcountb++; 1880 } 1881 } 1882 /* zero out the mask elements we set */ 1883 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1884 } 1885 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1886 1887 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1888 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1889 ierr = PetscFree(aa);CHKERRQ(ierr); 1890 ierr = PetscFree(jj);CHKERRQ(ierr); 1891 ierr = PetscFree(mask);CHKERRQ(ierr); 1892 1893 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1894 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1895 ierr = MatView_Private(B);CHKERRQ(ierr); 1896 *A = B; 1897 PetscFunctionReturn(0); 1898 } 1899 1900 #undef __FUNCT__ 1901 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1902 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1903 { 1904 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1905 MatScalar *aa=a->a,*v,*v1; 1906 PetscScalar *x,*b,*t,sum,d; 1907 PetscErrorCode ierr; 1908 PetscInt m=a->mbs,bs=A->bs,*ai=a->i,*aj=a->j; 1909 PetscInt nz,nz1,*vj,*vj1,i; 1910 1911 PetscFunctionBegin; 1912 its = its*lits; 1913 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1914 1915 if (bs > 1) 1916 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1917 1918 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1919 if (xx != bb) { 1920 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1921 } else { 1922 b = x; 1923 } 1924 1925 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1926 1927 if (flag & SOR_ZERO_INITIAL_GUESS) { 1928 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1929 for (i=0; i<m; i++) 1930 t[i] = b[i]; 1931 1932 for (i=0; i<m; i++){ 1933 d = *(aa + ai[i]); /* diag[i] */ 1934 v = aa + ai[i] + 1; 1935 vj = aj + ai[i] + 1; 1936 nz = ai[i+1] - ai[i] - 1; 1937 x[i] = omega*t[i]/d; 1938 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1939 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 1940 } 1941 } 1942 1943 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1944 for (i=0; i<m; i++) 1945 t[i] = b[i]; 1946 1947 for (i=0; i<m-1; i++){ /* update rhs */ 1948 v = aa + ai[i] + 1; 1949 vj = aj + ai[i] + 1; 1950 nz = ai[i+1] - ai[i] - 1; 1951 while (nz--) t[*vj++] -= x[i]*(*v++); 1952 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 1953 } 1954 for (i=m-1; i>=0; i--){ 1955 d = *(aa + ai[i]); 1956 v = aa + ai[i] + 1; 1957 vj = aj + ai[i] + 1; 1958 nz = ai[i+1] - ai[i] - 1; 1959 sum = t[i]; 1960 while (nz--) sum -= x[*vj++]*(*v++); 1961 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 1962 x[i] = (1-omega)*x[i] + omega*sum/d; 1963 } 1964 } 1965 its--; 1966 } 1967 1968 while (its--) { 1969 /* 1970 forward sweep: 1971 for i=0,...,m-1: 1972 sum[i] = (b[i] - U(i,:)x )/d[i]; 1973 x[i] = (1-omega)x[i] + omega*sum[i]; 1974 b = b - x[i]*U^T(i,:); 1975 1976 */ 1977 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1978 for (i=0; i<m; i++) 1979 t[i] = b[i]; 1980 1981 for (i=0; i<m; i++){ 1982 d = *(aa + ai[i]); /* diag[i] */ 1983 v = aa + ai[i] + 1; v1=v; 1984 vj = aj + ai[i] + 1; vj1=vj; 1985 nz = ai[i+1] - ai[i] - 1; nz1=nz; 1986 sum = t[i]; 1987 while (nz1--) sum -= (*v1++)*x[*vj1++]; 1988 x[i] = (1-omega)*x[i] + omega*sum/d; 1989 while (nz--) t[*vj++] -= x[i]*(*v++); 1990 ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr); 1991 } 1992 } 1993 1994 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1995 /* 1996 backward sweep: 1997 b = b - x[i]*U^T(i,:), i=0,...,n-2 1998 for i=m-1,...,0: 1999 sum[i] = (b[i] - U(i,:)x )/d[i]; 2000 x[i] = (1-omega)x[i] + omega*sum[i]; 2001 */ 2002 for (i=0; i<m; i++) 2003 t[i] = b[i]; 2004 2005 for (i=0; i<m-1; i++){ /* update rhs */ 2006 v = aa + ai[i] + 1; 2007 vj = aj + ai[i] + 1; 2008 nz = ai[i+1] - ai[i] - 1; 2009 while (nz--) t[*vj++] -= x[i]*(*v++); 2010 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2011 } 2012 for (i=m-1; i>=0; i--){ 2013 d = *(aa + ai[i]); 2014 v = aa + ai[i] + 1; 2015 vj = aj + ai[i] + 1; 2016 nz = ai[i+1] - ai[i] - 1; 2017 sum = t[i]; 2018 while (nz--) sum -= x[*vj++]*(*v++); 2019 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2020 x[i] = (1-omega)*x[i] + omega*sum/d; 2021 } 2022 } 2023 } 2024 2025 ierr = PetscFree(t);CHKERRQ(ierr); 2026 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2027 if (bb != xx) { 2028 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2029 } 2030 PetscFunctionReturn(0); 2031 } 2032 2033 2034 2035 2036 2037 2038