1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/inline/spops.h" 9 #include "src/mat/impls/sbaij/seq/sbaij.h" 10 11 #define CHUNKSIZE 10 12 13 /* 14 Checks for missing diagonals 15 */ 16 #undef __FUNCT__ 17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 19 { 20 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 21 PetscErrorCode ierr; 22 PetscInt *diag,*jj = a->j,i; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 PetscErrorCode ierr; 39 PetscInt i,mbs = a->mbs; 40 41 PetscFunctionBegin; 42 if (a->diag) PetscFunctionReturn(0); 43 44 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 45 ierr = PetscLogObjectMemory(A,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 46 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 47 PetscFunctionReturn(0); 48 } 49 50 #undef __FUNCT__ 51 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 52 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 53 { 54 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 55 PetscInt n = a->mbs,i; 56 57 PetscFunctionBegin; 58 *nn = n; 59 if (!ia) PetscFunctionReturn(0); 60 61 if (oshift == 1) { 62 /* temporarily add 1 to i and j indices */ 63 PetscInt nz = a->i[n]; 64 for (i=0; i<nz; i++) a->j[i]++; 65 for (i=0; i<n+1; i++) a->i[i]++; 66 *ia = a->i; *ja = a->j; 67 } else { 68 *ia = a->i; *ja = a->j; 69 } 70 PetscFunctionReturn(0); 71 } 72 73 #undef __FUNCT__ 74 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 75 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 76 { 77 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 78 PetscInt i,n = a->mbs; 79 80 PetscFunctionBegin; 81 if (!ia) PetscFunctionReturn(0); 82 83 if (oshift == 1) { 84 PetscInt nz = a->i[n]-1; 85 for (i=0; i<nz; i++) a->j[i]--; 86 for (i=0; i<n+1; i++) a->i[i]--; 87 } 88 PetscFunctionReturn(0); 89 } 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 93 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 94 { 95 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 96 PetscErrorCode ierr; 97 98 PetscFunctionBegin; 99 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->m,a->nz); 100 ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,&a->a,&a->j,&a->i);CHKERRQ(ierr); 101 if (a->row) { 102 ierr = ISDestroy(a->row);CHKERRQ(ierr); 103 } 104 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 105 if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 106 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 107 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 108 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 109 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 110 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 111 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 112 113 if (a->inew){ 114 ierr = PetscFree(a->inew);CHKERRQ(ierr); 115 a->inew = 0; 116 } 117 ierr = PetscFree(a);CHKERRQ(ierr); 118 119 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 120 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 121 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 122 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 123 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 124 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 125 PetscFunctionReturn(0); 126 } 127 128 #undef __FUNCT__ 129 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 130 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op) 131 { 132 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 133 PetscErrorCode ierr; 134 135 PetscFunctionBegin; 136 switch (op) { 137 case MAT_ROW_ORIENTED: 138 a->roworiented = PETSC_TRUE; 139 break; 140 case MAT_COLUMN_ORIENTED: 141 a->roworiented = PETSC_FALSE; 142 break; 143 case MAT_COLUMNS_SORTED: 144 a->sorted = PETSC_TRUE; 145 break; 146 case MAT_COLUMNS_UNSORTED: 147 a->sorted = PETSC_FALSE; 148 break; 149 case MAT_KEEP_ZEROED_ROWS: 150 a->keepzeroedrows = PETSC_TRUE; 151 break; 152 case MAT_NO_NEW_NONZERO_LOCATIONS: 153 a->nonew = 1; 154 break; 155 case MAT_NEW_NONZERO_LOCATION_ERR: 156 a->nonew = -1; 157 break; 158 case MAT_NEW_NONZERO_ALLOCATION_ERR: 159 a->nonew = -2; 160 break; 161 case MAT_YES_NEW_NONZERO_LOCATIONS: 162 a->nonew = 0; 163 break; 164 case MAT_ROWS_SORTED: 165 case MAT_ROWS_UNSORTED: 166 case MAT_YES_NEW_DIAGONALS: 167 case MAT_IGNORE_OFF_PROC_ENTRIES: 168 case MAT_USE_HASH_TABLE: 169 ierr = PetscLogInfo((A,"MatSetOption_SeqSBAIJ:Option ignored\n"));CHKERRQ(ierr); 170 break; 171 case MAT_NO_NEW_DIAGONALS: 172 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 173 case MAT_NOT_SYMMETRIC: 174 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 175 case MAT_HERMITIAN: 176 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 177 case MAT_SYMMETRIC: 178 case MAT_STRUCTURALLY_SYMMETRIC: 179 case MAT_NOT_HERMITIAN: 180 case MAT_SYMMETRY_ETERNAL: 181 case MAT_NOT_SYMMETRY_ETERNAL: 182 case MAT_IGNORE_LOWER_TRIANGULAR: 183 a->ignore_ltriangular = PETSC_TRUE; 184 break; 185 case MAT_ERROR_LOWER_TRIANGULAR: 186 a->ignore_ltriangular = PETSC_FALSE; 187 break; 188 default: 189 SETERRQ(PETSC_ERR_SUP,"unknown option"); 190 } 191 PetscFunctionReturn(0); 192 } 193 194 #undef __FUNCT__ 195 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 196 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v) 197 { 198 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 199 PetscErrorCode ierr; 200 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 201 MatScalar *aa,*aa_i; 202 PetscScalar *v_i; 203 204 PetscFunctionBegin; 205 bs = A->bs; 206 ai = a->i; 207 aj = a->j; 208 aa = a->a; 209 bs2 = a->bs2; 210 211 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 212 213 bn = row/bs; /* Block number */ 214 bp = row % bs; /* Block position */ 215 M = ai[bn+1] - ai[bn]; 216 *ncols = bs*M; 217 218 if (v) { 219 *v = 0; 220 if (*ncols) { 221 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 222 for (i=0; i<M; i++) { /* for each block in the block row */ 223 v_i = *v + i*bs; 224 aa_i = aa + bs2*(ai[bn] + i); 225 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 226 } 227 } 228 } 229 230 if (cols) { 231 *cols = 0; 232 if (*ncols) { 233 ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr); 234 for (i=0; i<M; i++) { /* for each block in the block row */ 235 cols_i = *cols + i*bs; 236 itmp = bs*aj[ai[bn] + i]; 237 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 238 } 239 } 240 } 241 242 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 243 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 244 #ifdef column_search 245 v_i = *v + M*bs; 246 cols_i = *cols + M*bs; 247 for (i=0; i<bn; i++){ /* for each block row */ 248 M = ai[i+1] - ai[i]; 249 for (j=0; j<M; j++){ 250 itmp = aj[ai[i] + j]; /* block column value */ 251 if (itmp == bn){ 252 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 253 for (k=0; k<bs; k++) { 254 *cols_i++ = i*bs+k; 255 *v_i++ = aa_i[k]; 256 } 257 *ncols += bs; 258 break; 259 } 260 } 261 } 262 #endif 263 PetscFunctionReturn(0); 264 } 265 266 #undef __FUNCT__ 267 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 268 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 269 { 270 PetscErrorCode ierr; 271 272 PetscFunctionBegin; 273 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 274 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 280 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 281 { 282 PetscErrorCode ierr; 283 PetscFunctionBegin; 284 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 285 PetscFunctionReturn(0); 286 } 287 288 #undef __FUNCT__ 289 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 290 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 291 { 292 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 293 PetscErrorCode ierr; 294 PetscInt i,j,bs = A->bs,k,l,bs2=a->bs2; 295 const char *name; 296 PetscViewerFormat format; 297 298 PetscFunctionBegin; 299 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 300 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 301 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 302 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 303 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 304 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 305 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 306 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 307 for (i=0; i<a->mbs; i++) { 308 for (j=0; j<bs; j++) { 309 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 310 for (k=a->i[i]; k<a->i[i+1]; k++) { 311 for (l=0; l<bs; l++) { 312 #if defined(PETSC_USE_COMPLEX) 313 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 314 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 315 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 316 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 317 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 318 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 319 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 320 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 321 } 322 #else 323 if (a->a[bs2*k + l*bs + j] != 0.0) { 324 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 325 } 326 #endif 327 } 328 } 329 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 330 } 331 } 332 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 333 } else { 334 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 335 for (i=0; i<a->mbs; i++) { 336 for (j=0; j<bs; j++) { 337 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 338 for (k=a->i[i]; k<a->i[i+1]; k++) { 339 for (l=0; l<bs; l++) { 340 #if defined(PETSC_USE_COMPLEX) 341 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 342 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 343 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 344 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 345 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 346 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 347 } else { 348 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 349 } 350 #else 351 ierr = PetscViewerASCIIPrintf(viewer," %D %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 352 #endif 353 } 354 } 355 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 356 } 357 } 358 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 359 } 360 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 361 PetscFunctionReturn(0); 362 } 363 364 #undef __FUNCT__ 365 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 366 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 367 { 368 Mat A = (Mat) Aa; 369 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 370 PetscErrorCode ierr; 371 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2; 372 PetscMPIInt rank; 373 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 374 MatScalar *aa; 375 MPI_Comm comm; 376 PetscViewer viewer; 377 378 PetscFunctionBegin; 379 /* 380 This is nasty. If this is called from an originally parallel matrix 381 then all processes call this,but only the first has the matrix so the 382 rest should return immediately. 383 */ 384 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 385 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 386 if (rank) PetscFunctionReturn(0); 387 388 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 389 390 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 391 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 392 393 /* loop over matrix elements drawing boxes */ 394 color = PETSC_DRAW_BLUE; 395 for (i=0,row=0; i<mbs; i++,row+=bs) { 396 for (j=a->i[i]; j<a->i[i+1]; j++) { 397 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 398 x_l = a->j[j]*bs; x_r = x_l + 1.0; 399 aa = a->a + j*bs2; 400 for (k=0; k<bs; k++) { 401 for (l=0; l<bs; l++) { 402 if (PetscRealPart(*aa++) >= 0.) continue; 403 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 404 } 405 } 406 } 407 } 408 color = PETSC_DRAW_CYAN; 409 for (i=0,row=0; i<mbs; i++,row+=bs) { 410 for (j=a->i[i]; j<a->i[i+1]; j++) { 411 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 412 x_l = a->j[j]*bs; x_r = x_l + 1.0; 413 aa = a->a + j*bs2; 414 for (k=0; k<bs; k++) { 415 for (l=0; l<bs; l++) { 416 if (PetscRealPart(*aa++) != 0.) continue; 417 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 418 } 419 } 420 } 421 } 422 423 color = PETSC_DRAW_RED; 424 for (i=0,row=0; i<mbs; i++,row+=bs) { 425 for (j=a->i[i]; j<a->i[i+1]; j++) { 426 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 427 x_l = a->j[j]*bs; x_r = x_l + 1.0; 428 aa = a->a + j*bs2; 429 for (k=0; k<bs; k++) { 430 for (l=0; l<bs; l++) { 431 if (PetscRealPart(*aa++) <= 0.) continue; 432 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 433 } 434 } 435 } 436 } 437 PetscFunctionReturn(0); 438 } 439 440 #undef __FUNCT__ 441 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 442 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 443 { 444 PetscErrorCode ierr; 445 PetscReal xl,yl,xr,yr,w,h; 446 PetscDraw draw; 447 PetscTruth isnull; 448 449 PetscFunctionBegin; 450 451 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 452 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 453 454 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 455 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 456 xr += w; yr += h; xl = -w; yl = -h; 457 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 458 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 459 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 460 PetscFunctionReturn(0); 461 } 462 463 #undef __FUNCT__ 464 #define __FUNCT__ "MatView_SeqSBAIJ" 465 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 466 { 467 PetscErrorCode ierr; 468 PetscTruth iascii,isdraw; 469 470 PetscFunctionBegin; 471 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 472 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 473 if (iascii){ 474 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 475 } else if (isdraw) { 476 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 477 } else { 478 Mat B; 479 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 480 ierr = MatView(B,viewer);CHKERRQ(ierr); 481 ierr = MatDestroy(B);CHKERRQ(ierr); 482 } 483 PetscFunctionReturn(0); 484 } 485 486 487 #undef __FUNCT__ 488 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 489 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 490 { 491 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 492 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 493 PetscInt *ai = a->i,*ailen = a->ilen; 494 PetscInt brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2; 495 MatScalar *ap,*aa = a->a,zero = 0.0; 496 497 PetscFunctionBegin; 498 for (k=0; k<m; k++) { /* loop over rows */ 499 row = im[k]; brow = row/bs; 500 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); 501 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 502 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 503 nrow = ailen[brow]; 504 for (l=0; l<n; l++) { /* loop over columns */ 505 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); 506 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1); 507 col = in[l] ; 508 bcol = col/bs; 509 cidx = col%bs; 510 ridx = row%bs; 511 high = nrow; 512 low = 0; /* assume unsorted */ 513 while (high-low > 5) { 514 t = (low+high)/2; 515 if (rp[t] > bcol) high = t; 516 else low = t; 517 } 518 for (i=low; i<high; i++) { 519 if (rp[i] > bcol) break; 520 if (rp[i] == bcol) { 521 *v++ = ap[bs2*i+bs*cidx+ridx]; 522 goto finished; 523 } 524 } 525 *v++ = zero; 526 finished:; 527 } 528 } 529 PetscFunctionReturn(0); 530 } 531 532 533 #undef __FUNCT__ 534 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 535 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 536 { 537 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 538 PetscErrorCode ierr; 539 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 540 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 541 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval; 542 PetscTruth roworiented=a->roworiented; 543 const MatScalar *value = v; 544 MatScalar *ap,*aa = a->a,*bap; 545 546 PetscFunctionBegin; 547 if (roworiented) { 548 stepval = (n-1)*bs; 549 } else { 550 stepval = (m-1)*bs; 551 } 552 for (k=0; k<m; k++) { /* loop over added rows */ 553 row = im[k]; 554 if (row < 0) continue; 555 #if defined(PETSC_USE_DEBUG) 556 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 557 #endif 558 rp = aj + ai[row]; 559 ap = aa + bs2*ai[row]; 560 rmax = imax[row]; 561 nrow = ailen[row]; 562 low = 0; 563 high = nrow; 564 for (l=0; l<n; l++) { /* loop over added columns */ 565 if (in[l] < 0) continue; 566 col = in[l]; 567 #if defined(PETSC_USE_DEBUG) 568 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 569 #endif 570 if (col < row) continue; /* ignore lower triangular block */ 571 if (roworiented) { 572 value = v + k*(stepval+bs)*bs + l*bs; 573 } else { 574 value = v + l*(stepval+bs)*bs + k*bs; 575 } 576 if (col <= lastcol) low = 0; else high = nrow; 577 lastcol = col; 578 while (high-low > 7) { 579 t = (low+high)/2; 580 if (rp[t] > col) high = t; 581 else low = t; 582 } 583 for (i=low; i<high; i++) { 584 if (rp[i] > col) break; 585 if (rp[i] == col) { 586 bap = ap + bs2*i; 587 if (roworiented) { 588 if (is == ADD_VALUES) { 589 for (ii=0; ii<bs; ii++,value+=stepval) { 590 for (jj=ii; jj<bs2; jj+=bs) { 591 bap[jj] += *value++; 592 } 593 } 594 } else { 595 for (ii=0; ii<bs; ii++,value+=stepval) { 596 for (jj=ii; jj<bs2; jj+=bs) { 597 bap[jj] = *value++; 598 } 599 } 600 } 601 } else { 602 if (is == ADD_VALUES) { 603 for (ii=0; ii<bs; ii++,value+=stepval) { 604 for (jj=0; jj<bs; jj++) { 605 *bap++ += *value++; 606 } 607 } 608 } else { 609 for (ii=0; ii<bs; ii++,value+=stepval) { 610 for (jj=0; jj<bs; jj++) { 611 *bap++ = *value++; 612 } 613 } 614 } 615 } 616 goto noinsert2; 617 } 618 } 619 if (nonew == 1) goto noinsert2; 620 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 621 MatSeqXAIJReallocateAIJ(a,bs2,nrow,row,col,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew); 622 N = nrow++ - 1; 623 /* shift up all the later entries in this row */ 624 for (ii=N; ii>=i; ii--) { 625 rp[ii+1] = rp[ii]; 626 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 627 } 628 if (N >= i) { 629 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 630 } 631 rp[i] = col; 632 bap = ap + bs2*i; 633 if (roworiented) { 634 for (ii=0; ii<bs; ii++,value+=stepval) { 635 for (jj=ii; jj<bs2; jj+=bs) { 636 bap[jj] = *value++; 637 } 638 } 639 } else { 640 for (ii=0; ii<bs; ii++,value+=stepval) { 641 for (jj=0; jj<bs; jj++) { 642 *bap++ = *value++; 643 } 644 } 645 } 646 noinsert2:; 647 low = i; 648 } 649 ailen[row] = nrow; 650 } 651 PetscFunctionReturn(0); 652 } 653 654 #undef __FUNCT__ 655 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 656 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 657 { 658 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 659 PetscErrorCode ierr; 660 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 661 PetscInt m = A->m,*ip,N,*ailen = a->ilen; 662 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 663 MatScalar *aa = a->a,*ap; 664 665 PetscFunctionBegin; 666 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 667 668 if (m) rmax = ailen[0]; 669 for (i=1; i<mbs; i++) { 670 /* move each row back by the amount of empty slots (fshift) before it*/ 671 fshift += imax[i-1] - ailen[i-1]; 672 rmax = PetscMax(rmax,ailen[i]); 673 if (fshift) { 674 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 675 N = ailen[i]; 676 for (j=0; j<N; j++) { 677 ip[j-fshift] = ip[j]; 678 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 679 } 680 } 681 ai[i] = ai[i-1] + ailen[i-1]; 682 } 683 if (mbs) { 684 fshift += imax[mbs-1] - ailen[mbs-1]; 685 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 686 } 687 /* reset ilen and imax for each row */ 688 for (i=0; i<mbs; i++) { 689 ailen[i] = imax[i] = ai[i+1] - ai[i]; 690 } 691 a->nz = ai[mbs]; 692 693 /* diagonals may have moved, reset it */ 694 if (a->diag) { 695 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 696 } 697 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n", 698 m,A->m,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr); 699 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr); 700 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr); 701 a->reallocs = 0; 702 A->info.nz_unneeded = (PetscReal)fshift*bs2; 703 PetscFunctionReturn(0); 704 } 705 706 /* 707 This function returns an array of flags which indicate the locations of contiguous 708 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 709 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 710 Assume: sizes should be long enough to hold all the values. 711 */ 712 #undef __FUNCT__ 713 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 714 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 715 { 716 PetscInt i,j,k,row; 717 PetscTruth flg; 718 719 PetscFunctionBegin; 720 for (i=0,j=0; i<n; j++) { 721 row = idx[i]; 722 if (row%bs!=0) { /* Not the begining of a block */ 723 sizes[j] = 1; 724 i++; 725 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 726 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 727 i++; 728 } else { /* Begining of the block, so check if the complete block exists */ 729 flg = PETSC_TRUE; 730 for (k=1; k<bs; k++) { 731 if (row+k != idx[i+k]) { /* break in the block */ 732 flg = PETSC_FALSE; 733 break; 734 } 735 } 736 if (flg) { /* No break in the bs */ 737 sizes[j] = bs; 738 i+= bs; 739 } else { 740 sizes[j] = 1; 741 i++; 742 } 743 } 744 } 745 *bs_max = j; 746 PetscFunctionReturn(0); 747 } 748 749 750 /* Only add/insert a(i,j) with i<=j (blocks). 751 Any a(i,j) with i>j input by user is ingored. 752 */ 753 754 #undef __FUNCT__ 755 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 756 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 757 { 758 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 759 PetscErrorCode ierr; 760 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 761 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 762 PetscInt *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol; 763 PetscInt ridx,cidx,bs2=a->bs2; 764 MatScalar *ap,value,*aa=a->a,*bap; 765 766 PetscFunctionBegin; 767 for (k=0; k<m; k++) { /* loop over added rows */ 768 row = im[k]; /* row number */ 769 brow = row/bs; /* block row number */ 770 if (row < 0) continue; 771 #if defined(PETSC_USE_DEBUG) 772 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 773 #endif 774 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 775 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 776 rmax = imax[brow]; /* maximum space allocated for this row */ 777 nrow = ailen[brow]; /* actual length of this row */ 778 low = 0; 779 780 for (l=0; l<n; l++) { /* loop over added columns */ 781 if (in[l] < 0) continue; 782 #if defined(PETSC_USE_DEBUG) 783 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->m-1); 784 #endif 785 col = in[l]; 786 bcol = col/bs; /* block col number */ 787 788 if (brow > bcol) { 789 if (a->ignore_ltriangular){ 790 continue; /* ignore lower triangular values */ 791 } else { 792 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)"); 793 } 794 } 795 796 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 797 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 798 /* element value a(k,l) */ 799 if (roworiented) { 800 value = v[l + k*n]; 801 } else { 802 value = v[k + l*m]; 803 } 804 805 /* move pointer bap to a(k,l) quickly and add/insert value */ 806 if (col <= lastcol) low = 0; high = nrow; 807 lastcol = col; 808 while (high-low > 7) { 809 t = (low+high)/2; 810 if (rp[t] > bcol) high = t; 811 else low = t; 812 } 813 for (i=low; i<high; i++) { 814 if (rp[i] > bcol) break; 815 if (rp[i] == bcol) { 816 bap = ap + bs2*i + bs*cidx + ridx; 817 if (is == ADD_VALUES) *bap += value; 818 else *bap = value; 819 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 820 if (brow == bcol && ridx < cidx){ 821 bap = ap + bs2*i + bs*ridx + cidx; 822 if (is == ADD_VALUES) *bap += value; 823 else *bap = value; 824 } 825 goto noinsert1; 826 } 827 } 828 829 if (nonew == 1) goto noinsert1; 830 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 831 MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew); 832 833 N = nrow++ - 1; 834 /* shift up all the later entries in this row */ 835 for (ii=N; ii>=i; ii--) { 836 rp[ii+1] = rp[ii]; 837 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 838 } 839 if (N>=i) { 840 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 841 } 842 rp[i] = bcol; 843 ap[bs2*i + bs*cidx + ridx] = value; 844 noinsert1:; 845 low = i; 846 } 847 } /* end of loop over added columns */ 848 ailen[brow] = nrow; 849 } /* end of loop over added rows */ 850 PetscFunctionReturn(0); 851 } 852 853 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 854 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 855 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,PetscInt,IS[],PetscInt); 856 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,PetscInt,MatReuse,Mat*); 857 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]); 858 EXTERN PetscErrorCode MatScale_SeqSBAIJ(Mat,PetscScalar); 859 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 860 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 861 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec); 862 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 863 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 864 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat); 865 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec); 866 867 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 868 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 869 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 870 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 871 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 872 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 873 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 874 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 875 876 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 877 878 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 879 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 880 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 881 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 882 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 883 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 884 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 885 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 886 887 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,MatFactorInfo*,Mat*); 888 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,MatFactorInfo*,Mat*); 889 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,MatFactorInfo*,Mat*); 890 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,MatFactorInfo*,Mat*); 891 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,MatFactorInfo*,Mat*); 892 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,MatFactorInfo*,Mat*); 893 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,MatFactorInfo*,Mat*); 894 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,MatFactorInfo*,Mat*); 895 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,PetscInt*,PetscInt*,PetscInt*); 896 897 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 898 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 899 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 900 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 901 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 902 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 903 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 904 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 905 906 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 907 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 908 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 909 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 910 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 911 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 912 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 913 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 914 915 #ifdef HAVE_MatICCFactor 916 /* modified from MatILUFactor_SeqSBAIJ, needs further work! */ 917 #undef __FUNCT__ 918 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 919 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 920 { 921 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 922 Mat outA; 923 PetscErrorCode ierr; 924 PetscTruth row_identity,col_identity; 925 926 PetscFunctionBegin; 927 outA = inA; 928 inA->factor = FACTOR_CHOLESKY; 929 930 if (!a->diag) { 931 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 932 } 933 /* 934 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 935 for ILU(0) factorization with natural ordering 936 */ 937 switch (a->bs) { 938 case 1: 939 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 940 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 941 inA->ops->solves = MatSolves_SeqSBAIJ_1; 942 ierr = PetscLoginfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"));CHKERRQ(ierr); 943 case 2: 944 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 945 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 946 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 947 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"));CHKERRQ(ierr); 948 break; 949 case 3: 950 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 951 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 952 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 953 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"));CHKERRQ(ierr); 954 break; 955 case 4: 956 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 957 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 958 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 959 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"));CHKERRQ(ierr); 960 break; 961 case 5: 962 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 963 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 964 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 965 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"));CHKERRQ(ierr); 966 break; 967 case 6: 968 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 969 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 970 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 971 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"));CHKERRQ(ierr); 972 break; 973 case 7: 974 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 975 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 976 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 977 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"));CHKERRQ(ierr); 978 break; 979 default: 980 a->row = row; 981 a->icol = col; 982 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 983 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 984 985 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 986 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 987 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 988 989 if (!a->solve_work) { 990 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 991 ierr = PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 992 } 993 } 994 995 ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 996 PetscFunctionReturn(0); 997 } 998 #endif 999 1000 #undef __FUNCT__ 1001 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1002 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A) 1003 { 1004 static PetscTruth called = PETSC_FALSE; 1005 MPI_Comm comm = A->comm; 1006 PetscErrorCode ierr; 1007 1008 PetscFunctionBegin; 1009 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1010 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1011 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1012 ierr = (*PetscHelpPrintf)(comm," -mat_ignore_ltriangular: Ignore lower triangular values set by user\n");CHKERRQ(ierr); 1013 PetscFunctionReturn(0); 1014 } 1015 1016 EXTERN_C_BEGIN 1017 #undef __FUNCT__ 1018 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1019 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices) 1020 { 1021 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1022 PetscInt i,nz,n; 1023 1024 PetscFunctionBegin; 1025 nz = baij->maxnz; 1026 n = mat->n; 1027 for (i=0; i<nz; i++) { 1028 baij->j[i] = indices[i]; 1029 } 1030 baij->nz = nz; 1031 for (i=0; i<n; i++) { 1032 baij->ilen[i] = baij->imax[i]; 1033 } 1034 PetscFunctionReturn(0); 1035 } 1036 EXTERN_C_END 1037 1038 #undef __FUNCT__ 1039 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1040 /*@ 1041 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1042 in the matrix. 1043 1044 Input Parameters: 1045 + mat - the SeqSBAIJ matrix 1046 - indices - the column indices 1047 1048 Level: advanced 1049 1050 Notes: 1051 This can be called if you have precomputed the nonzero structure of the 1052 matrix and want to provide it to the matrix object to improve the performance 1053 of the MatSetValues() operation. 1054 1055 You MUST have set the correct numbers of nonzeros per row in the call to 1056 MatCreateSeqSBAIJ(), and the columns indices MUST be sorted. 1057 1058 MUST be called before any calls to MatSetValues() 1059 1060 .seealso: MatCreateSeqSBAIJ 1061 @*/ 1062 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1063 { 1064 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1065 1066 PetscFunctionBegin; 1067 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1068 PetscValidPointer(indices,2); 1069 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1070 if (f) { 1071 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1072 } else { 1073 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1074 } 1075 PetscFunctionReturn(0); 1076 } 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1080 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1081 { 1082 PetscErrorCode ierr; 1083 1084 PetscFunctionBegin; 1085 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1086 PetscFunctionReturn(0); 1087 } 1088 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1091 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1092 { 1093 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1094 PetscFunctionBegin; 1095 *array = a->a; 1096 PetscFunctionReturn(0); 1097 } 1098 1099 #undef __FUNCT__ 1100 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1101 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1102 { 1103 PetscFunctionBegin; 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #include "petscblaslapack.h" 1108 #undef __FUNCT__ 1109 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1110 PetscErrorCode MatAXPY_SeqSBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1111 { 1112 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1113 PetscErrorCode ierr; 1114 PetscInt i,bs=Y->bs,bs2,j; 1115 PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1; 1116 1117 PetscFunctionBegin; 1118 if (str == SAME_NONZERO_PATTERN) { 1119 PetscScalar alpha = a; 1120 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1121 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1122 if (y->xtoy && y->XtoY != X) { 1123 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1124 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1125 } 1126 if (!y->xtoy) { /* get xtoy */ 1127 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1128 y->XtoY = X; 1129 } 1130 bs2 = bs*bs; 1131 for (i=0; i<x->nz; i++) { 1132 j = 0; 1133 while (j < bs2){ 1134 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 1135 j++; 1136 } 1137 } 1138 ierr = PetscLogInfo((0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr); 1139 } else { 1140 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1141 } 1142 PetscFunctionReturn(0); 1143 } 1144 1145 #undef __FUNCT__ 1146 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1147 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1148 { 1149 PetscFunctionBegin; 1150 *flg = PETSC_TRUE; 1151 PetscFunctionReturn(0); 1152 } 1153 1154 #undef __FUNCT__ 1155 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1156 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1157 { 1158 PetscFunctionBegin; 1159 *flg = PETSC_TRUE; 1160 PetscFunctionReturn(0); 1161 } 1162 1163 #undef __FUNCT__ 1164 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1165 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1166 { 1167 PetscFunctionBegin; 1168 *flg = PETSC_FALSE; 1169 PetscFunctionReturn(0); 1170 } 1171 1172 /* -------------------------------------------------------------------*/ 1173 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1174 MatGetRow_SeqSBAIJ, 1175 MatRestoreRow_SeqSBAIJ, 1176 MatMult_SeqSBAIJ_N, 1177 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1178 MatMult_SeqSBAIJ_N, 1179 MatMultAdd_SeqSBAIJ_N, 1180 MatSolve_SeqSBAIJ_N, 1181 0, 1182 0, 1183 /*10*/ 0, 1184 0, 1185 MatCholeskyFactor_SeqSBAIJ, 1186 MatRelax_SeqSBAIJ, 1187 MatTranspose_SeqSBAIJ, 1188 /*15*/ MatGetInfo_SeqSBAIJ, 1189 MatEqual_SeqSBAIJ, 1190 MatGetDiagonal_SeqSBAIJ, 1191 MatDiagonalScale_SeqSBAIJ, 1192 MatNorm_SeqSBAIJ, 1193 /*20*/ 0, 1194 MatAssemblyEnd_SeqSBAIJ, 1195 0, 1196 MatSetOption_SeqSBAIJ, 1197 MatZeroEntries_SeqSBAIJ, 1198 /*25*/ 0, 1199 0, 1200 0, 1201 MatCholeskyFactorSymbolic_SeqSBAIJ, 1202 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1203 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1204 0, 1205 MatICCFactorSymbolic_SeqSBAIJ, 1206 MatGetArray_SeqSBAIJ, 1207 MatRestoreArray_SeqSBAIJ, 1208 /*35*/ MatDuplicate_SeqSBAIJ, 1209 0, 1210 0, 1211 0, 1212 0, 1213 /*40*/ MatAXPY_SeqSBAIJ, 1214 MatGetSubMatrices_SeqSBAIJ, 1215 MatIncreaseOverlap_SeqSBAIJ, 1216 MatGetValues_SeqSBAIJ, 1217 0, 1218 /*45*/ MatPrintHelp_SeqSBAIJ, 1219 MatScale_SeqSBAIJ, 1220 0, 1221 0, 1222 0, 1223 /*50*/ 0, 1224 MatGetRowIJ_SeqSBAIJ, 1225 MatRestoreRowIJ_SeqSBAIJ, 1226 0, 1227 0, 1228 /*55*/ 0, 1229 0, 1230 0, 1231 0, 1232 MatSetValuesBlocked_SeqSBAIJ, 1233 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1234 0, 1235 0, 1236 MatGetPetscMaps_Petsc, 1237 0, 1238 /*65*/ 0, 1239 0, 1240 0, 1241 0, 1242 0, 1243 /*70*/ MatGetRowMax_SeqSBAIJ, 1244 0, 1245 0, 1246 0, 1247 0, 1248 /*75*/ 0, 1249 0, 1250 0, 1251 0, 1252 0, 1253 /*80*/ 0, 1254 0, 1255 0, 1256 #if !defined(PETSC_USE_COMPLEX) 1257 MatGetInertia_SeqSBAIJ, 1258 #else 1259 0, 1260 #endif 1261 MatLoad_SeqSBAIJ, 1262 /*85*/ MatIsSymmetric_SeqSBAIJ, 1263 MatIsHermitian_SeqSBAIJ, 1264 MatIsStructurallySymmetric_SeqSBAIJ, 1265 0, 1266 0, 1267 /*90*/ 0, 1268 0, 1269 0, 1270 0, 1271 0, 1272 /*95*/ 0, 1273 0, 1274 0, 1275 0}; 1276 1277 EXTERN_C_BEGIN 1278 #undef __FUNCT__ 1279 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1280 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat) 1281 { 1282 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1283 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1284 PetscErrorCode ierr; 1285 1286 PetscFunctionBegin; 1287 if (aij->nonew != 1) { 1288 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1289 } 1290 1291 /* allocate space for values if not already there */ 1292 if (!aij->saved_values) { 1293 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1294 } 1295 1296 /* copy values over */ 1297 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1298 PetscFunctionReturn(0); 1299 } 1300 EXTERN_C_END 1301 1302 EXTERN_C_BEGIN 1303 #undef __FUNCT__ 1304 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1305 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat) 1306 { 1307 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1308 PetscErrorCode ierr; 1309 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1310 1311 PetscFunctionBegin; 1312 if (aij->nonew != 1) { 1313 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1314 } 1315 if (!aij->saved_values) { 1316 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1317 } 1318 1319 /* copy values over */ 1320 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1321 PetscFunctionReturn(0); 1322 } 1323 EXTERN_C_END 1324 1325 EXTERN_C_BEGIN 1326 #undef __FUNCT__ 1327 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1328 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 1329 { 1330 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1331 PetscErrorCode ierr; 1332 PetscInt i,mbs,bs2; 1333 PetscTruth skipallocation = PETSC_FALSE,flg; 1334 1335 PetscFunctionBegin; 1336 B->preallocated = PETSC_TRUE; 1337 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1338 mbs = B->m/bs; 1339 bs2 = bs*bs; 1340 1341 if (mbs*bs != B->m) { 1342 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1343 } 1344 1345 if (nz == MAT_SKIP_ALLOCATION) { 1346 skipallocation = PETSC_TRUE; 1347 nz = 0; 1348 } 1349 1350 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1351 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1352 if (nnz) { 1353 for (i=0; i<mbs; i++) { 1354 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1355 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1356 } 1357 } 1358 1359 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1360 if (!flg) { 1361 switch (bs) { 1362 case 1: 1363 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1364 B->ops->solve = MatSolve_SeqSBAIJ_1; 1365 B->ops->solves = MatSolves_SeqSBAIJ_1; 1366 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1367 B->ops->mult = MatMult_SeqSBAIJ_1; 1368 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1369 break; 1370 case 2: 1371 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1372 B->ops->solve = MatSolve_SeqSBAIJ_2; 1373 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1374 B->ops->mult = MatMult_SeqSBAIJ_2; 1375 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1376 break; 1377 case 3: 1378 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1379 B->ops->solve = MatSolve_SeqSBAIJ_3; 1380 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1381 B->ops->mult = MatMult_SeqSBAIJ_3; 1382 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1383 break; 1384 case 4: 1385 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1386 B->ops->solve = MatSolve_SeqSBAIJ_4; 1387 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1388 B->ops->mult = MatMult_SeqSBAIJ_4; 1389 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1390 break; 1391 case 5: 1392 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1393 B->ops->solve = MatSolve_SeqSBAIJ_5; 1394 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1395 B->ops->mult = MatMult_SeqSBAIJ_5; 1396 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1397 break; 1398 case 6: 1399 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1400 B->ops->solve = MatSolve_SeqSBAIJ_6; 1401 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1402 B->ops->mult = MatMult_SeqSBAIJ_6; 1403 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1404 break; 1405 case 7: 1406 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1407 B->ops->solve = MatSolve_SeqSBAIJ_7; 1408 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1409 B->ops->mult = MatMult_SeqSBAIJ_7; 1410 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1411 break; 1412 } 1413 } 1414 1415 b->mbs = mbs; 1416 b->nbs = mbs; 1417 if (!skipallocation) { 1418 /* b->ilen will count nonzeros in each block row so far. */ 1419 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 1420 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1421 if (!nnz) { 1422 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1423 else if (nz <= 0) nz = 1; 1424 for (i=0; i<mbs; i++) { 1425 b->imax[i] = nz; 1426 } 1427 nz = nz*mbs; /* total nz */ 1428 } else { 1429 nz = 0; 1430 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1431 } 1432 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1433 1434 /* allocate the matrix space */ 1435 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->m+1,PetscInt,&b->i);CHKERRQ(ierr); 1436 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1437 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1438 b->singlemalloc = PETSC_TRUE; 1439 1440 /* pointer to beginning of each row */ 1441 b->i[0] = 0; 1442 for (i=1; i<mbs+1; i++) { 1443 b->i[i] = b->i[i-1] + b->imax[i-1]; 1444 } 1445 } 1446 1447 B->bs = bs; 1448 b->bs2 = bs2; 1449 b->nz = 0; 1450 b->maxnz = nz*bs2; 1451 1452 b->inew = 0; 1453 b->jnew = 0; 1454 b->anew = 0; 1455 b->a2anew = 0; 1456 b->permute = PETSC_FALSE; 1457 PetscFunctionReturn(0); 1458 } 1459 EXTERN_C_END 1460 1461 EXTERN_C_BEGIN 1462 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 1463 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat, MatType,MatReuse,Mat*); 1464 EXTERN_C_END 1465 1466 /*MC 1467 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1468 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1469 1470 Options Database Keys: 1471 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1472 1473 Level: beginner 1474 1475 .seealso: MatCreateSeqSBAIJ 1476 M*/ 1477 1478 EXTERN_C_BEGIN 1479 #undef __FUNCT__ 1480 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1481 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B) 1482 { 1483 Mat_SeqSBAIJ *b; 1484 PetscErrorCode ierr; 1485 PetscMPIInt size; 1486 PetscTruth flg; 1487 1488 PetscFunctionBegin; 1489 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1490 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1491 B->m = B->M = PetscMax(B->m,B->M); 1492 B->n = B->N = PetscMax(B->n,B->N); 1493 1494 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1495 B->data = (void*)b; 1496 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1497 B->ops->destroy = MatDestroy_SeqSBAIJ; 1498 B->ops->view = MatView_SeqSBAIJ; 1499 B->factor = 0; 1500 B->mapping = 0; 1501 b->row = 0; 1502 b->icol = 0; 1503 b->reallocs = 0; 1504 b->saved_values = 0; 1505 1506 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1507 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1508 1509 b->sorted = PETSC_FALSE; 1510 b->roworiented = PETSC_TRUE; 1511 b->nonew = 0; 1512 b->diag = 0; 1513 b->solve_work = 0; 1514 b->mult_work = 0; 1515 B->spptr = 0; 1516 b->keepzeroedrows = PETSC_FALSE; 1517 b->xtoy = 0; 1518 b->XtoY = 0; 1519 1520 b->inew = 0; 1521 b->jnew = 0; 1522 b->anew = 0; 1523 b->a2anew = 0; 1524 b->permute = PETSC_FALSE; 1525 1526 b->ignore_ltriangular = PETSC_FALSE; 1527 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr); 1528 if (flg) b->ignore_ltriangular = PETSC_TRUE; 1529 1530 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1531 "MatStoreValues_SeqSBAIJ", 1532 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1533 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1534 "MatRetrieveValues_SeqSBAIJ", 1535 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1536 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1537 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1538 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1539 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1540 "MatConvert_SeqSBAIJ_SeqAIJ", 1541 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1542 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1543 "MatConvert_SeqSBAIJ_SeqBAIJ", 1544 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1545 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1546 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1547 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1548 1549 B->symmetric = PETSC_TRUE; 1550 B->structurally_symmetric = PETSC_TRUE; 1551 B->symmetric_set = PETSC_TRUE; 1552 B->structurally_symmetric_set = PETSC_TRUE; 1553 PetscFunctionReturn(0); 1554 } 1555 EXTERN_C_END 1556 1557 #undef __FUNCT__ 1558 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1559 /*@C 1560 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1561 compressed row) format. For good matrix assembly performance the 1562 user should preallocate the matrix storage by setting the parameter nz 1563 (or the array nnz). By setting these parameters accurately, performance 1564 during matrix assembly can be increased by more than a factor of 50. 1565 1566 Collective on Mat 1567 1568 Input Parameters: 1569 + A - the symmetric matrix 1570 . bs - size of block 1571 . nz - number of block nonzeros per block row (same for all rows) 1572 - nnz - array containing the number of block nonzeros in the upper triangular plus 1573 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1574 1575 Options Database Keys: 1576 . -mat_no_unroll - uses code that does not unroll the loops in the 1577 block calculations (much slower) 1578 . -mat_block_size - size of the blocks to use 1579 1580 Level: intermediate 1581 1582 Notes: 1583 Specify the preallocated storage with either nz or nnz (not both). 1584 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1585 allocation. For additional details, see the users manual chapter on 1586 matrices. 1587 1588 If the nnz parameter is given then the nz parameter is ignored 1589 1590 1591 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1592 @*/ 1593 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 1594 { 1595 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 1596 1597 PetscFunctionBegin; 1598 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1599 if (f) { 1600 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1601 } 1602 PetscFunctionReturn(0); 1603 } 1604 1605 #undef __FUNCT__ 1606 #define __FUNCT__ "MatCreateSeqSBAIJ" 1607 /*@C 1608 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1609 compressed row) format. For good matrix assembly performance the 1610 user should preallocate the matrix storage by setting the parameter nz 1611 (or the array nnz). By setting these parameters accurately, performance 1612 during matrix assembly can be increased by more than a factor of 50. 1613 1614 Collective on MPI_Comm 1615 1616 Input Parameters: 1617 + comm - MPI communicator, set to PETSC_COMM_SELF 1618 . bs - size of block 1619 . m - number of rows, or number of columns 1620 . nz - number of block nonzeros per block row (same for all rows) 1621 - nnz - array containing the number of block nonzeros in the upper triangular plus 1622 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1623 1624 Output Parameter: 1625 . A - the symmetric matrix 1626 1627 Options Database Keys: 1628 . -mat_no_unroll - uses code that does not unroll the loops in the 1629 block calculations (much slower) 1630 . -mat_block_size - size of the blocks to use 1631 1632 Level: intermediate 1633 1634 Notes: 1635 1636 Specify the preallocated storage with either nz or nnz (not both). 1637 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1638 allocation. For additional details, see the users manual chapter on 1639 matrices. 1640 1641 If the nnz parameter is given then the nz parameter is ignored 1642 1643 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1644 @*/ 1645 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1646 { 1647 PetscErrorCode ierr; 1648 1649 PetscFunctionBegin; 1650 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1651 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1652 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1653 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 1654 PetscFunctionReturn(0); 1655 } 1656 1657 #undef __FUNCT__ 1658 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1659 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1660 { 1661 Mat C; 1662 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1663 PetscErrorCode ierr; 1664 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1665 1666 PetscFunctionBegin; 1667 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1668 1669 *B = 0; 1670 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 1671 ierr = MatSetSizes(C,A->m,A->n,A->m,A->n);CHKERRQ(ierr); 1672 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1673 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1674 c = (Mat_SeqSBAIJ*)C->data; 1675 1676 C->preallocated = PETSC_TRUE; 1677 C->factor = A->factor; 1678 c->row = 0; 1679 c->icol = 0; 1680 c->saved_values = 0; 1681 c->keepzeroedrows = a->keepzeroedrows; 1682 C->assembled = PETSC_TRUE; 1683 1684 C->M = A->M; 1685 C->N = A->N; 1686 C->bs = A->bs; 1687 c->bs2 = a->bs2; 1688 c->mbs = a->mbs; 1689 c->nbs = a->nbs; 1690 1691 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr); 1692 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr); 1693 for (i=0; i<mbs; i++) { 1694 c->imax[i] = a->imax[i]; 1695 c->ilen[i] = a->ilen[i]; 1696 } 1697 ierr = PetscLogObjectMemory(C,2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1698 1699 /* allocate the matrix space */ 1700 ierr = PetscMalloc3(bs2*nz,MatScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 1701 c->singlemalloc = PETSC_TRUE; 1702 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1703 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)));CHKERRQ(ierr); 1704 if (mbs > 0) { 1705 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1706 if (cpvalues == MAT_COPY_VALUES) { 1707 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1708 } else { 1709 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1710 } 1711 } 1712 1713 c->sorted = a->sorted; 1714 c->roworiented = a->roworiented; 1715 c->nonew = a->nonew; 1716 1717 if (a->diag) { 1718 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 1719 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1720 for (i=0; i<mbs; i++) { 1721 c->diag[i] = a->diag[i]; 1722 } 1723 } else c->diag = 0; 1724 c->nz = a->nz; 1725 c->maxnz = a->maxnz; 1726 c->solve_work = 0; 1727 c->mult_work = 0; 1728 *B = C; 1729 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1730 PetscFunctionReturn(0); 1731 } 1732 1733 #undef __FUNCT__ 1734 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1735 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer, MatType type,Mat *A) 1736 { 1737 Mat_SeqSBAIJ *a; 1738 Mat B; 1739 PetscErrorCode ierr; 1740 int fd; 1741 PetscMPIInt size; 1742 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 1743 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1744 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 1745 PetscInt *masked,nmask,tmp,bs2,ishift; 1746 PetscScalar *aa; 1747 MPI_Comm comm = ((PetscObject)viewer)->comm; 1748 1749 PetscFunctionBegin; 1750 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1751 bs2 = bs*bs; 1752 1753 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1754 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1755 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1756 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1757 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1758 M = header[1]; N = header[2]; nz = header[3]; 1759 1760 if (header[3] < 0) { 1761 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1762 } 1763 1764 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1765 1766 /* 1767 This code adds extra rows to make sure the number of rows is 1768 divisible by the blocksize 1769 */ 1770 mbs = M/bs; 1771 extra_rows = bs - M + bs*(mbs); 1772 if (extra_rows == bs) extra_rows = 0; 1773 else mbs++; 1774 if (extra_rows) { 1775 ierr = PetscLogInfo((0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 1776 } 1777 1778 /* read in row lengths */ 1779 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1780 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1781 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1782 1783 /* read in column indices */ 1784 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1785 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1786 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1787 1788 /* loop over row lengths determining block row lengths */ 1789 ierr = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr); 1790 ierr = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1791 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 1792 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1793 masked = mask + mbs; 1794 rowcount = 0; nzcount = 0; 1795 for (i=0; i<mbs; i++) { 1796 nmask = 0; 1797 for (j=0; j<bs; j++) { 1798 kmax = rowlengths[rowcount]; 1799 for (k=0; k<kmax; k++) { 1800 tmp = jj[nzcount++]/bs; /* block col. index */ 1801 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1802 } 1803 rowcount++; 1804 } 1805 s_browlengths[i] += nmask; 1806 1807 /* zero out the mask elements we set */ 1808 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1809 } 1810 1811 /* create our matrix */ 1812 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 1813 ierr = MatSetSizes(B,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 1814 ierr = MatSetType(B,type);CHKERRQ(ierr); 1815 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr); 1816 a = (Mat_SeqSBAIJ*)B->data; 1817 1818 /* set matrix "i" values */ 1819 a->i[0] = 0; 1820 for (i=1; i<= mbs; i++) { 1821 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1822 a->ilen[i-1] = s_browlengths[i-1]; 1823 } 1824 a->nz = a->i[mbs]; 1825 1826 /* read in nonzero values */ 1827 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1828 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1829 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1830 1831 /* set "a" and "j" values into matrix */ 1832 nzcount = 0; jcount = 0; 1833 for (i=0; i<mbs; i++) { 1834 nzcountb = nzcount; 1835 nmask = 0; 1836 for (j=0; j<bs; j++) { 1837 kmax = rowlengths[i*bs+j]; 1838 for (k=0; k<kmax; k++) { 1839 tmp = jj[nzcount++]/bs; /* block col. index */ 1840 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1841 } 1842 } 1843 /* sort the masked values */ 1844 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1845 1846 /* set "j" values into matrix */ 1847 maskcount = 1; 1848 for (j=0; j<nmask; j++) { 1849 a->j[jcount++] = masked[j]; 1850 mask[masked[j]] = maskcount++; 1851 } 1852 1853 /* set "a" values into matrix */ 1854 ishift = bs2*a->i[i]; 1855 for (j=0; j<bs; j++) { 1856 kmax = rowlengths[i*bs+j]; 1857 for (k=0; k<kmax; k++) { 1858 tmp = jj[nzcountb]/bs ; /* block col. index */ 1859 if (tmp >= i){ 1860 block = mask[tmp] - 1; 1861 point = jj[nzcountb] - bs*tmp; 1862 idx = ishift + bs2*block + j + bs*point; 1863 a->a[idx] = aa[nzcountb]; 1864 } 1865 nzcountb++; 1866 } 1867 } 1868 /* zero out the mask elements we set */ 1869 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1870 } 1871 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1872 1873 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1874 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1875 ierr = PetscFree(aa);CHKERRQ(ierr); 1876 ierr = PetscFree(jj);CHKERRQ(ierr); 1877 ierr = PetscFree(mask);CHKERRQ(ierr); 1878 1879 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1880 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1881 ierr = MatView_Private(B);CHKERRQ(ierr); 1882 *A = B; 1883 PetscFunctionReturn(0); 1884 } 1885 1886 #undef __FUNCT__ 1887 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1888 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1889 { 1890 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1891 MatScalar *aa=a->a,*v,*v1; 1892 PetscScalar *x,*b,*t,sum,d; 1893 PetscErrorCode ierr; 1894 PetscInt m=a->mbs,bs=A->bs,*ai=a->i,*aj=a->j; 1895 PetscInt nz,nz1,*vj,*vj1,i; 1896 1897 PetscFunctionBegin; 1898 its = its*lits; 1899 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1900 1901 if (bs > 1) 1902 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1903 1904 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1905 if (xx != bb) { 1906 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1907 } else { 1908 b = x; 1909 } 1910 1911 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1912 1913 if (flag & SOR_ZERO_INITIAL_GUESS) { 1914 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1915 for (i=0; i<m; i++) 1916 t[i] = b[i]; 1917 1918 for (i=0; i<m; i++){ 1919 d = *(aa + ai[i]); /* diag[i] */ 1920 v = aa + ai[i] + 1; 1921 vj = aj + ai[i] + 1; 1922 nz = ai[i+1] - ai[i] - 1; 1923 x[i] = omega*t[i]/d; 1924 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1925 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 1926 } 1927 } 1928 1929 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1930 for (i=0; i<m; i++) 1931 t[i] = b[i]; 1932 1933 for (i=0; i<m-1; i++){ /* update rhs */ 1934 v = aa + ai[i] + 1; 1935 vj = aj + ai[i] + 1; 1936 nz = ai[i+1] - ai[i] - 1; 1937 while (nz--) t[*vj++] -= x[i]*(*v++); 1938 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 1939 } 1940 for (i=m-1; i>=0; i--){ 1941 d = *(aa + ai[i]); 1942 v = aa + ai[i] + 1; 1943 vj = aj + ai[i] + 1; 1944 nz = ai[i+1] - ai[i] - 1; 1945 sum = t[i]; 1946 while (nz--) sum -= x[*vj++]*(*v++); 1947 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 1948 x[i] = (1-omega)*x[i] + omega*sum/d; 1949 } 1950 } 1951 its--; 1952 } 1953 1954 while (its--) { 1955 /* 1956 forward sweep: 1957 for i=0,...,m-1: 1958 sum[i] = (b[i] - U(i,:)x )/d[i]; 1959 x[i] = (1-omega)x[i] + omega*sum[i]; 1960 b = b - x[i]*U^T(i,:); 1961 1962 */ 1963 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1964 for (i=0; i<m; i++) 1965 t[i] = b[i]; 1966 1967 for (i=0; i<m; i++){ 1968 d = *(aa + ai[i]); /* diag[i] */ 1969 v = aa + ai[i] + 1; v1=v; 1970 vj = aj + ai[i] + 1; vj1=vj; 1971 nz = ai[i+1] - ai[i] - 1; nz1=nz; 1972 sum = t[i]; 1973 while (nz1--) sum -= (*v1++)*x[*vj1++]; 1974 x[i] = (1-omega)*x[i] + omega*sum/d; 1975 while (nz--) t[*vj++] -= x[i]*(*v++); 1976 ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr); 1977 } 1978 } 1979 1980 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1981 /* 1982 backward sweep: 1983 b = b - x[i]*U^T(i,:), i=0,...,n-2 1984 for i=m-1,...,0: 1985 sum[i] = (b[i] - U(i,:)x )/d[i]; 1986 x[i] = (1-omega)x[i] + omega*sum[i]; 1987 */ 1988 for (i=0; i<m; i++) 1989 t[i] = b[i]; 1990 1991 for (i=0; i<m-1; i++){ /* update rhs */ 1992 v = aa + ai[i] + 1; 1993 vj = aj + ai[i] + 1; 1994 nz = ai[i+1] - ai[i] - 1; 1995 while (nz--) t[*vj++] -= x[i]*(*v++); 1996 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 1997 } 1998 for (i=m-1; i>=0; i--){ 1999 d = *(aa + ai[i]); 2000 v = aa + ai[i] + 1; 2001 vj = aj + ai[i] + 1; 2002 nz = ai[i+1] - ai[i] - 1; 2003 sum = t[i]; 2004 while (nz--) sum -= x[*vj++]*(*v++); 2005 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2006 x[i] = (1-omega)*x[i] + omega*sum/d; 2007 } 2008 } 2009 } 2010 2011 ierr = PetscFree(t);CHKERRQ(ierr); 2012 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2013 if (bb != xx) { 2014 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2015 } 2016 PetscFunctionReturn(0); 2017 } 2018 2019 2020 2021 2022 2023 2024