1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/inline/spops.h" 9 #include "src/mat/impls/sbaij/seq/sbaij.h" 10 11 #define CHUNKSIZE 10 12 13 /* 14 Checks for missing diagonals 15 */ 16 #undef __FUNCT__ 17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 19 { 20 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 21 PetscErrorCode ierr; 22 PetscInt *diag,*jj = a->j,i; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 PetscErrorCode ierr; 39 PetscInt i; 40 41 PetscFunctionBegin; 42 if (!a->diag) { 43 ierr = PetscMalloc(a->mbs*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 44 } 45 for (i=0; i<a->mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs; 55 PetscErrorCode ierr; 56 57 PetscFunctionBegin; 58 *nn = n; 59 if (!ia) PetscFunctionReturn(0); 60 if (!blockcompressed) { 61 /* malloc & create the natural set of indices */ 62 ierr = PetscMalloc2((n+1)*bs,PetscInt,ia,nz*bs,PetscInt,ja);CHKERRQ(ierr); 63 for (i=0; i<n+1; i++) { 64 for (j=0; j<bs; j++) { 65 *ia[i*bs+j] = a->i[i]*bs+j+oshift; 66 } 67 } 68 for (i=0; i<nz; i++) { 69 for (j=0; j<bs; j++) { 70 *ja[i*bs+j] = a->j[i]*bs+j+oshift; 71 } 72 } 73 } else { /* blockcompressed */ 74 if (oshift == 1) { 75 /* temporarily add 1 to i and j indices */ 76 for (i=0; i<nz; i++) a->j[i]++; 77 for (i=0; i<n+1; i++) a->i[i]++; 78 } 79 *ia = a->i; *ja = a->j; 80 } 81 82 PetscFunctionReturn(0); 83 } 84 85 #undef __FUNCT__ 86 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 87 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 88 { 89 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 90 PetscInt i,n = a->mbs,nz = a->i[n]; 91 PetscErrorCode ierr; 92 93 PetscFunctionBegin; 94 if (!ia) PetscFunctionReturn(0); 95 96 if (!blockcompressed) { 97 ierr = PetscFree2(*ia,*ja);CHKERRQ(ierr); 98 } else if (oshift == 1) { /* blockcompressed */ 99 for (i=0; i<nz; i++) a->j[i]--; 100 for (i=0; i<n+1; i++) a->i[i]--; 101 } 102 103 PetscFunctionReturn(0); 104 } 105 106 #undef __FUNCT__ 107 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 108 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 109 { 110 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 111 PetscErrorCode ierr; 112 113 PetscFunctionBegin; 114 #if defined(PETSC_USE_LOG) 115 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->rmap.N,a->nz); 116 #endif 117 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 118 if (a->row) {ierr = ISDestroy(a->row);CHKERRQ(ierr);} 119 if (a->col){ierr = ISDestroy(a->col);CHKERRQ(ierr);} 120 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 121 ierr = PetscFree(a->diag);CHKERRQ(ierr); 122 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 123 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 124 ierr = PetscFree(a->relax_work);CHKERRQ(ierr); 125 ierr = PetscFree(a->solves_work);CHKERRQ(ierr); 126 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 127 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 128 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 129 130 ierr = PetscFree(a->inew);CHKERRQ(ierr); 131 ierr = PetscFree(a);CHKERRQ(ierr); 132 133 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 134 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 135 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 136 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 137 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 138 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 139 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 140 PetscFunctionReturn(0); 141 } 142 143 #undef __FUNCT__ 144 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 145 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op,PetscTruth flg) 146 { 147 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 148 PetscErrorCode ierr; 149 150 PetscFunctionBegin; 151 switch (op) { 152 case MAT_ROW_ORIENTED: 153 a->roworiented = flg; 154 break; 155 case MAT_KEEP_ZEROED_ROWS: 156 a->keepzeroedrows = flg; 157 break; 158 case MAT_NEW_NONZERO_LOCATIONS: 159 a->nonew = (flg ? 0 : 1); 160 break; 161 case MAT_NEW_NONZERO_LOCATION_ERR: 162 a->nonew = (flg ? -1 : 0); 163 break; 164 case MAT_NEW_NONZERO_ALLOCATION_ERR: 165 a->nonew = (flg ? -2 : 0); 166 break; 167 case MAT_NEW_DIAGONALS: 168 case MAT_IGNORE_OFF_PROC_ENTRIES: 169 case MAT_USE_HASH_TABLE: 170 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 171 break; 172 case MAT_HERMITIAN: 173 if (flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 174 case MAT_SYMMETRIC: 175 case MAT_STRUCTURALLY_SYMMETRIC: 176 case MAT_SYMMETRY_ETERNAL: 177 if (!flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 178 ierr = PetscInfo1(A,"Option %s not relevent\n",MatOptions[op]);CHKERRQ(ierr); 179 break; 180 case MAT_IGNORE_LOWER_TRIANGULAR: 181 a->ignore_ltriangular = flg; 182 break; 183 case MAT_ERROR_LOWER_TRIANGULAR: 184 a->ignore_ltriangular = flg; 185 break; 186 case MAT_GETROW_UPPERTRIANGULAR: 187 a->getrow_utriangular = flg; 188 break; 189 default: 190 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 191 } 192 PetscFunctionReturn(0); 193 } 194 195 #undef __FUNCT__ 196 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 197 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v) 198 { 199 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 200 PetscErrorCode ierr; 201 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 202 MatScalar *aa,*aa_i; 203 PetscScalar *v_i; 204 205 PetscFunctionBegin; 206 if (A && !a->getrow_utriangular) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format. Getting the upper triangular part of row, run with -mat_getrow_uppertriangular, call MatSetOption(mat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE) or MatGetRowUpperTriangular()"); 207 /* Get the upper triangular part of the row */ 208 bs = A->rmap.bs; 209 ai = a->i; 210 aj = a->j; 211 aa = a->a; 212 bs2 = a->bs2; 213 214 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 215 216 bn = row/bs; /* Block number */ 217 bp = row % bs; /* Block position */ 218 M = ai[bn+1] - ai[bn]; 219 *ncols = bs*M; 220 221 if (v) { 222 *v = 0; 223 if (*ncols) { 224 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 225 for (i=0; i<M; i++) { /* for each block in the block row */ 226 v_i = *v + i*bs; 227 aa_i = aa + bs2*(ai[bn] + i); 228 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 229 } 230 } 231 } 232 233 if (cols) { 234 *cols = 0; 235 if (*ncols) { 236 ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr); 237 for (i=0; i<M; i++) { /* for each block in the block row */ 238 cols_i = *cols + i*bs; 239 itmp = bs*aj[ai[bn] + i]; 240 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 241 } 242 } 243 } 244 245 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 246 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 247 #ifdef column_search 248 v_i = *v + M*bs; 249 cols_i = *cols + M*bs; 250 for (i=0; i<bn; i++){ /* for each block row */ 251 M = ai[i+1] - ai[i]; 252 for (j=0; j<M; j++){ 253 itmp = aj[ai[i] + j]; /* block column value */ 254 if (itmp == bn){ 255 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 256 for (k=0; k<bs; k++) { 257 *cols_i++ = i*bs+k; 258 *v_i++ = aa_i[k]; 259 } 260 *ncols += bs; 261 break; 262 } 263 } 264 } 265 #endif 266 PetscFunctionReturn(0); 267 } 268 269 #undef __FUNCT__ 270 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 271 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 272 { 273 PetscErrorCode ierr; 274 275 PetscFunctionBegin; 276 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 277 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 278 PetscFunctionReturn(0); 279 } 280 281 #undef __FUNCT__ 282 #define __FUNCT__ "MatGetRowUpperTriangular_SeqSBAIJ" 283 PetscErrorCode MatGetRowUpperTriangular_SeqSBAIJ(Mat A) 284 { 285 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 286 287 PetscFunctionBegin; 288 a->getrow_utriangular = PETSC_TRUE; 289 PetscFunctionReturn(0); 290 } 291 #undef __FUNCT__ 292 #define __FUNCT__ "MatRestoreRowUpperTriangular_SeqSBAIJ" 293 PetscErrorCode MatRestoreRowUpperTriangular_SeqSBAIJ(Mat A) 294 { 295 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 296 297 PetscFunctionBegin; 298 a->getrow_utriangular = PETSC_FALSE; 299 PetscFunctionReturn(0); 300 } 301 302 #undef __FUNCT__ 303 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 304 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 305 { 306 PetscErrorCode ierr; 307 PetscFunctionBegin; 308 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 309 PetscFunctionReturn(0); 310 } 311 312 #undef __FUNCT__ 313 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 314 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 315 { 316 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 317 PetscErrorCode ierr; 318 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 319 const char *name; 320 PetscViewerFormat format; 321 322 PetscFunctionBegin; 323 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 324 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 325 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 326 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 327 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 328 if (A->factor && bs>1){ 329 ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored with bs>1. MatView() with PETSC_VIEWER_ASCII_MATLAB is not supported and ignored!\n");CHKERRQ(ierr); 330 PetscFunctionReturn(0); 331 } 332 Mat aij; 333 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 334 ierr = MatView(aij,viewer);CHKERRQ(ierr); 335 ierr = MatDestroy(aij);CHKERRQ(ierr); 336 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 337 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 338 for (i=0; i<a->mbs; i++) { 339 for (j=0; j<bs; j++) { 340 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 341 for (k=a->i[i]; k<a->i[i+1]; k++) { 342 for (l=0; l<bs; l++) { 343 #if defined(PETSC_USE_COMPLEX) 344 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 345 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 346 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 347 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 348 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 349 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 350 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 351 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 352 } 353 #else 354 if (a->a[bs2*k + l*bs + j] != 0.0) { 355 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 356 } 357 #endif 358 } 359 } 360 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 361 } 362 } 363 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 364 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 365 PetscFunctionReturn(0); 366 } else { 367 if (A->factor && bs>1){ 368 ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored. MatView_SeqSBAIJ_ASCII() may not display complete or logically correct entries!\n");CHKERRQ(ierr); 369 } 370 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 371 for (i=0; i<a->mbs; i++) { 372 for (j=0; j<bs; j++) { 373 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 374 for (k=a->i[i]; k<a->i[i+1]; k++) { 375 for (l=0; l<bs; l++) { 376 #if defined(PETSC_USE_COMPLEX) 377 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 378 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 379 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 380 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 381 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 382 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 383 } else { 384 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 385 } 386 #else 387 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 388 #endif 389 } 390 } 391 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 392 } 393 } 394 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 395 } 396 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 397 PetscFunctionReturn(0); 398 } 399 400 #undef __FUNCT__ 401 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 402 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 403 { 404 Mat A = (Mat) Aa; 405 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 406 PetscErrorCode ierr; 407 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 408 PetscMPIInt rank; 409 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 410 MatScalar *aa; 411 MPI_Comm comm; 412 PetscViewer viewer; 413 414 PetscFunctionBegin; 415 /* 416 This is nasty. If this is called from an originally parallel matrix 417 then all processes call this,but only the first has the matrix so the 418 rest should return immediately. 419 */ 420 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 421 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 422 if (rank) PetscFunctionReturn(0); 423 424 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 425 426 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 427 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 428 429 /* loop over matrix elements drawing boxes */ 430 color = PETSC_DRAW_BLUE; 431 for (i=0,row=0; i<mbs; i++,row+=bs) { 432 for (j=a->i[i]; j<a->i[i+1]; j++) { 433 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 434 x_l = a->j[j]*bs; x_r = x_l + 1.0; 435 aa = a->a + j*bs2; 436 for (k=0; k<bs; k++) { 437 for (l=0; l<bs; l++) { 438 if (PetscRealPart(*aa++) >= 0.) continue; 439 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 440 } 441 } 442 } 443 } 444 color = PETSC_DRAW_CYAN; 445 for (i=0,row=0; i<mbs; i++,row+=bs) { 446 for (j=a->i[i]; j<a->i[i+1]; j++) { 447 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 448 x_l = a->j[j]*bs; x_r = x_l + 1.0; 449 aa = a->a + j*bs2; 450 for (k=0; k<bs; k++) { 451 for (l=0; l<bs; l++) { 452 if (PetscRealPart(*aa++) != 0.) continue; 453 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 454 } 455 } 456 } 457 } 458 459 color = PETSC_DRAW_RED; 460 for (i=0,row=0; i<mbs; i++,row+=bs) { 461 for (j=a->i[i]; j<a->i[i+1]; j++) { 462 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 463 x_l = a->j[j]*bs; x_r = x_l + 1.0; 464 aa = a->a + j*bs2; 465 for (k=0; k<bs; k++) { 466 for (l=0; l<bs; l++) { 467 if (PetscRealPart(*aa++) <= 0.) continue; 468 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 469 } 470 } 471 } 472 } 473 PetscFunctionReturn(0); 474 } 475 476 #undef __FUNCT__ 477 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 478 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 479 { 480 PetscErrorCode ierr; 481 PetscReal xl,yl,xr,yr,w,h; 482 PetscDraw draw; 483 PetscTruth isnull; 484 485 PetscFunctionBegin; 486 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 487 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 488 489 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 490 xr = A->rmap.N; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 491 xr += w; yr += h; xl = -w; yl = -h; 492 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 493 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 494 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 495 PetscFunctionReturn(0); 496 } 497 498 #undef __FUNCT__ 499 #define __FUNCT__ "MatView_SeqSBAIJ" 500 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 501 { 502 PetscErrorCode ierr; 503 PetscTruth iascii,isdraw; 504 505 PetscFunctionBegin; 506 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 507 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 508 if (iascii){ 509 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 510 } else if (isdraw) { 511 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 512 } else { 513 Mat B; 514 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 515 ierr = MatView(B,viewer);CHKERRQ(ierr); 516 ierr = MatDestroy(B);CHKERRQ(ierr); 517 } 518 PetscFunctionReturn(0); 519 } 520 521 522 #undef __FUNCT__ 523 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 524 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 525 { 526 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 527 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 528 PetscInt *ai = a->i,*ailen = a->ilen; 529 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 530 MatScalar *ap,*aa = a->a; 531 532 PetscFunctionBegin; 533 for (k=0; k<m; k++) { /* loop over rows */ 534 row = im[k]; brow = row/bs; 535 if (row < 0) {v += n; continue;} /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); */ 536 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 537 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 538 nrow = ailen[brow]; 539 for (l=0; l<n; l++) { /* loop over columns */ 540 if (in[l] < 0) {v++; continue;} /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); */ 541 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 542 col = in[l] ; 543 bcol = col/bs; 544 cidx = col%bs; 545 ridx = row%bs; 546 high = nrow; 547 low = 0; /* assume unsorted */ 548 while (high-low > 5) { 549 t = (low+high)/2; 550 if (rp[t] > bcol) high = t; 551 else low = t; 552 } 553 for (i=low; i<high; i++) { 554 if (rp[i] > bcol) break; 555 if (rp[i] == bcol) { 556 *v++ = ap[bs2*i+bs*cidx+ridx]; 557 goto finished; 558 } 559 } 560 *v++ = 0.0; 561 finished:; 562 } 563 } 564 PetscFunctionReturn(0); 565 } 566 567 568 #undef __FUNCT__ 569 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 570 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 571 { 572 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 573 PetscErrorCode ierr; 574 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 575 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 576 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 577 PetscTruth roworiented=a->roworiented; 578 const MatScalar *value = v; 579 MatScalar *ap,*aa = a->a,*bap; 580 581 PetscFunctionBegin; 582 if (roworiented) { 583 stepval = (n-1)*bs; 584 } else { 585 stepval = (m-1)*bs; 586 } 587 for (k=0; k<m; k++) { /* loop over added rows */ 588 row = im[k]; 589 if (row < 0) continue; 590 #if defined(PETSC_USE_DEBUG) 591 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 592 #endif 593 rp = aj + ai[row]; 594 ap = aa + bs2*ai[row]; 595 rmax = imax[row]; 596 nrow = ailen[row]; 597 low = 0; 598 high = nrow; 599 for (l=0; l<n; l++) { /* loop over added columns */ 600 if (in[l] < 0) continue; 601 col = in[l]; 602 #if defined(PETSC_USE_DEBUG) 603 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 604 #endif 605 if (col < row) continue; /* ignore lower triangular block */ 606 if (roworiented) { 607 value = v + k*(stepval+bs)*bs + l*bs; 608 } else { 609 value = v + l*(stepval+bs)*bs + k*bs; 610 } 611 if (col <= lastcol) low = 0; else high = nrow; 612 lastcol = col; 613 while (high-low > 7) { 614 t = (low+high)/2; 615 if (rp[t] > col) high = t; 616 else low = t; 617 } 618 for (i=low; i<high; i++) { 619 if (rp[i] > col) break; 620 if (rp[i] == col) { 621 bap = ap + bs2*i; 622 if (roworiented) { 623 if (is == ADD_VALUES) { 624 for (ii=0; ii<bs; ii++,value+=stepval) { 625 for (jj=ii; jj<bs2; jj+=bs) { 626 bap[jj] += *value++; 627 } 628 } 629 } else { 630 for (ii=0; ii<bs; ii++,value+=stepval) { 631 for (jj=ii; jj<bs2; jj+=bs) { 632 bap[jj] = *value++; 633 } 634 } 635 } 636 } else { 637 if (is == ADD_VALUES) { 638 for (ii=0; ii<bs; ii++,value+=stepval) { 639 for (jj=0; jj<bs; jj++) { 640 *bap++ += *value++; 641 } 642 } 643 } else { 644 for (ii=0; ii<bs; ii++,value+=stepval) { 645 for (jj=0; jj<bs; jj++) { 646 *bap++ = *value++; 647 } 648 } 649 } 650 } 651 goto noinsert2; 652 } 653 } 654 if (nonew == 1) goto noinsert2; 655 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 656 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 657 N = nrow++ - 1; high++; 658 /* shift up all the later entries in this row */ 659 for (ii=N; ii>=i; ii--) { 660 rp[ii+1] = rp[ii]; 661 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 662 } 663 if (N >= i) { 664 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 665 } 666 rp[i] = col; 667 bap = ap + bs2*i; 668 if (roworiented) { 669 for (ii=0; ii<bs; ii++,value+=stepval) { 670 for (jj=ii; jj<bs2; jj+=bs) { 671 bap[jj] = *value++; 672 } 673 } 674 } else { 675 for (ii=0; ii<bs; ii++,value+=stepval) { 676 for (jj=0; jj<bs; jj++) { 677 *bap++ = *value++; 678 } 679 } 680 } 681 noinsert2:; 682 low = i; 683 } 684 ailen[row] = nrow; 685 } 686 PetscFunctionReturn(0); 687 } 688 689 #undef __FUNCT__ 690 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 691 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 692 { 693 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 694 PetscErrorCode ierr; 695 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 696 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 697 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 698 MatScalar *aa = a->a,*ap; 699 700 PetscFunctionBegin; 701 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 702 703 if (m) rmax = ailen[0]; 704 for (i=1; i<mbs; i++) { 705 /* move each row back by the amount of empty slots (fshift) before it*/ 706 fshift += imax[i-1] - ailen[i-1]; 707 rmax = PetscMax(rmax,ailen[i]); 708 if (fshift) { 709 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 710 N = ailen[i]; 711 for (j=0; j<N; j++) { 712 ip[j-fshift] = ip[j]; 713 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 714 } 715 } 716 ai[i] = ai[i-1] + ailen[i-1]; 717 } 718 if (mbs) { 719 fshift += imax[mbs-1] - ailen[mbs-1]; 720 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 721 } 722 /* reset ilen and imax for each row */ 723 for (i=0; i<mbs; i++) { 724 ailen[i] = imax[i] = ai[i+1] - ai[i]; 725 } 726 a->nz = ai[mbs]; 727 728 /* diagonals may have moved, reset it */ 729 if (a->diag) { 730 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 731 } 732 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->rmap.N,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 733 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 734 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 735 a->reallocs = 0; 736 A->info.nz_unneeded = (PetscReal)fshift*bs2; 737 PetscFunctionReturn(0); 738 } 739 740 /* 741 This function returns an array of flags which indicate the locations of contiguous 742 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 743 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 744 Assume: sizes should be long enough to hold all the values. 745 */ 746 #undef __FUNCT__ 747 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 748 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 749 { 750 PetscInt i,j,k,row; 751 PetscTruth flg; 752 753 PetscFunctionBegin; 754 for (i=0,j=0; i<n; j++) { 755 row = idx[i]; 756 if (row%bs!=0) { /* Not the begining of a block */ 757 sizes[j] = 1; 758 i++; 759 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 760 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 761 i++; 762 } else { /* Begining of the block, so check if the complete block exists */ 763 flg = PETSC_TRUE; 764 for (k=1; k<bs; k++) { 765 if (row+k != idx[i+k]) { /* break in the block */ 766 flg = PETSC_FALSE; 767 break; 768 } 769 } 770 if (flg) { /* No break in the bs */ 771 sizes[j] = bs; 772 i+= bs; 773 } else { 774 sizes[j] = 1; 775 i++; 776 } 777 } 778 } 779 *bs_max = j; 780 PetscFunctionReturn(0); 781 } 782 783 784 /* Only add/insert a(i,j) with i<=j (blocks). 785 Any a(i,j) with i>j input by user is ingored. 786 */ 787 788 #undef __FUNCT__ 789 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 790 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 791 { 792 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 793 PetscErrorCode ierr; 794 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 795 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 796 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 797 PetscInt ridx,cidx,bs2=a->bs2; 798 MatScalar *ap,value,*aa=a->a,*bap; 799 800 PetscFunctionBegin; 801 for (k=0; k<m; k++) { /* loop over added rows */ 802 row = im[k]; /* row number */ 803 brow = row/bs; /* block row number */ 804 if (row < 0) continue; 805 #if defined(PETSC_USE_DEBUG) 806 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 807 #endif 808 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 809 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 810 rmax = imax[brow]; /* maximum space allocated for this row */ 811 nrow = ailen[brow]; /* actual length of this row */ 812 low = 0; 813 814 for (l=0; l<n; l++) { /* loop over added columns */ 815 if (in[l] < 0) continue; 816 #if defined(PETSC_USE_DEBUG) 817 if (in[l] >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->rmap.N-1); 818 #endif 819 col = in[l]; 820 bcol = col/bs; /* block col number */ 821 822 if (brow > bcol) { 823 if (a->ignore_ltriangular){ 824 continue; /* ignore lower triangular values */ 825 } else { 826 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 827 } 828 } 829 830 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 831 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 832 /* element value a(k,l) */ 833 if (roworiented) { 834 value = v[l + k*n]; 835 } else { 836 value = v[k + l*m]; 837 } 838 839 /* move pointer bap to a(k,l) quickly and add/insert value */ 840 if (col <= lastcol) low = 0; high = nrow; 841 lastcol = col; 842 while (high-low > 7) { 843 t = (low+high)/2; 844 if (rp[t] > bcol) high = t; 845 else low = t; 846 } 847 for (i=low; i<high; i++) { 848 if (rp[i] > bcol) break; 849 if (rp[i] == bcol) { 850 bap = ap + bs2*i + bs*cidx + ridx; 851 if (is == ADD_VALUES) *bap += value; 852 else *bap = value; 853 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 854 if (brow == bcol && ridx < cidx){ 855 bap = ap + bs2*i + bs*ridx + cidx; 856 if (is == ADD_VALUES) *bap += value; 857 else *bap = value; 858 } 859 goto noinsert1; 860 } 861 } 862 863 if (nonew == 1) goto noinsert1; 864 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 865 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 866 867 N = nrow++ - 1; high++; 868 /* shift up all the later entries in this row */ 869 for (ii=N; ii>=i; ii--) { 870 rp[ii+1] = rp[ii]; 871 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 872 } 873 if (N>=i) { 874 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 875 } 876 rp[i] = bcol; 877 ap[bs2*i + bs*cidx + ridx] = value; 878 noinsert1:; 879 low = i; 880 } 881 } /* end of loop over added columns */ 882 ailen[brow] = nrow; 883 } /* end of loop over added rows */ 884 PetscFunctionReturn(0); 885 } 886 887 #undef __FUNCT__ 888 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 889 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 890 { 891 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 892 Mat outA; 893 PetscErrorCode ierr; 894 PetscTruth row_identity; 895 896 PetscFunctionBegin; 897 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels=0 is supported for in-place icc"); 898 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 899 if (!row_identity) SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported"); 900 if (inA->rmap.bs != 1) SETERRQ1(PETSC_ERR_SUP,"Matrix block size %D is not supported",inA->rmap.bs); /* Need to replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR()! */ 901 902 outA = inA; 903 inA->factor = FACTOR_CHOLESKY; 904 905 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 906 /* 907 Blocksize < 8 have a special faster factorization/solver 908 for ICC(0) factorization with natural ordering 909 */ 910 switch (inA->rmap.bs){ /* Note: row_identity = PETSC_TRUE! */ 911 case 1: 912 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 913 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 914 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 915 inA->ops->solves = MatSolves_SeqSBAIJ_1; 916 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 917 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 918 ierr = PetscInfo(inA,"Using special in-place natural ordering solvetrans BS=1\n");CHKERRQ(ierr); 919 break; 920 case 2: 921 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 922 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 923 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 924 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering; 925 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering; 926 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr); 927 break; 928 case 3: 929 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 930 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 931 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 932 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_3_NaturalOrdering; 933 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering; 934 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr); 935 break; 936 case 4: 937 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 938 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 939 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 940 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_4_NaturalOrdering; 941 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering; 942 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr); 943 break; 944 case 5: 945 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 946 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 947 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 948 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_5_NaturalOrdering; 949 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering; 950 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr); 951 break; 952 case 6: 953 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 954 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 955 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 956 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_6_NaturalOrdering; 957 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering; 958 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr); 959 break; 960 case 7: 961 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 962 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 963 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 964 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_7_NaturalOrdering; 965 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering; 966 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr); 967 break; 968 default: 969 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 970 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering; 971 inA->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 972 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering; 973 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering; 974 break; 975 } 976 977 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 978 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 979 a->row = row; 980 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 981 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 982 a->col = row; 983 984 /* Create the invert permutation so that it can be used in MatCholeskyFactorNumeric() */ 985 if (a->icol) {ierr = ISInvertPermutation(row,PETSC_DECIDE, &a->icol);CHKERRQ(ierr);} 986 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 987 988 if (!a->solve_work) { 989 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 990 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 991 } 992 993 ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 994 PetscFunctionReturn(0); 995 } 996 997 EXTERN_C_BEGIN 998 #undef __FUNCT__ 999 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1000 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices) 1001 { 1002 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1003 PetscInt i,nz,n; 1004 1005 PetscFunctionBegin; 1006 nz = baij->maxnz; 1007 n = mat->cmap.n; 1008 for (i=0; i<nz; i++) { 1009 baij->j[i] = indices[i]; 1010 } 1011 baij->nz = nz; 1012 for (i=0; i<n; i++) { 1013 baij->ilen[i] = baij->imax[i]; 1014 } 1015 PetscFunctionReturn(0); 1016 } 1017 EXTERN_C_END 1018 1019 #undef __FUNCT__ 1020 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1021 /*@ 1022 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1023 in the matrix. 1024 1025 Input Parameters: 1026 + mat - the SeqSBAIJ matrix 1027 - indices - the column indices 1028 1029 Level: advanced 1030 1031 Notes: 1032 This can be called if you have precomputed the nonzero structure of the 1033 matrix and want to provide it to the matrix object to improve the performance 1034 of the MatSetValues() operation. 1035 1036 You MUST have set the correct numbers of nonzeros per row in the call to 1037 MatCreateSeqSBAIJ(), and the columns indices MUST be sorted. 1038 1039 MUST be called before any calls to MatSetValues() 1040 1041 .seealso: MatCreateSeqSBAIJ 1042 @*/ 1043 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1044 { 1045 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1046 1047 PetscFunctionBegin; 1048 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1049 PetscValidPointer(indices,2); 1050 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1051 if (f) { 1052 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1053 } else { 1054 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1055 } 1056 PetscFunctionReturn(0); 1057 } 1058 1059 #undef __FUNCT__ 1060 #define __FUNCT__ "MatCopy_SeqSBAIJ" 1061 PetscErrorCode MatCopy_SeqSBAIJ(Mat A,Mat B,MatStructure str) 1062 { 1063 PetscErrorCode ierr; 1064 1065 PetscFunctionBegin; 1066 /* If the two matrices have the same copy implementation, use fast copy. */ 1067 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1068 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1069 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1070 1071 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 1072 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1073 } 1074 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 1075 } else { 1076 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1077 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1078 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1079 } 1080 PetscFunctionReturn(0); 1081 } 1082 1083 #undef __FUNCT__ 1084 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1085 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1086 { 1087 PetscErrorCode ierr; 1088 1089 PetscFunctionBegin; 1090 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 1091 PetscFunctionReturn(0); 1092 } 1093 1094 #undef __FUNCT__ 1095 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1096 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1097 { 1098 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1099 PetscFunctionBegin; 1100 *array = a->a; 1101 PetscFunctionReturn(0); 1102 } 1103 1104 #undef __FUNCT__ 1105 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1106 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1107 { 1108 PetscFunctionBegin; 1109 PetscFunctionReturn(0); 1110 } 1111 1112 #include "petscblaslapack.h" 1113 #undef __FUNCT__ 1114 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1115 PetscErrorCode MatAXPY_SeqSBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1116 { 1117 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1118 PetscErrorCode ierr; 1119 PetscInt i,bs=Y->rmap.bs,bs2,j; 1120 PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1; 1121 1122 PetscFunctionBegin; 1123 if (str == SAME_NONZERO_PATTERN) { 1124 PetscScalar alpha = a; 1125 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1126 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1127 if (y->xtoy && y->XtoY != X) { 1128 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1129 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1130 } 1131 if (!y->xtoy) { /* get xtoy */ 1132 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1133 y->XtoY = X; 1134 } 1135 bs2 = bs*bs; 1136 for (i=0; i<x->nz; i++) { 1137 j = 0; 1138 while (j < bs2){ 1139 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 1140 j++; 1141 } 1142 } 1143 ierr = PetscInfo3(0,"ratio of nnz_s(X)/nnz_s(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 1144 } else { 1145 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1146 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1147 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1148 } 1149 PetscFunctionReturn(0); 1150 } 1151 1152 #undef __FUNCT__ 1153 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1154 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1155 { 1156 PetscFunctionBegin; 1157 *flg = PETSC_TRUE; 1158 PetscFunctionReturn(0); 1159 } 1160 1161 #undef __FUNCT__ 1162 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1163 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1164 { 1165 PetscFunctionBegin; 1166 *flg = PETSC_TRUE; 1167 PetscFunctionReturn(0); 1168 } 1169 1170 #undef __FUNCT__ 1171 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1172 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1173 { 1174 PetscFunctionBegin; 1175 *flg = PETSC_FALSE; 1176 PetscFunctionReturn(0); 1177 } 1178 1179 #undef __FUNCT__ 1180 #define __FUNCT__ "MatRealPart_SeqSBAIJ" 1181 PetscErrorCode MatRealPart_SeqSBAIJ(Mat A) 1182 { 1183 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1184 PetscInt i,nz = a->bs2*a->i[a->mbs]; 1185 PetscScalar *aa = a->a; 1186 1187 PetscFunctionBegin; 1188 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 1189 PetscFunctionReturn(0); 1190 } 1191 1192 #undef __FUNCT__ 1193 #define __FUNCT__ "MatImaginaryPart_SeqSBAIJ" 1194 PetscErrorCode MatImaginaryPart_SeqSBAIJ(Mat A) 1195 { 1196 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1197 PetscInt i,nz = a->bs2*a->i[a->mbs]; 1198 PetscScalar *aa = a->a; 1199 1200 PetscFunctionBegin; 1201 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 1202 PetscFunctionReturn(0); 1203 } 1204 1205 /* -------------------------------------------------------------------*/ 1206 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1207 MatGetRow_SeqSBAIJ, 1208 MatRestoreRow_SeqSBAIJ, 1209 MatMult_SeqSBAIJ_N, 1210 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1211 MatMult_SeqSBAIJ_N, /* transpose versions are same as non-transpose versions */ 1212 MatMultAdd_SeqSBAIJ_N, 1213 MatSolve_SeqSBAIJ_N, 1214 0, 1215 0, 1216 /*10*/ 0, 1217 0, 1218 MatCholeskyFactor_SeqSBAIJ, 1219 MatRelax_SeqSBAIJ, 1220 MatTranspose_SeqSBAIJ, 1221 /*15*/ MatGetInfo_SeqSBAIJ, 1222 MatEqual_SeqSBAIJ, 1223 MatGetDiagonal_SeqSBAIJ, 1224 MatDiagonalScale_SeqSBAIJ, 1225 MatNorm_SeqSBAIJ, 1226 /*20*/ 0, 1227 MatAssemblyEnd_SeqSBAIJ, 1228 0, 1229 MatSetOption_SeqSBAIJ, 1230 MatZeroEntries_SeqSBAIJ, 1231 /*25*/ 0, 1232 0, 1233 0, 1234 MatCholeskyFactorSymbolic_SeqSBAIJ, 1235 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1236 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1237 0, 1238 MatICCFactorSymbolic_SeqSBAIJ, 1239 MatGetArray_SeqSBAIJ, 1240 MatRestoreArray_SeqSBAIJ, 1241 /*35*/ MatDuplicate_SeqSBAIJ, 1242 MatForwardSolve_SeqSBAIJ_N, 1243 MatBackwardSolve_SeqSBAIJ_N, 1244 0, 1245 MatICCFactor_SeqSBAIJ, 1246 /*40*/ MatAXPY_SeqSBAIJ, 1247 MatGetSubMatrices_SeqSBAIJ, 1248 MatIncreaseOverlap_SeqSBAIJ, 1249 MatGetValues_SeqSBAIJ, 1250 MatCopy_SeqSBAIJ, 1251 /*45*/ 0, 1252 MatScale_SeqSBAIJ, 1253 0, 1254 0, 1255 0, 1256 /*50*/ 0, 1257 MatGetRowIJ_SeqSBAIJ, 1258 MatRestoreRowIJ_SeqSBAIJ, 1259 0, 1260 0, 1261 /*55*/ 0, 1262 0, 1263 0, 1264 0, 1265 MatSetValuesBlocked_SeqSBAIJ, 1266 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1267 0, 1268 0, 1269 0, 1270 0, 1271 /*65*/ 0, 1272 0, 1273 0, 1274 0, 1275 0, 1276 /*70*/ MatGetRowMaxAbs_SeqSBAIJ, 1277 0, 1278 0, 1279 0, 1280 0, 1281 /*75*/ 0, 1282 0, 1283 0, 1284 0, 1285 0, 1286 /*80*/ 0, 1287 0, 1288 0, 1289 #if !defined(PETSC_USE_COMPLEX) 1290 MatGetInertia_SeqSBAIJ, 1291 #else 1292 0, 1293 #endif 1294 MatLoad_SeqSBAIJ, 1295 /*85*/ MatIsSymmetric_SeqSBAIJ, 1296 MatIsHermitian_SeqSBAIJ, 1297 MatIsStructurallySymmetric_SeqSBAIJ, 1298 0, 1299 0, 1300 /*90*/ 0, 1301 0, 1302 0, 1303 0, 1304 0, 1305 /*95*/ 0, 1306 0, 1307 0, 1308 0, 1309 0, 1310 /*100*/0, 1311 0, 1312 0, 1313 0, 1314 0, 1315 /*105*/0, 1316 MatRealPart_SeqSBAIJ, 1317 MatImaginaryPart_SeqSBAIJ, 1318 MatGetRowUpperTriangular_SeqSBAIJ, 1319 MatRestoreRowUpperTriangular_SeqSBAIJ 1320 }; 1321 1322 EXTERN_C_BEGIN 1323 #undef __FUNCT__ 1324 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1325 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat) 1326 { 1327 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1328 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 1329 PetscErrorCode ierr; 1330 1331 PetscFunctionBegin; 1332 if (aij->nonew != 1) { 1333 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1334 } 1335 1336 /* allocate space for values if not already there */ 1337 if (!aij->saved_values) { 1338 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1339 } 1340 1341 /* copy values over */ 1342 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1343 PetscFunctionReturn(0); 1344 } 1345 EXTERN_C_END 1346 1347 EXTERN_C_BEGIN 1348 #undef __FUNCT__ 1349 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1350 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat) 1351 { 1352 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1353 PetscErrorCode ierr; 1354 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 1355 1356 PetscFunctionBegin; 1357 if (aij->nonew != 1) { 1358 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1359 } 1360 if (!aij->saved_values) { 1361 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1362 } 1363 1364 /* copy values over */ 1365 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1366 PetscFunctionReturn(0); 1367 } 1368 EXTERN_C_END 1369 1370 EXTERN_C_BEGIN 1371 #undef __FUNCT__ 1372 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1373 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 1374 { 1375 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1376 PetscErrorCode ierr; 1377 PetscInt i,mbs,bs2; 1378 PetscTruth skipallocation = PETSC_FALSE,flg; 1379 1380 PetscFunctionBegin; 1381 B->preallocated = PETSC_TRUE; 1382 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1383 B->rmap.bs = B->cmap.bs = bs; 1384 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 1385 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 1386 1387 mbs = B->rmap.N/bs; 1388 bs2 = bs*bs; 1389 1390 if (mbs*bs != B->rmap.N) { 1391 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1392 } 1393 1394 if (nz == MAT_SKIP_ALLOCATION) { 1395 skipallocation = PETSC_TRUE; 1396 nz = 0; 1397 } 1398 1399 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1400 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1401 if (nnz) { 1402 for (i=0; i<mbs; i++) { 1403 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1404 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1405 } 1406 } 1407 1408 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1409 if (!flg) { 1410 switch (bs) { 1411 case 1: 1412 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1413 B->ops->solve = MatSolve_SeqSBAIJ_1; 1414 B->ops->solves = MatSolves_SeqSBAIJ_1; 1415 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1416 B->ops->mult = MatMult_SeqSBAIJ_1; 1417 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1418 B->ops->multtranspose = MatMult_SeqSBAIJ_1; 1419 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_1; 1420 break; 1421 case 2: 1422 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1423 B->ops->solve = MatSolve_SeqSBAIJ_2; 1424 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1425 B->ops->mult = MatMult_SeqSBAIJ_2; 1426 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1427 B->ops->multtranspose = MatMult_SeqSBAIJ_2; 1428 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_2; 1429 break; 1430 case 3: 1431 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1432 B->ops->solve = MatSolve_SeqSBAIJ_3; 1433 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1434 B->ops->mult = MatMult_SeqSBAIJ_3; 1435 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1436 B->ops->multtranspose = MatMult_SeqSBAIJ_3; 1437 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_3; 1438 break; 1439 case 4: 1440 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1441 B->ops->solve = MatSolve_SeqSBAIJ_4; 1442 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1443 B->ops->mult = MatMult_SeqSBAIJ_4; 1444 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1445 B->ops->multtranspose = MatMult_SeqSBAIJ_4; 1446 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_4; 1447 break; 1448 case 5: 1449 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1450 B->ops->solve = MatSolve_SeqSBAIJ_5; 1451 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1452 B->ops->mult = MatMult_SeqSBAIJ_5; 1453 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1454 B->ops->multtranspose = MatMult_SeqSBAIJ_5; 1455 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_5; 1456 break; 1457 case 6: 1458 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1459 B->ops->solve = MatSolve_SeqSBAIJ_6; 1460 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1461 B->ops->mult = MatMult_SeqSBAIJ_6; 1462 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1463 B->ops->multtranspose = MatMult_SeqSBAIJ_6; 1464 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_6; 1465 break; 1466 case 7: 1467 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1468 B->ops->solve = MatSolve_SeqSBAIJ_7; 1469 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1470 B->ops->mult = MatMult_SeqSBAIJ_7; 1471 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1472 B->ops->multtranspose = MatMult_SeqSBAIJ_7; 1473 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_7; 1474 break; 1475 } 1476 } 1477 1478 b->mbs = mbs; 1479 b->nbs = mbs; 1480 if (!skipallocation) { 1481 /* b->ilen will count nonzeros in each block row so far. */ 1482 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 1483 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1484 if (!nnz) { 1485 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1486 else if (nz <= 0) nz = 1; 1487 for (i=0; i<mbs; i++) { 1488 b->imax[i] = nz; 1489 } 1490 nz = nz*mbs; /* total nz */ 1491 } else { 1492 nz = 0; 1493 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1494 } 1495 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1496 1497 /* allocate the matrix space */ 1498 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 1499 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1500 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1501 b->singlemalloc = PETSC_TRUE; 1502 1503 /* pointer to beginning of each row */ 1504 b->i[0] = 0; 1505 for (i=1; i<mbs+1; i++) { 1506 b->i[i] = b->i[i-1] + b->imax[i-1]; 1507 } 1508 b->free_a = PETSC_TRUE; 1509 b->free_ij = PETSC_TRUE; 1510 } else { 1511 b->free_a = PETSC_FALSE; 1512 b->free_ij = PETSC_FALSE; 1513 } 1514 1515 B->rmap.bs = bs; 1516 b->bs2 = bs2; 1517 b->nz = 0; 1518 b->maxnz = nz*bs2; 1519 1520 b->inew = 0; 1521 b->jnew = 0; 1522 b->anew = 0; 1523 b->a2anew = 0; 1524 b->permute = PETSC_FALSE; 1525 PetscFunctionReturn(0); 1526 } 1527 EXTERN_C_END 1528 1529 EXTERN_C_BEGIN 1530 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 1531 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat, MatType,MatReuse,Mat*); 1532 EXTERN_C_END 1533 1534 /*MC 1535 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1536 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1537 1538 Options Database Keys: 1539 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1540 1541 Level: beginner 1542 1543 .seealso: MatCreateSeqSBAIJ 1544 M*/ 1545 1546 EXTERN_C_BEGIN 1547 #undef __FUNCT__ 1548 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1549 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B) 1550 { 1551 Mat_SeqSBAIJ *b; 1552 PetscErrorCode ierr; 1553 PetscMPIInt size; 1554 PetscTruth flg; 1555 1556 PetscFunctionBegin; 1557 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1558 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1559 1560 ierr = PetscNewLog(B,Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1561 B->data = (void*)b; 1562 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1563 B->ops->destroy = MatDestroy_SeqSBAIJ; 1564 B->ops->view = MatView_SeqSBAIJ; 1565 B->factor = 0; 1566 B->mapping = 0; 1567 b->row = 0; 1568 b->icol = 0; 1569 b->reallocs = 0; 1570 b->saved_values = 0; 1571 1572 1573 b->roworiented = PETSC_TRUE; 1574 b->nonew = 0; 1575 b->diag = 0; 1576 b->solve_work = 0; 1577 b->mult_work = 0; 1578 B->spptr = 0; 1579 b->keepzeroedrows = PETSC_FALSE; 1580 b->xtoy = 0; 1581 b->XtoY = 0; 1582 1583 b->inew = 0; 1584 b->jnew = 0; 1585 b->anew = 0; 1586 b->a2anew = 0; 1587 b->permute = PETSC_FALSE; 1588 1589 b->ignore_ltriangular = PETSC_FALSE; 1590 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr); 1591 if (flg) b->ignore_ltriangular = PETSC_TRUE; 1592 1593 b->getrow_utriangular = PETSC_FALSE; 1594 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_getrow_uppertriangular",&flg);CHKERRQ(ierr); 1595 if (flg) b->getrow_utriangular = PETSC_TRUE; 1596 1597 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1598 "MatStoreValues_SeqSBAIJ", 1599 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1600 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1601 "MatRetrieveValues_SeqSBAIJ", 1602 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1603 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1604 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1605 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1606 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1607 "MatConvert_SeqSBAIJ_SeqAIJ", 1608 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1609 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1610 "MatConvert_SeqSBAIJ_SeqBAIJ", 1611 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1612 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1613 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1614 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1615 1616 B->symmetric = PETSC_TRUE; 1617 B->structurally_symmetric = PETSC_TRUE; 1618 B->symmetric_set = PETSC_TRUE; 1619 B->structurally_symmetric_set = PETSC_TRUE; 1620 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSBAIJ);CHKERRQ(ierr); 1621 PetscFunctionReturn(0); 1622 } 1623 EXTERN_C_END 1624 1625 #undef __FUNCT__ 1626 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1627 /*@C 1628 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1629 compressed row) format. For good matrix assembly performance the 1630 user should preallocate the matrix storage by setting the parameter nz 1631 (or the array nnz). By setting these parameters accurately, performance 1632 during matrix assembly can be increased by more than a factor of 50. 1633 1634 Collective on Mat 1635 1636 Input Parameters: 1637 + A - the symmetric matrix 1638 . bs - size of block 1639 . nz - number of block nonzeros per block row (same for all rows) 1640 - nnz - array containing the number of block nonzeros in the upper triangular plus 1641 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1642 1643 Options Database Keys: 1644 . -mat_no_unroll - uses code that does not unroll the loops in the 1645 block calculations (much slower) 1646 . -mat_block_size - size of the blocks to use 1647 1648 Level: intermediate 1649 1650 Notes: 1651 Specify the preallocated storage with either nz or nnz (not both). 1652 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1653 allocation. For additional details, see the users manual chapter on 1654 matrices. 1655 1656 If the nnz parameter is given then the nz parameter is ignored 1657 1658 1659 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1660 @*/ 1661 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 1662 { 1663 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 1664 1665 PetscFunctionBegin; 1666 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1667 if (f) { 1668 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1669 } 1670 PetscFunctionReturn(0); 1671 } 1672 1673 #undef __FUNCT__ 1674 #define __FUNCT__ "MatCreateSeqSBAIJ" 1675 /*@C 1676 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1677 compressed row) format. For good matrix assembly performance the 1678 user should preallocate the matrix storage by setting the parameter nz 1679 (or the array nnz). By setting these parameters accurately, performance 1680 during matrix assembly can be increased by more than a factor of 50. 1681 1682 Collective on MPI_Comm 1683 1684 Input Parameters: 1685 + comm - MPI communicator, set to PETSC_COMM_SELF 1686 . bs - size of block 1687 . m - number of rows, or number of columns 1688 . nz - number of block nonzeros per block row (same for all rows) 1689 - nnz - array containing the number of block nonzeros in the upper triangular plus 1690 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1691 1692 Output Parameter: 1693 . A - the symmetric matrix 1694 1695 Options Database Keys: 1696 . -mat_no_unroll - uses code that does not unroll the loops in the 1697 block calculations (much slower) 1698 . -mat_block_size - size of the blocks to use 1699 1700 Level: intermediate 1701 1702 Notes: 1703 The number of rows and columns must be divisible by blocksize. 1704 This matrix type does not support complex Hermitian operation. 1705 1706 Specify the preallocated storage with either nz or nnz (not both). 1707 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1708 allocation. For additional details, see the users manual chapter on 1709 matrices. 1710 1711 If the nnz parameter is given then the nz parameter is ignored 1712 1713 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1714 @*/ 1715 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1716 { 1717 PetscErrorCode ierr; 1718 1719 PetscFunctionBegin; 1720 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1721 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1722 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1723 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 1724 PetscFunctionReturn(0); 1725 } 1726 1727 #undef __FUNCT__ 1728 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1729 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1730 { 1731 Mat C; 1732 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1733 PetscErrorCode ierr; 1734 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1735 1736 PetscFunctionBegin; 1737 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1738 1739 *B = 0; 1740 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 1741 ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 1742 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1743 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1744 c = (Mat_SeqSBAIJ*)C->data; 1745 1746 C->preallocated = PETSC_TRUE; 1747 C->factor = A->factor; 1748 c->row = 0; 1749 c->icol = 0; 1750 c->saved_values = 0; 1751 c->keepzeroedrows = a->keepzeroedrows; 1752 C->assembled = PETSC_TRUE; 1753 1754 ierr = PetscMapCopy(A->comm,&A->rmap,&C->rmap);CHKERRQ(ierr); 1755 ierr = PetscMapCopy(A->comm,&A->cmap,&C->cmap);CHKERRQ(ierr); 1756 c->bs2 = a->bs2; 1757 c->mbs = a->mbs; 1758 c->nbs = a->nbs; 1759 1760 ierr = PetscMalloc2((mbs+1),PetscInt,&c->imax,(mbs+1),PetscInt,&c->ilen);CHKERRQ(ierr); 1761 for (i=0; i<mbs; i++) { 1762 c->imax[i] = a->imax[i]; 1763 c->ilen[i] = a->ilen[i]; 1764 } 1765 1766 /* allocate the matrix space */ 1767 ierr = PetscMalloc3(bs2*nz,MatScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 1768 c->singlemalloc = PETSC_TRUE; 1769 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1770 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)));CHKERRQ(ierr); 1771 if (mbs > 0) { 1772 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1773 if (cpvalues == MAT_COPY_VALUES) { 1774 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1775 } else { 1776 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1777 } 1778 } 1779 1780 c->roworiented = a->roworiented; 1781 c->nonew = a->nonew; 1782 1783 if (a->diag) { 1784 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 1785 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1786 for (i=0; i<mbs; i++) { 1787 c->diag[i] = a->diag[i]; 1788 } 1789 } else c->diag = 0; 1790 c->nz = a->nz; 1791 c->maxnz = a->maxnz; 1792 c->solve_work = 0; 1793 c->mult_work = 0; 1794 c->free_a = PETSC_TRUE; 1795 c->free_ij = PETSC_TRUE; 1796 *B = C; 1797 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1798 PetscFunctionReturn(0); 1799 } 1800 1801 #undef __FUNCT__ 1802 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1803 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer, MatType type,Mat *A) 1804 { 1805 Mat_SeqSBAIJ *a; 1806 Mat B; 1807 PetscErrorCode ierr; 1808 int fd; 1809 PetscMPIInt size; 1810 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 1811 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1812 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 1813 PetscInt *masked,nmask,tmp,bs2,ishift; 1814 PetscScalar *aa; 1815 MPI_Comm comm = ((PetscObject)viewer)->comm; 1816 1817 PetscFunctionBegin; 1818 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1819 bs2 = bs*bs; 1820 1821 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1822 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1823 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1824 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1825 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1826 M = header[1]; N = header[2]; nz = header[3]; 1827 1828 if (header[3] < 0) { 1829 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1830 } 1831 1832 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1833 1834 /* 1835 This code adds extra rows to make sure the number of rows is 1836 divisible by the blocksize 1837 */ 1838 mbs = M/bs; 1839 extra_rows = bs - M + bs*(mbs); 1840 if (extra_rows == bs) extra_rows = 0; 1841 else mbs++; 1842 if (extra_rows) { 1843 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 1844 } 1845 1846 /* read in row lengths */ 1847 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1848 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1849 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1850 1851 /* read in column indices */ 1852 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1853 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1854 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1855 1856 /* loop over row lengths determining block row lengths */ 1857 ierr = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr); 1858 ierr = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1859 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 1860 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1861 masked = mask + mbs; 1862 rowcount = 0; nzcount = 0; 1863 for (i=0; i<mbs; i++) { 1864 nmask = 0; 1865 for (j=0; j<bs; j++) { 1866 kmax = rowlengths[rowcount]; 1867 for (k=0; k<kmax; k++) { 1868 tmp = jj[nzcount++]/bs; /* block col. index */ 1869 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1870 } 1871 rowcount++; 1872 } 1873 s_browlengths[i] += nmask; 1874 1875 /* zero out the mask elements we set */ 1876 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1877 } 1878 1879 /* create our matrix */ 1880 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 1881 ierr = MatSetSizes(B,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 1882 ierr = MatSetType(B,type);CHKERRQ(ierr); 1883 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr); 1884 a = (Mat_SeqSBAIJ*)B->data; 1885 1886 /* set matrix "i" values */ 1887 a->i[0] = 0; 1888 for (i=1; i<= mbs; i++) { 1889 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1890 a->ilen[i-1] = s_browlengths[i-1]; 1891 } 1892 a->nz = a->i[mbs]; 1893 1894 /* read in nonzero values */ 1895 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1896 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1897 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1898 1899 /* set "a" and "j" values into matrix */ 1900 nzcount = 0; jcount = 0; 1901 for (i=0; i<mbs; i++) { 1902 nzcountb = nzcount; 1903 nmask = 0; 1904 for (j=0; j<bs; j++) { 1905 kmax = rowlengths[i*bs+j]; 1906 for (k=0; k<kmax; k++) { 1907 tmp = jj[nzcount++]/bs; /* block col. index */ 1908 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1909 } 1910 } 1911 /* sort the masked values */ 1912 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1913 1914 /* set "j" values into matrix */ 1915 maskcount = 1; 1916 for (j=0; j<nmask; j++) { 1917 a->j[jcount++] = masked[j]; 1918 mask[masked[j]] = maskcount++; 1919 } 1920 1921 /* set "a" values into matrix */ 1922 ishift = bs2*a->i[i]; 1923 for (j=0; j<bs; j++) { 1924 kmax = rowlengths[i*bs+j]; 1925 for (k=0; k<kmax; k++) { 1926 tmp = jj[nzcountb]/bs ; /* block col. index */ 1927 if (tmp >= i){ 1928 block = mask[tmp] - 1; 1929 point = jj[nzcountb] - bs*tmp; 1930 idx = ishift + bs2*block + j + bs*point; 1931 a->a[idx] = aa[nzcountb]; 1932 } 1933 nzcountb++; 1934 } 1935 } 1936 /* zero out the mask elements we set */ 1937 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1938 } 1939 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1940 1941 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1942 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1943 ierr = PetscFree(aa);CHKERRQ(ierr); 1944 ierr = PetscFree(jj);CHKERRQ(ierr); 1945 ierr = PetscFree(mask);CHKERRQ(ierr); 1946 1947 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1948 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1949 ierr = MatView_Private(B);CHKERRQ(ierr); 1950 *A = B; 1951 PetscFunctionReturn(0); 1952 } 1953 1954 #undef __FUNCT__ 1955 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1956 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1957 { 1958 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1959 MatScalar *aa=a->a,*v,*v1; 1960 PetscScalar *x,*b,*t,sum,d; 1961 PetscErrorCode ierr; 1962 PetscInt m=a->mbs,bs=A->rmap.bs,*ai=a->i,*aj=a->j; 1963 PetscInt nz,nz1,*vj,*vj1,i; 1964 1965 PetscFunctionBegin; 1966 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 1967 1968 its = its*lits; 1969 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1970 1971 if (bs > 1) 1972 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1973 1974 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1975 if (xx != bb) { 1976 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1977 } else { 1978 b = x; 1979 } 1980 1981 if (!a->relax_work) { 1982 ierr = PetscMalloc(m*sizeof(PetscScalar),&a->relax_work);CHKERRQ(ierr); 1983 } 1984 t = a->relax_work; 1985 1986 if (flag & SOR_ZERO_INITIAL_GUESS) { 1987 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1988 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 1989 1990 for (i=0; i<m; i++){ 1991 d = *(aa + ai[i]); /* diag[i] */ 1992 v = aa + ai[i] + 1; 1993 vj = aj + ai[i] + 1; 1994 nz = ai[i+1] - ai[i] - 1; 1995 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 1996 x[i] = omega*t[i]/d; 1997 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1998 } 1999 } 2000 2001 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2002 if (!(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)){ 2003 t = b; 2004 } 2005 2006 for (i=m-1; i>=0; i--){ 2007 d = *(aa + ai[i]); 2008 v = aa + ai[i] + 1; 2009 vj = aj + ai[i] + 1; 2010 nz = ai[i+1] - ai[i] - 1; 2011 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2012 sum = t[i]; 2013 while (nz--) sum -= x[*vj++]*(*v++); 2014 x[i] = (1-omega)*x[i] + omega*sum/d; 2015 } 2016 t = a->relax_work; 2017 } 2018 its--; 2019 } 2020 2021 while (its--) { 2022 /* 2023 forward sweep: 2024 for i=0,...,m-1: 2025 sum[i] = (b[i] - U(i,:)x )/d[i]; 2026 x[i] = (1-omega)x[i] + omega*sum[i]; 2027 b = b - x[i]*U^T(i,:); 2028 2029 */ 2030 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2031 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2032 2033 for (i=0; i<m; i++){ 2034 d = *(aa + ai[i]); /* diag[i] */ 2035 v = aa + ai[i] + 1; v1=v; 2036 vj = aj + ai[i] + 1; vj1=vj; 2037 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2038 sum = t[i]; 2039 ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr); 2040 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2041 x[i] = (1-omega)*x[i] + omega*sum/d; 2042 while (nz--) t[*vj++] -= x[i]*(*v++); 2043 } 2044 } 2045 2046 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2047 /* 2048 backward sweep: 2049 b = b - x[i]*U^T(i,:), i=0,...,n-2 2050 for i=m-1,...,0: 2051 sum[i] = (b[i] - U(i,:)x )/d[i]; 2052 x[i] = (1-omega)x[i] + omega*sum[i]; 2053 */ 2054 /* if there was a forward sweep done above then I thing the next two for loops are not needed */ 2055 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2056 2057 for (i=0; i<m-1; i++){ /* update rhs */ 2058 v = aa + ai[i] + 1; 2059 vj = aj + ai[i] + 1; 2060 nz = ai[i+1] - ai[i] - 1; 2061 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2062 while (nz--) t[*vj++] -= x[i]*(*v++); 2063 } 2064 for (i=m-1; i>=0; i--){ 2065 d = *(aa + ai[i]); 2066 v = aa + ai[i] + 1; 2067 vj = aj + ai[i] + 1; 2068 nz = ai[i+1] - ai[i] - 1; 2069 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2070 sum = t[i]; 2071 while (nz--) sum -= x[*vj++]*(*v++); 2072 x[i] = (1-omega)*x[i] + omega*sum/d; 2073 } 2074 } 2075 } 2076 2077 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2078 if (bb != xx) { 2079 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2080 } 2081 PetscFunctionReturn(0); 2082 } 2083 2084 #undef __FUNCT__ 2085 #define __FUNCT__ "MatCreateSeqSBAIJWithArrays" 2086 /*@ 2087 MatCreateSeqSBAIJWithArrays - Creates an sequential SBAIJ matrix using matrix elements 2088 (upper triangular entries in CSR format) provided by the user. 2089 2090 Collective on MPI_Comm 2091 2092 Input Parameters: 2093 + comm - must be an MPI communicator of size 1 2094 . bs - size of block 2095 . m - number of rows 2096 . n - number of columns 2097 . i - row indices 2098 . j - column indices 2099 - a - matrix values 2100 2101 Output Parameter: 2102 . mat - the matrix 2103 2104 Level: intermediate 2105 2106 Notes: 2107 The i, j, and a arrays are not copied by this routine, the user must free these arrays 2108 once the matrix is destroyed 2109 2110 You cannot set new nonzero locations into this matrix, that will generate an error. 2111 2112 The i and j indices are 0 based 2113 2114 .seealso: MatCreate(), MatCreateMPISBAIJ(), MatCreateSeqSBAIJ() 2115 2116 @*/ 2117 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 2118 { 2119 PetscErrorCode ierr; 2120 PetscInt ii; 2121 Mat_SeqSBAIJ *sbaij; 2122 2123 PetscFunctionBegin; 2124 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 2125 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2126 2127 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2128 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 2129 ierr = MatSetType(*mat,MATSEQSBAIJ);CHKERRQ(ierr); 2130 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 2131 sbaij = (Mat_SeqSBAIJ*)(*mat)->data; 2132 ierr = PetscMalloc2(m,PetscInt,&sbaij->imax,m,PetscInt,&sbaij->ilen);CHKERRQ(ierr); 2133 2134 sbaij->i = i; 2135 sbaij->j = j; 2136 sbaij->a = a; 2137 sbaij->singlemalloc = PETSC_FALSE; 2138 sbaij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 2139 sbaij->free_a = PETSC_FALSE; 2140 sbaij->free_ij = PETSC_FALSE; 2141 2142 for (ii=0; ii<m; ii++) { 2143 sbaij->ilen[ii] = sbaij->imax[ii] = i[ii+1] - i[ii]; 2144 #if defined(PETSC_USE_DEBUG) 2145 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 2146 #endif 2147 } 2148 #if defined(PETSC_USE_DEBUG) 2149 for (ii=0; ii<sbaij->i[m]; ii++) { 2150 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 2151 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 2152 } 2153 #endif 2154 2155 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2156 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2157 PetscFunctionReturn(0); 2158 } 2159 2160 2161 2162 2163 2164