1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/inline/spops.h" 9 #include "src/mat/impls/sbaij/seq/sbaij.h" 10 11 #define CHUNKSIZE 10 12 13 /* 14 Checks for missing diagonals 15 */ 16 #undef __FUNCT__ 17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A,PetscTruth *missing,PetscInt *dd) 19 { 20 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 21 PetscErrorCode ierr; 22 PetscInt *diag,*jj = a->j,i; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 *missing = PETSC_FALSE; 28 for (i=0; i<a->mbs; i++) { 29 if (jj[diag[i]] != i) { 30 *missing = PETSC_TRUE; 31 if (dd) *dd = i; 32 break; 33 } 34 } 35 PetscFunctionReturn(0); 36 } 37 38 #undef __FUNCT__ 39 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 40 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 41 { 42 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 43 PetscErrorCode ierr; 44 PetscInt i; 45 46 PetscFunctionBegin; 47 if (!a->diag) { 48 ierr = PetscMalloc(a->mbs*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 49 } 50 for (i=0; i<a->mbs; i++) a->diag[i] = a->i[i]; 51 PetscFunctionReturn(0); 52 } 53 54 #undef __FUNCT__ 55 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 56 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 57 { 58 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 59 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs; 60 PetscErrorCode ierr; 61 62 PetscFunctionBegin; 63 *nn = n; 64 if (!ia) PetscFunctionReturn(0); 65 if (!blockcompressed) { 66 /* malloc & create the natural set of indices */ 67 ierr = PetscMalloc2((n+1)*bs,PetscInt,ia,nz*bs,PetscInt,ja);CHKERRQ(ierr); 68 for (i=0; i<n+1; i++) { 69 for (j=0; j<bs; j++) { 70 *ia[i*bs+j] = a->i[i]*bs+j+oshift; 71 } 72 } 73 for (i=0; i<nz; i++) { 74 for (j=0; j<bs; j++) { 75 *ja[i*bs+j] = a->j[i]*bs+j+oshift; 76 } 77 } 78 } else { /* blockcompressed */ 79 if (oshift == 1) { 80 /* temporarily add 1 to i and j indices */ 81 for (i=0; i<nz; i++) a->j[i]++; 82 for (i=0; i<n+1; i++) a->i[i]++; 83 } 84 *ia = a->i; *ja = a->j; 85 } 86 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 92 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 93 { 94 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 95 PetscInt i,n = a->mbs,nz = a->i[n]; 96 PetscErrorCode ierr; 97 98 PetscFunctionBegin; 99 if (!ia) PetscFunctionReturn(0); 100 101 if (!blockcompressed) { 102 ierr = PetscFree2(*ia,*ja);CHKERRQ(ierr); 103 } else if (oshift == 1) { /* blockcompressed */ 104 for (i=0; i<nz; i++) a->j[i]--; 105 for (i=0; i<n+1; i++) a->i[i]--; 106 } 107 108 PetscFunctionReturn(0); 109 } 110 111 #undef __FUNCT__ 112 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 113 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 114 { 115 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 116 PetscErrorCode ierr; 117 118 PetscFunctionBegin; 119 #if defined(PETSC_USE_LOG) 120 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->rmap.N,a->nz); 121 #endif 122 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 123 if (a->row) {ierr = ISDestroy(a->row);CHKERRQ(ierr);} 124 if (a->col){ierr = ISDestroy(a->col);CHKERRQ(ierr);} 125 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 126 ierr = PetscFree(a->diag);CHKERRQ(ierr); 127 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 128 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 129 ierr = PetscFree(a->relax_work);CHKERRQ(ierr); 130 ierr = PetscFree(a->solves_work);CHKERRQ(ierr); 131 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 132 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 133 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 134 135 ierr = PetscFree(a->inew);CHKERRQ(ierr); 136 ierr = PetscFree(a);CHKERRQ(ierr); 137 138 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 139 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 140 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 141 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 142 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 143 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 144 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 145 PetscFunctionReturn(0); 146 } 147 148 #undef __FUNCT__ 149 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 150 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op,PetscTruth flg) 151 { 152 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 153 PetscErrorCode ierr; 154 155 PetscFunctionBegin; 156 switch (op) { 157 case MAT_ROW_ORIENTED: 158 a->roworiented = flg; 159 break; 160 case MAT_KEEP_ZEROED_ROWS: 161 a->keepzeroedrows = flg; 162 break; 163 case MAT_NEW_NONZERO_LOCATIONS: 164 a->nonew = (flg ? 0 : 1); 165 break; 166 case MAT_NEW_NONZERO_LOCATION_ERR: 167 a->nonew = (flg ? -1 : 0); 168 break; 169 case MAT_NEW_NONZERO_ALLOCATION_ERR: 170 a->nonew = (flg ? -2 : 0); 171 break; 172 case MAT_NEW_DIAGONALS: 173 case MAT_IGNORE_OFF_PROC_ENTRIES: 174 case MAT_USE_HASH_TABLE: 175 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 176 break; 177 case MAT_HERMITIAN: 178 if (flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 179 case MAT_SYMMETRIC: 180 case MAT_STRUCTURALLY_SYMMETRIC: 181 case MAT_SYMMETRY_ETERNAL: 182 if (!flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 183 ierr = PetscInfo1(A,"Option %s not relevent\n",MatOptions[op]);CHKERRQ(ierr); 184 break; 185 case MAT_IGNORE_LOWER_TRIANGULAR: 186 a->ignore_ltriangular = flg; 187 break; 188 case MAT_ERROR_LOWER_TRIANGULAR: 189 a->ignore_ltriangular = flg; 190 break; 191 case MAT_GETROW_UPPERTRIANGULAR: 192 a->getrow_utriangular = flg; 193 break; 194 default: 195 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 196 } 197 PetscFunctionReturn(0); 198 } 199 200 #undef __FUNCT__ 201 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 202 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v) 203 { 204 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 205 PetscErrorCode ierr; 206 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 207 MatScalar *aa,*aa_i; 208 PetscScalar *v_i; 209 210 PetscFunctionBegin; 211 if (A && !a->getrow_utriangular) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format. Getting the upper triangular part of row, run with -mat_getrow_uppertriangular, call MatSetOption(mat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE) or MatGetRowUpperTriangular()"); 212 /* Get the upper triangular part of the row */ 213 bs = A->rmap.bs; 214 ai = a->i; 215 aj = a->j; 216 aa = a->a; 217 bs2 = a->bs2; 218 219 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 220 221 bn = row/bs; /* Block number */ 222 bp = row % bs; /* Block position */ 223 M = ai[bn+1] - ai[bn]; 224 *ncols = bs*M; 225 226 if (v) { 227 *v = 0; 228 if (*ncols) { 229 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 230 for (i=0; i<M; i++) { /* for each block in the block row */ 231 v_i = *v + i*bs; 232 aa_i = aa + bs2*(ai[bn] + i); 233 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 234 } 235 } 236 } 237 238 if (cols) { 239 *cols = 0; 240 if (*ncols) { 241 ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr); 242 for (i=0; i<M; i++) { /* for each block in the block row */ 243 cols_i = *cols + i*bs; 244 itmp = bs*aj[ai[bn] + i]; 245 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 246 } 247 } 248 } 249 250 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 251 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 252 #ifdef column_search 253 v_i = *v + M*bs; 254 cols_i = *cols + M*bs; 255 for (i=0; i<bn; i++){ /* for each block row */ 256 M = ai[i+1] - ai[i]; 257 for (j=0; j<M; j++){ 258 itmp = aj[ai[i] + j]; /* block column value */ 259 if (itmp == bn){ 260 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 261 for (k=0; k<bs; k++) { 262 *cols_i++ = i*bs+k; 263 *v_i++ = aa_i[k]; 264 } 265 *ncols += bs; 266 break; 267 } 268 } 269 } 270 #endif 271 PetscFunctionReturn(0); 272 } 273 274 #undef __FUNCT__ 275 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 276 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 277 { 278 PetscErrorCode ierr; 279 280 PetscFunctionBegin; 281 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 282 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 283 PetscFunctionReturn(0); 284 } 285 286 #undef __FUNCT__ 287 #define __FUNCT__ "MatGetRowUpperTriangular_SeqSBAIJ" 288 PetscErrorCode MatGetRowUpperTriangular_SeqSBAIJ(Mat A) 289 { 290 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 291 292 PetscFunctionBegin; 293 a->getrow_utriangular = PETSC_TRUE; 294 PetscFunctionReturn(0); 295 } 296 #undef __FUNCT__ 297 #define __FUNCT__ "MatRestoreRowUpperTriangular_SeqSBAIJ" 298 PetscErrorCode MatRestoreRowUpperTriangular_SeqSBAIJ(Mat A) 299 { 300 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 301 302 PetscFunctionBegin; 303 a->getrow_utriangular = PETSC_FALSE; 304 PetscFunctionReturn(0); 305 } 306 307 #undef __FUNCT__ 308 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 309 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 310 { 311 PetscErrorCode ierr; 312 PetscFunctionBegin; 313 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 314 PetscFunctionReturn(0); 315 } 316 317 #undef __FUNCT__ 318 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 319 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 320 { 321 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 322 PetscErrorCode ierr; 323 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 324 const char *name; 325 PetscViewerFormat format; 326 327 PetscFunctionBegin; 328 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 329 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 330 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 331 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 332 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 333 Mat aij; 334 335 if (A->factor && bs>1){ 336 ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored with bs>1. MatView() with PETSC_VIEWER_ASCII_MATLAB is not supported and ignored!\n");CHKERRQ(ierr); 337 PetscFunctionReturn(0); 338 } 339 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 340 ierr = MatView(aij,viewer);CHKERRQ(ierr); 341 ierr = MatDestroy(aij);CHKERRQ(ierr); 342 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 343 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 344 for (i=0; i<a->mbs; i++) { 345 for (j=0; j<bs; j++) { 346 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 347 for (k=a->i[i]; k<a->i[i+1]; k++) { 348 for (l=0; l<bs; l++) { 349 #if defined(PETSC_USE_COMPLEX) 350 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 351 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 352 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 353 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 354 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 355 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 356 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 357 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 358 } 359 #else 360 if (a->a[bs2*k + l*bs + j] != 0.0) { 361 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 362 } 363 #endif 364 } 365 } 366 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 367 } 368 } 369 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 370 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 371 PetscFunctionReturn(0); 372 } else { 373 if (A->factor && bs>1){ 374 ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored. MatView_SeqSBAIJ_ASCII() may not display complete or logically correct entries!\n");CHKERRQ(ierr); 375 } 376 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 377 for (i=0; i<a->mbs; i++) { 378 for (j=0; j<bs; j++) { 379 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 380 for (k=a->i[i]; k<a->i[i+1]; k++) { 381 for (l=0; l<bs; l++) { 382 #if defined(PETSC_USE_COMPLEX) 383 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 384 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 385 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 386 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 387 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 388 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 389 } else { 390 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 391 } 392 #else 393 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 394 #endif 395 } 396 } 397 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 398 } 399 } 400 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 401 } 402 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 403 PetscFunctionReturn(0); 404 } 405 406 #undef __FUNCT__ 407 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 408 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 409 { 410 Mat A = (Mat) Aa; 411 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 412 PetscErrorCode ierr; 413 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 414 PetscMPIInt rank; 415 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 416 MatScalar *aa; 417 MPI_Comm comm; 418 PetscViewer viewer; 419 420 PetscFunctionBegin; 421 /* 422 This is nasty. If this is called from an originally parallel matrix 423 then all processes call this,but only the first has the matrix so the 424 rest should return immediately. 425 */ 426 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 427 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 428 if (rank) PetscFunctionReturn(0); 429 430 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 431 432 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 433 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 434 435 /* loop over matrix elements drawing boxes */ 436 color = PETSC_DRAW_BLUE; 437 for (i=0,row=0; i<mbs; i++,row+=bs) { 438 for (j=a->i[i]; j<a->i[i+1]; j++) { 439 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 440 x_l = a->j[j]*bs; x_r = x_l + 1.0; 441 aa = a->a + j*bs2; 442 for (k=0; k<bs; k++) { 443 for (l=0; l<bs; l++) { 444 if (PetscRealPart(*aa++) >= 0.) continue; 445 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 446 } 447 } 448 } 449 } 450 color = PETSC_DRAW_CYAN; 451 for (i=0,row=0; i<mbs; i++,row+=bs) { 452 for (j=a->i[i]; j<a->i[i+1]; j++) { 453 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 454 x_l = a->j[j]*bs; x_r = x_l + 1.0; 455 aa = a->a + j*bs2; 456 for (k=0; k<bs; k++) { 457 for (l=0; l<bs; l++) { 458 if (PetscRealPart(*aa++) != 0.) continue; 459 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 460 } 461 } 462 } 463 } 464 465 color = PETSC_DRAW_RED; 466 for (i=0,row=0; i<mbs; i++,row+=bs) { 467 for (j=a->i[i]; j<a->i[i+1]; j++) { 468 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 469 x_l = a->j[j]*bs; x_r = x_l + 1.0; 470 aa = a->a + j*bs2; 471 for (k=0; k<bs; k++) { 472 for (l=0; l<bs; l++) { 473 if (PetscRealPart(*aa++) <= 0.) continue; 474 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 475 } 476 } 477 } 478 } 479 PetscFunctionReturn(0); 480 } 481 482 #undef __FUNCT__ 483 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 484 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 485 { 486 PetscErrorCode ierr; 487 PetscReal xl,yl,xr,yr,w,h; 488 PetscDraw draw; 489 PetscTruth isnull; 490 491 PetscFunctionBegin; 492 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 493 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 494 495 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 496 xr = A->rmap.N; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 497 xr += w; yr += h; xl = -w; yl = -h; 498 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 499 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 500 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 501 PetscFunctionReturn(0); 502 } 503 504 #undef __FUNCT__ 505 #define __FUNCT__ "MatView_SeqSBAIJ" 506 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 507 { 508 PetscErrorCode ierr; 509 PetscTruth iascii,isdraw; 510 511 PetscFunctionBegin; 512 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 513 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 514 if (iascii){ 515 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 516 } else if (isdraw) { 517 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 518 } else { 519 Mat B; 520 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 521 ierr = MatView(B,viewer);CHKERRQ(ierr); 522 ierr = MatDestroy(B);CHKERRQ(ierr); 523 } 524 PetscFunctionReturn(0); 525 } 526 527 528 #undef __FUNCT__ 529 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 530 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 531 { 532 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 533 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 534 PetscInt *ai = a->i,*ailen = a->ilen; 535 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 536 MatScalar *ap,*aa = a->a; 537 538 PetscFunctionBegin; 539 for (k=0; k<m; k++) { /* loop over rows */ 540 row = im[k]; brow = row/bs; 541 if (row < 0) {v += n; continue;} /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); */ 542 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 543 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 544 nrow = ailen[brow]; 545 for (l=0; l<n; l++) { /* loop over columns */ 546 if (in[l] < 0) {v++; continue;} /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); */ 547 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 548 col = in[l] ; 549 bcol = col/bs; 550 cidx = col%bs; 551 ridx = row%bs; 552 high = nrow; 553 low = 0; /* assume unsorted */ 554 while (high-low > 5) { 555 t = (low+high)/2; 556 if (rp[t] > bcol) high = t; 557 else low = t; 558 } 559 for (i=low; i<high; i++) { 560 if (rp[i] > bcol) break; 561 if (rp[i] == bcol) { 562 *v++ = ap[bs2*i+bs*cidx+ridx]; 563 goto finished; 564 } 565 } 566 *v++ = 0.0; 567 finished:; 568 } 569 } 570 PetscFunctionReturn(0); 571 } 572 573 574 #undef __FUNCT__ 575 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 576 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 577 { 578 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 579 PetscErrorCode ierr; 580 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 581 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 582 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 583 PetscTruth roworiented=a->roworiented; 584 const PetscScalar *value = v; 585 MatScalar *ap,*aa = a->a,*bap; 586 587 PetscFunctionBegin; 588 if (roworiented) { 589 stepval = (n-1)*bs; 590 } else { 591 stepval = (m-1)*bs; 592 } 593 for (k=0; k<m; k++) { /* loop over added rows */ 594 row = im[k]; 595 if (row < 0) continue; 596 #if defined(PETSC_USE_DEBUG) 597 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 598 #endif 599 rp = aj + ai[row]; 600 ap = aa + bs2*ai[row]; 601 rmax = imax[row]; 602 nrow = ailen[row]; 603 low = 0; 604 high = nrow; 605 for (l=0; l<n; l++) { /* loop over added columns */ 606 if (in[l] < 0) continue; 607 col = in[l]; 608 #if defined(PETSC_USE_DEBUG) 609 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 610 #endif 611 if (col < row) continue; /* ignore lower triangular block */ 612 if (roworiented) { 613 value = v + k*(stepval+bs)*bs + l*bs; 614 } else { 615 value = v + l*(stepval+bs)*bs + k*bs; 616 } 617 if (col <= lastcol) low = 0; else high = nrow; 618 lastcol = col; 619 while (high-low > 7) { 620 t = (low+high)/2; 621 if (rp[t] > col) high = t; 622 else low = t; 623 } 624 for (i=low; i<high; i++) { 625 if (rp[i] > col) break; 626 if (rp[i] == col) { 627 bap = ap + bs2*i; 628 if (roworiented) { 629 if (is == ADD_VALUES) { 630 for (ii=0; ii<bs; ii++,value+=stepval) { 631 for (jj=ii; jj<bs2; jj+=bs) { 632 bap[jj] += *value++; 633 } 634 } 635 } else { 636 for (ii=0; ii<bs; ii++,value+=stepval) { 637 for (jj=ii; jj<bs2; jj+=bs) { 638 bap[jj] = *value++; 639 } 640 } 641 } 642 } else { 643 if (is == ADD_VALUES) { 644 for (ii=0; ii<bs; ii++,value+=stepval) { 645 for (jj=0; jj<bs; jj++) { 646 *bap++ += *value++; 647 } 648 } 649 } else { 650 for (ii=0; ii<bs; ii++,value+=stepval) { 651 for (jj=0; jj<bs; jj++) { 652 *bap++ = *value++; 653 } 654 } 655 } 656 } 657 goto noinsert2; 658 } 659 } 660 if (nonew == 1) goto noinsert2; 661 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 662 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 663 N = nrow++ - 1; high++; 664 /* shift up all the later entries in this row */ 665 for (ii=N; ii>=i; ii--) { 666 rp[ii+1] = rp[ii]; 667 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 668 } 669 if (N >= i) { 670 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 671 } 672 rp[i] = col; 673 bap = ap + bs2*i; 674 if (roworiented) { 675 for (ii=0; ii<bs; ii++,value+=stepval) { 676 for (jj=ii; jj<bs2; jj+=bs) { 677 bap[jj] = *value++; 678 } 679 } 680 } else { 681 for (ii=0; ii<bs; ii++,value+=stepval) { 682 for (jj=0; jj<bs; jj++) { 683 *bap++ = *value++; 684 } 685 } 686 } 687 noinsert2:; 688 low = i; 689 } 690 ailen[row] = nrow; 691 } 692 PetscFunctionReturn(0); 693 } 694 695 #undef __FUNCT__ 696 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 697 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 698 { 699 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 700 PetscErrorCode ierr; 701 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 702 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 703 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 704 MatScalar *aa = a->a,*ap; 705 706 PetscFunctionBegin; 707 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 708 709 if (m) rmax = ailen[0]; 710 for (i=1; i<mbs; i++) { 711 /* move each row back by the amount of empty slots (fshift) before it*/ 712 fshift += imax[i-1] - ailen[i-1]; 713 rmax = PetscMax(rmax,ailen[i]); 714 if (fshift) { 715 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 716 N = ailen[i]; 717 for (j=0; j<N; j++) { 718 ip[j-fshift] = ip[j]; 719 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 720 } 721 } 722 ai[i] = ai[i-1] + ailen[i-1]; 723 } 724 if (mbs) { 725 fshift += imax[mbs-1] - ailen[mbs-1]; 726 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 727 } 728 /* reset ilen and imax for each row */ 729 for (i=0; i<mbs; i++) { 730 ailen[i] = imax[i] = ai[i+1] - ai[i]; 731 } 732 a->nz = ai[mbs]; 733 734 /* diagonals may have moved, reset it */ 735 if (a->diag) { 736 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 737 } 738 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->rmap.N,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 739 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 740 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 741 a->reallocs = 0; 742 A->info.nz_unneeded = (PetscReal)fshift*bs2; 743 PetscFunctionReturn(0); 744 } 745 746 /* 747 This function returns an array of flags which indicate the locations of contiguous 748 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 749 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 750 Assume: sizes should be long enough to hold all the values. 751 */ 752 #undef __FUNCT__ 753 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 754 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 755 { 756 PetscInt i,j,k,row; 757 PetscTruth flg; 758 759 PetscFunctionBegin; 760 for (i=0,j=0; i<n; j++) { 761 row = idx[i]; 762 if (row%bs!=0) { /* Not the begining of a block */ 763 sizes[j] = 1; 764 i++; 765 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 766 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 767 i++; 768 } else { /* Begining of the block, so check if the complete block exists */ 769 flg = PETSC_TRUE; 770 for (k=1; k<bs; k++) { 771 if (row+k != idx[i+k]) { /* break in the block */ 772 flg = PETSC_FALSE; 773 break; 774 } 775 } 776 if (flg) { /* No break in the bs */ 777 sizes[j] = bs; 778 i+= bs; 779 } else { 780 sizes[j] = 1; 781 i++; 782 } 783 } 784 } 785 *bs_max = j; 786 PetscFunctionReturn(0); 787 } 788 789 790 /* Only add/insert a(i,j) with i<=j (blocks). 791 Any a(i,j) with i>j input by user is ingored. 792 */ 793 794 #undef __FUNCT__ 795 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 796 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 797 { 798 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 799 PetscErrorCode ierr; 800 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 801 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 802 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 803 PetscInt ridx,cidx,bs2=a->bs2; 804 MatScalar *ap,value,*aa=a->a,*bap; 805 806 PetscFunctionBegin; 807 for (k=0; k<m; k++) { /* loop over added rows */ 808 row = im[k]; /* row number */ 809 brow = row/bs; /* block row number */ 810 if (row < 0) continue; 811 #if defined(PETSC_USE_DEBUG) 812 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 813 #endif 814 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 815 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 816 rmax = imax[brow]; /* maximum space allocated for this row */ 817 nrow = ailen[brow]; /* actual length of this row */ 818 low = 0; 819 820 for (l=0; l<n; l++) { /* loop over added columns */ 821 if (in[l] < 0) continue; 822 #if defined(PETSC_USE_DEBUG) 823 if (in[l] >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->rmap.N-1); 824 #endif 825 col = in[l]; 826 bcol = col/bs; /* block col number */ 827 828 if (brow > bcol) { 829 if (a->ignore_ltriangular){ 830 continue; /* ignore lower triangular values */ 831 } else { 832 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 833 } 834 } 835 836 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 837 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 838 /* element value a(k,l) */ 839 if (roworiented) { 840 value = v[l + k*n]; 841 } else { 842 value = v[k + l*m]; 843 } 844 845 /* move pointer bap to a(k,l) quickly and add/insert value */ 846 if (col <= lastcol) low = 0; high = nrow; 847 lastcol = col; 848 while (high-low > 7) { 849 t = (low+high)/2; 850 if (rp[t] > bcol) high = t; 851 else low = t; 852 } 853 for (i=low; i<high; i++) { 854 if (rp[i] > bcol) break; 855 if (rp[i] == bcol) { 856 bap = ap + bs2*i + bs*cidx + ridx; 857 if (is == ADD_VALUES) *bap += value; 858 else *bap = value; 859 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 860 if (brow == bcol && ridx < cidx){ 861 bap = ap + bs2*i + bs*ridx + cidx; 862 if (is == ADD_VALUES) *bap += value; 863 else *bap = value; 864 } 865 goto noinsert1; 866 } 867 } 868 869 if (nonew == 1) goto noinsert1; 870 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 871 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 872 873 N = nrow++ - 1; high++; 874 /* shift up all the later entries in this row */ 875 for (ii=N; ii>=i; ii--) { 876 rp[ii+1] = rp[ii]; 877 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 878 } 879 if (N>=i) { 880 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 881 } 882 rp[i] = bcol; 883 ap[bs2*i + bs*cidx + ridx] = value; 884 noinsert1:; 885 low = i; 886 } 887 } /* end of loop over added columns */ 888 ailen[brow] = nrow; 889 } /* end of loop over added rows */ 890 PetscFunctionReturn(0); 891 } 892 893 #undef __FUNCT__ 894 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 895 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 896 { 897 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 898 Mat outA; 899 PetscErrorCode ierr; 900 PetscTruth row_identity; 901 902 PetscFunctionBegin; 903 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels=0 is supported for in-place icc"); 904 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 905 if (!row_identity) SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported"); 906 if (inA->rmap.bs != 1) SETERRQ1(PETSC_ERR_SUP,"Matrix block size %D is not supported",inA->rmap.bs); /* Need to replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR()! */ 907 908 outA = inA; 909 inA->factor = FACTOR_CHOLESKY; 910 911 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 912 /* 913 Blocksize < 8 have a special faster factorization/solver 914 for ICC(0) factorization with natural ordering 915 */ 916 switch (inA->rmap.bs){ /* Note: row_identity = PETSC_TRUE! */ 917 case 1: 918 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 919 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 920 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 921 inA->ops->solves = MatSolves_SeqSBAIJ_1; 922 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 923 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 924 ierr = PetscInfo(inA,"Using special in-place natural ordering solvetrans BS=1\n");CHKERRQ(ierr); 925 break; 926 case 2: 927 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 928 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 929 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 930 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering; 931 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering; 932 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr); 933 break; 934 case 3: 935 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 936 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 937 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 938 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_3_NaturalOrdering; 939 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering; 940 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr); 941 break; 942 case 4: 943 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 944 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 945 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 946 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_4_NaturalOrdering; 947 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering; 948 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr); 949 break; 950 case 5: 951 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 952 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 953 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 954 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_5_NaturalOrdering; 955 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering; 956 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr); 957 break; 958 case 6: 959 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 960 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 961 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 962 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_6_NaturalOrdering; 963 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering; 964 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr); 965 break; 966 case 7: 967 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 968 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 969 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 970 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_7_NaturalOrdering; 971 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering; 972 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr); 973 break; 974 default: 975 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 976 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering; 977 inA->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 978 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering; 979 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering; 980 break; 981 } 982 983 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 984 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 985 a->row = row; 986 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 987 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 988 a->col = row; 989 990 /* Create the invert permutation so that it can be used in MatCholeskyFactorNumeric() */ 991 if (a->icol) {ierr = ISInvertPermutation(row,PETSC_DECIDE, &a->icol);CHKERRQ(ierr);} 992 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 993 994 if (!a->solve_work) { 995 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 996 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 997 } 998 999 ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1000 PetscFunctionReturn(0); 1001 } 1002 1003 EXTERN_C_BEGIN 1004 #undef __FUNCT__ 1005 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1006 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices) 1007 { 1008 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1009 PetscInt i,nz,n; 1010 1011 PetscFunctionBegin; 1012 nz = baij->maxnz; 1013 n = mat->cmap.n; 1014 for (i=0; i<nz; i++) { 1015 baij->j[i] = indices[i]; 1016 } 1017 baij->nz = nz; 1018 for (i=0; i<n; i++) { 1019 baij->ilen[i] = baij->imax[i]; 1020 } 1021 PetscFunctionReturn(0); 1022 } 1023 EXTERN_C_END 1024 1025 #undef __FUNCT__ 1026 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1027 /*@ 1028 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1029 in the matrix. 1030 1031 Input Parameters: 1032 + mat - the SeqSBAIJ matrix 1033 - indices - the column indices 1034 1035 Level: advanced 1036 1037 Notes: 1038 This can be called if you have precomputed the nonzero structure of the 1039 matrix and want to provide it to the matrix object to improve the performance 1040 of the MatSetValues() operation. 1041 1042 You MUST have set the correct numbers of nonzeros per row in the call to 1043 MatCreateSeqSBAIJ(), and the columns indices MUST be sorted. 1044 1045 MUST be called before any calls to MatSetValues() 1046 1047 .seealso: MatCreateSeqSBAIJ 1048 @*/ 1049 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1050 { 1051 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1052 1053 PetscFunctionBegin; 1054 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1055 PetscValidPointer(indices,2); 1056 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1057 if (f) { 1058 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1059 } else { 1060 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1061 } 1062 PetscFunctionReturn(0); 1063 } 1064 1065 #undef __FUNCT__ 1066 #define __FUNCT__ "MatCopy_SeqSBAIJ" 1067 PetscErrorCode MatCopy_SeqSBAIJ(Mat A,Mat B,MatStructure str) 1068 { 1069 PetscErrorCode ierr; 1070 1071 PetscFunctionBegin; 1072 /* If the two matrices have the same copy implementation, use fast copy. */ 1073 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1074 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1075 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1076 1077 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 1078 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1079 } 1080 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 1081 } else { 1082 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1083 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1084 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1085 } 1086 PetscFunctionReturn(0); 1087 } 1088 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1091 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1092 { 1093 PetscErrorCode ierr; 1094 1095 PetscFunctionBegin; 1096 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 1097 PetscFunctionReturn(0); 1098 } 1099 1100 #undef __FUNCT__ 1101 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1102 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1103 { 1104 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1105 PetscFunctionBegin; 1106 *array = a->a; 1107 PetscFunctionReturn(0); 1108 } 1109 1110 #undef __FUNCT__ 1111 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1112 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1113 { 1114 PetscFunctionBegin; 1115 PetscFunctionReturn(0); 1116 } 1117 1118 #include "petscblaslapack.h" 1119 #undef __FUNCT__ 1120 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1121 PetscErrorCode MatAXPY_SeqSBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1122 { 1123 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1124 PetscErrorCode ierr; 1125 PetscInt i,bs=Y->rmap.bs,bs2,j; 1126 PetscBLASInt one = 1,bnz = PetscBLASIntCast(x->nz); 1127 1128 PetscFunctionBegin; 1129 if (str == SAME_NONZERO_PATTERN) { 1130 PetscScalar alpha = a; 1131 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1132 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1133 if (y->xtoy && y->XtoY != X) { 1134 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1135 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1136 } 1137 if (!y->xtoy) { /* get xtoy */ 1138 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1139 y->XtoY = X; 1140 } 1141 bs2 = bs*bs; 1142 for (i=0; i<x->nz; i++) { 1143 j = 0; 1144 while (j < bs2){ 1145 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 1146 j++; 1147 } 1148 } 1149 ierr = PetscInfo3(Y,"ratio of nnz_s(X)/nnz_s(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 1150 } else { 1151 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1152 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1153 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1154 } 1155 PetscFunctionReturn(0); 1156 } 1157 1158 #undef __FUNCT__ 1159 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1160 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1161 { 1162 PetscFunctionBegin; 1163 *flg = PETSC_TRUE; 1164 PetscFunctionReturn(0); 1165 } 1166 1167 #undef __FUNCT__ 1168 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1169 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1170 { 1171 PetscFunctionBegin; 1172 *flg = PETSC_TRUE; 1173 PetscFunctionReturn(0); 1174 } 1175 1176 #undef __FUNCT__ 1177 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1178 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1179 { 1180 PetscFunctionBegin; 1181 *flg = PETSC_FALSE; 1182 PetscFunctionReturn(0); 1183 } 1184 1185 #undef __FUNCT__ 1186 #define __FUNCT__ "MatRealPart_SeqSBAIJ" 1187 PetscErrorCode MatRealPart_SeqSBAIJ(Mat A) 1188 { 1189 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1190 PetscInt i,nz = a->bs2*a->i[a->mbs]; 1191 MatScalar *aa = a->a; 1192 1193 PetscFunctionBegin; 1194 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 1195 PetscFunctionReturn(0); 1196 } 1197 1198 #undef __FUNCT__ 1199 #define __FUNCT__ "MatImaginaryPart_SeqSBAIJ" 1200 PetscErrorCode MatImaginaryPart_SeqSBAIJ(Mat A) 1201 { 1202 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1203 PetscInt i,nz = a->bs2*a->i[a->mbs]; 1204 MatScalar *aa = a->a; 1205 1206 PetscFunctionBegin; 1207 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 1208 PetscFunctionReturn(0); 1209 } 1210 1211 /* -------------------------------------------------------------------*/ 1212 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1213 MatGetRow_SeqSBAIJ, 1214 MatRestoreRow_SeqSBAIJ, 1215 MatMult_SeqSBAIJ_N, 1216 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1217 MatMult_SeqSBAIJ_N, /* transpose versions are same as non-transpose versions */ 1218 MatMultAdd_SeqSBAIJ_N, 1219 MatSolve_SeqSBAIJ_N, 1220 0, 1221 0, 1222 /*10*/ 0, 1223 0, 1224 MatCholeskyFactor_SeqSBAIJ, 1225 MatRelax_SeqSBAIJ, 1226 MatTranspose_SeqSBAIJ, 1227 /*15*/ MatGetInfo_SeqSBAIJ, 1228 MatEqual_SeqSBAIJ, 1229 MatGetDiagonal_SeqSBAIJ, 1230 MatDiagonalScale_SeqSBAIJ, 1231 MatNorm_SeqSBAIJ, 1232 /*20*/ 0, 1233 MatAssemblyEnd_SeqSBAIJ, 1234 0, 1235 MatSetOption_SeqSBAIJ, 1236 MatZeroEntries_SeqSBAIJ, 1237 /*25*/ 0, 1238 0, 1239 0, 1240 MatCholeskyFactorSymbolic_SeqSBAIJ, 1241 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1242 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1243 0, 1244 MatICCFactorSymbolic_SeqSBAIJ, 1245 MatGetArray_SeqSBAIJ, 1246 MatRestoreArray_SeqSBAIJ, 1247 /*35*/ MatDuplicate_SeqSBAIJ, 1248 MatForwardSolve_SeqSBAIJ_N, 1249 MatBackwardSolve_SeqSBAIJ_N, 1250 0, 1251 MatICCFactor_SeqSBAIJ, 1252 /*40*/ MatAXPY_SeqSBAIJ, 1253 MatGetSubMatrices_SeqSBAIJ, 1254 MatIncreaseOverlap_SeqSBAIJ, 1255 MatGetValues_SeqSBAIJ, 1256 MatCopy_SeqSBAIJ, 1257 /*45*/ 0, 1258 MatScale_SeqSBAIJ, 1259 0, 1260 0, 1261 0, 1262 /*50*/ 0, 1263 MatGetRowIJ_SeqSBAIJ, 1264 MatRestoreRowIJ_SeqSBAIJ, 1265 0, 1266 0, 1267 /*55*/ 0, 1268 0, 1269 0, 1270 0, 1271 MatSetValuesBlocked_SeqSBAIJ, 1272 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1273 0, 1274 0, 1275 0, 1276 0, 1277 /*65*/ 0, 1278 0, 1279 0, 1280 0, 1281 0, 1282 /*70*/ MatGetRowMaxAbs_SeqSBAIJ, 1283 0, 1284 0, 1285 0, 1286 0, 1287 /*75*/ 0, 1288 0, 1289 0, 1290 0, 1291 0, 1292 /*80*/ 0, 1293 0, 1294 0, 1295 #if !defined(PETSC_USE_COMPLEX) 1296 MatGetInertia_SeqSBAIJ, 1297 #else 1298 0, 1299 #endif 1300 MatLoad_SeqSBAIJ, 1301 /*85*/ MatIsSymmetric_SeqSBAIJ, 1302 MatIsHermitian_SeqSBAIJ, 1303 MatIsStructurallySymmetric_SeqSBAIJ, 1304 0, 1305 0, 1306 /*90*/ 0, 1307 0, 1308 0, 1309 0, 1310 0, 1311 /*95*/ 0, 1312 0, 1313 0, 1314 0, 1315 0, 1316 /*100*/0, 1317 0, 1318 0, 1319 0, 1320 0, 1321 /*105*/0, 1322 MatRealPart_SeqSBAIJ, 1323 MatImaginaryPart_SeqSBAIJ, 1324 MatGetRowUpperTriangular_SeqSBAIJ, 1325 MatRestoreRowUpperTriangular_SeqSBAIJ, 1326 /*110*/0, 1327 0, 1328 0, 1329 0, 1330 MatMissingDiagonal_SeqSBAIJ 1331 }; 1332 1333 EXTERN_C_BEGIN 1334 #undef __FUNCT__ 1335 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1336 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat) 1337 { 1338 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1339 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 1340 PetscErrorCode ierr; 1341 1342 PetscFunctionBegin; 1343 if (aij->nonew != 1) { 1344 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1345 } 1346 1347 /* allocate space for values if not already there */ 1348 if (!aij->saved_values) { 1349 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1350 } 1351 1352 /* copy values over */ 1353 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1354 PetscFunctionReturn(0); 1355 } 1356 EXTERN_C_END 1357 1358 EXTERN_C_BEGIN 1359 #undef __FUNCT__ 1360 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1361 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat) 1362 { 1363 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1364 PetscErrorCode ierr; 1365 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 1366 1367 PetscFunctionBegin; 1368 if (aij->nonew != 1) { 1369 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1370 } 1371 if (!aij->saved_values) { 1372 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1373 } 1374 1375 /* copy values over */ 1376 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1377 PetscFunctionReturn(0); 1378 } 1379 EXTERN_C_END 1380 1381 EXTERN_C_BEGIN 1382 #undef __FUNCT__ 1383 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1384 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 1385 { 1386 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1387 PetscErrorCode ierr; 1388 PetscInt i,mbs,bs2; 1389 PetscTruth skipallocation = PETSC_FALSE,flg; 1390 1391 PetscFunctionBegin; 1392 B->preallocated = PETSC_TRUE; 1393 ierr = PetscOptionsGetInt(((PetscObject)B)->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1394 B->rmap.bs = B->cmap.bs = bs; 1395 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 1396 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 1397 1398 mbs = B->rmap.N/bs; 1399 bs2 = bs*bs; 1400 1401 if (mbs*bs != B->rmap.N) { 1402 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1403 } 1404 1405 if (nz == MAT_SKIP_ALLOCATION) { 1406 skipallocation = PETSC_TRUE; 1407 nz = 0; 1408 } 1409 1410 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1411 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1412 if (nnz) { 1413 for (i=0; i<mbs; i++) { 1414 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1415 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1416 } 1417 } 1418 1419 ierr = PetscOptionsHasName(((PetscObject)B)->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1420 if (!flg) { 1421 switch (bs) { 1422 case 1: 1423 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1424 B->ops->solve = MatSolve_SeqSBAIJ_1; 1425 B->ops->solves = MatSolves_SeqSBAIJ_1; 1426 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1427 B->ops->mult = MatMult_SeqSBAIJ_1; 1428 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1429 B->ops->multtranspose = MatMult_SeqSBAIJ_1; 1430 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_1; 1431 break; 1432 case 2: 1433 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1434 B->ops->solve = MatSolve_SeqSBAIJ_2; 1435 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1436 B->ops->mult = MatMult_SeqSBAIJ_2; 1437 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1438 B->ops->multtranspose = MatMult_SeqSBAIJ_2; 1439 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_2; 1440 break; 1441 case 3: 1442 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1443 B->ops->solve = MatSolve_SeqSBAIJ_3; 1444 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1445 B->ops->mult = MatMult_SeqSBAIJ_3; 1446 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1447 B->ops->multtranspose = MatMult_SeqSBAIJ_3; 1448 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_3; 1449 break; 1450 case 4: 1451 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1452 B->ops->solve = MatSolve_SeqSBAIJ_4; 1453 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1454 B->ops->mult = MatMult_SeqSBAIJ_4; 1455 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1456 B->ops->multtranspose = MatMult_SeqSBAIJ_4; 1457 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_4; 1458 break; 1459 case 5: 1460 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1461 B->ops->solve = MatSolve_SeqSBAIJ_5; 1462 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1463 B->ops->mult = MatMult_SeqSBAIJ_5; 1464 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1465 B->ops->multtranspose = MatMult_SeqSBAIJ_5; 1466 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_5; 1467 break; 1468 case 6: 1469 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1470 B->ops->solve = MatSolve_SeqSBAIJ_6; 1471 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1472 B->ops->mult = MatMult_SeqSBAIJ_6; 1473 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1474 B->ops->multtranspose = MatMult_SeqSBAIJ_6; 1475 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_6; 1476 break; 1477 case 7: 1478 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1479 B->ops->solve = MatSolve_SeqSBAIJ_7; 1480 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1481 B->ops->mult = MatMult_SeqSBAIJ_7; 1482 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1483 B->ops->multtranspose = MatMult_SeqSBAIJ_7; 1484 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_7; 1485 break; 1486 } 1487 } 1488 1489 b->mbs = mbs; 1490 b->nbs = mbs; 1491 if (!skipallocation) { 1492 if (!b->imax) { 1493 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 1494 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 1495 } 1496 if (!nnz) { 1497 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1498 else if (nz <= 0) nz = 1; 1499 for (i=0; i<mbs; i++) { 1500 b->imax[i] = nz; 1501 } 1502 nz = nz*mbs; /* total nz */ 1503 } else { 1504 nz = 0; 1505 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1506 } 1507 /* b->ilen will count nonzeros in each block row so far. */ 1508 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1509 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1510 1511 /* allocate the matrix space */ 1512 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 1513 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 1514 ierr = PetscLogObjectMemory(B,(B->rmap.N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 1515 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1516 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1517 b->singlemalloc = PETSC_TRUE; 1518 1519 /* pointer to beginning of each row */ 1520 b->i[0] = 0; 1521 for (i=1; i<mbs+1; i++) { 1522 b->i[i] = b->i[i-1] + b->imax[i-1]; 1523 } 1524 b->free_a = PETSC_TRUE; 1525 b->free_ij = PETSC_TRUE; 1526 } else { 1527 b->free_a = PETSC_FALSE; 1528 b->free_ij = PETSC_FALSE; 1529 } 1530 1531 B->rmap.bs = bs; 1532 b->bs2 = bs2; 1533 b->nz = 0; 1534 b->maxnz = nz*bs2; 1535 1536 b->inew = 0; 1537 b->jnew = 0; 1538 b->anew = 0; 1539 b->a2anew = 0; 1540 b->permute = PETSC_FALSE; 1541 PetscFunctionReturn(0); 1542 } 1543 EXTERN_C_END 1544 1545 EXTERN_C_BEGIN 1546 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 1547 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat, MatType,MatReuse,Mat*); 1548 EXTERN_C_END 1549 1550 /*MC 1551 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1552 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1553 1554 Options Database Keys: 1555 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1556 1557 Level: beginner 1558 1559 .seealso: MatCreateSeqSBAIJ 1560 M*/ 1561 1562 EXTERN_C_BEGIN 1563 #undef __FUNCT__ 1564 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1565 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B) 1566 { 1567 Mat_SeqSBAIJ *b; 1568 PetscErrorCode ierr; 1569 PetscMPIInt size; 1570 PetscTruth flg; 1571 1572 PetscFunctionBegin; 1573 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 1574 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1575 1576 ierr = PetscNewLog(B,Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1577 B->data = (void*)b; 1578 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1579 B->ops->destroy = MatDestroy_SeqSBAIJ; 1580 B->ops->view = MatView_SeqSBAIJ; 1581 B->factor = 0; 1582 B->mapping = 0; 1583 b->row = 0; 1584 b->icol = 0; 1585 b->reallocs = 0; 1586 b->saved_values = 0; 1587 1588 1589 b->roworiented = PETSC_TRUE; 1590 b->nonew = 0; 1591 b->diag = 0; 1592 b->solve_work = 0; 1593 b->mult_work = 0; 1594 B->spptr = 0; 1595 b->keepzeroedrows = PETSC_FALSE; 1596 b->xtoy = 0; 1597 b->XtoY = 0; 1598 1599 b->inew = 0; 1600 b->jnew = 0; 1601 b->anew = 0; 1602 b->a2anew = 0; 1603 b->permute = PETSC_FALSE; 1604 1605 b->ignore_ltriangular = PETSC_FALSE; 1606 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr); 1607 if (flg) b->ignore_ltriangular = PETSC_TRUE; 1608 1609 b->getrow_utriangular = PETSC_FALSE; 1610 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_getrow_uppertriangular",&flg);CHKERRQ(ierr); 1611 if (flg) b->getrow_utriangular = PETSC_TRUE; 1612 1613 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1614 "MatStoreValues_SeqSBAIJ", 1615 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1616 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1617 "MatRetrieveValues_SeqSBAIJ", 1618 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1619 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1620 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1621 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1622 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1623 "MatConvert_SeqSBAIJ_SeqAIJ", 1624 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1625 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1626 "MatConvert_SeqSBAIJ_SeqBAIJ", 1627 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1628 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1629 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1630 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1631 1632 B->symmetric = PETSC_TRUE; 1633 B->structurally_symmetric = PETSC_TRUE; 1634 B->symmetric_set = PETSC_TRUE; 1635 B->structurally_symmetric_set = PETSC_TRUE; 1636 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSBAIJ);CHKERRQ(ierr); 1637 PetscFunctionReturn(0); 1638 } 1639 EXTERN_C_END 1640 1641 #undef __FUNCT__ 1642 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1643 /*@C 1644 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1645 compressed row) format. For good matrix assembly performance the 1646 user should preallocate the matrix storage by setting the parameter nz 1647 (or the array nnz). By setting these parameters accurately, performance 1648 during matrix assembly can be increased by more than a factor of 50. 1649 1650 Collective on Mat 1651 1652 Input Parameters: 1653 + A - the symmetric matrix 1654 . bs - size of block 1655 . nz - number of block nonzeros per block row (same for all rows) 1656 - nnz - array containing the number of block nonzeros in the upper triangular plus 1657 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1658 1659 Options Database Keys: 1660 . -mat_no_unroll - uses code that does not unroll the loops in the 1661 block calculations (much slower) 1662 . -mat_block_size - size of the blocks to use 1663 1664 Level: intermediate 1665 1666 Notes: 1667 Specify the preallocated storage with either nz or nnz (not both). 1668 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1669 allocation. For additional details, see the users manual chapter on 1670 matrices. 1671 1672 You can call MatGetInfo() to get information on how effective the preallocation was; 1673 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 1674 You can also run with the option -info and look for messages with the string 1675 malloc in them to see if additional memory allocation was needed. 1676 1677 If the nnz parameter is given then the nz parameter is ignored 1678 1679 1680 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1681 @*/ 1682 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 1683 { 1684 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 1685 1686 PetscFunctionBegin; 1687 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1688 if (f) { 1689 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1690 } 1691 PetscFunctionReturn(0); 1692 } 1693 1694 #undef __FUNCT__ 1695 #define __FUNCT__ "MatCreateSeqSBAIJ" 1696 /*@C 1697 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1698 compressed row) format. For good matrix assembly performance the 1699 user should preallocate the matrix storage by setting the parameter nz 1700 (or the array nnz). By setting these parameters accurately, performance 1701 during matrix assembly can be increased by more than a factor of 50. 1702 1703 Collective on MPI_Comm 1704 1705 Input Parameters: 1706 + comm - MPI communicator, set to PETSC_COMM_SELF 1707 . bs - size of block 1708 . m - number of rows, or number of columns 1709 . nz - number of block nonzeros per block row (same for all rows) 1710 - nnz - array containing the number of block nonzeros in the upper triangular plus 1711 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1712 1713 Output Parameter: 1714 . A - the symmetric matrix 1715 1716 Options Database Keys: 1717 . -mat_no_unroll - uses code that does not unroll the loops in the 1718 block calculations (much slower) 1719 . -mat_block_size - size of the blocks to use 1720 1721 Level: intermediate 1722 1723 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 1724 MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely 1725 true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles. 1726 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 1727 1728 Notes: 1729 The number of rows and columns must be divisible by blocksize. 1730 This matrix type does not support complex Hermitian operation. 1731 1732 Specify the preallocated storage with either nz or nnz (not both). 1733 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1734 allocation. For additional details, see the users manual chapter on 1735 matrices. 1736 1737 If the nnz parameter is given then the nz parameter is ignored 1738 1739 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1740 @*/ 1741 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1742 { 1743 PetscErrorCode ierr; 1744 1745 PetscFunctionBegin; 1746 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1747 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1748 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1749 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 1750 PetscFunctionReturn(0); 1751 } 1752 1753 #undef __FUNCT__ 1754 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1755 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1756 { 1757 Mat C; 1758 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1759 PetscErrorCode ierr; 1760 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1761 1762 PetscFunctionBegin; 1763 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1764 1765 *B = 0; 1766 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1767 ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 1768 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1769 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1770 c = (Mat_SeqSBAIJ*)C->data; 1771 1772 C->preallocated = PETSC_TRUE; 1773 C->factor = A->factor; 1774 c->row = 0; 1775 c->icol = 0; 1776 c->saved_values = 0; 1777 c->keepzeroedrows = a->keepzeroedrows; 1778 C->assembled = PETSC_TRUE; 1779 1780 ierr = PetscMapCopy(((PetscObject)A)->comm,&A->rmap,&C->rmap);CHKERRQ(ierr); 1781 ierr = PetscMapCopy(((PetscObject)A)->comm,&A->cmap,&C->cmap);CHKERRQ(ierr); 1782 c->bs2 = a->bs2; 1783 c->mbs = a->mbs; 1784 c->nbs = a->nbs; 1785 1786 ierr = PetscMalloc2((mbs+1),PetscInt,&c->imax,(mbs+1),PetscInt,&c->ilen);CHKERRQ(ierr); 1787 for (i=0; i<mbs; i++) { 1788 c->imax[i] = a->imax[i]; 1789 c->ilen[i] = a->ilen[i]; 1790 } 1791 1792 /* allocate the matrix space */ 1793 ierr = PetscMalloc3(bs2*nz,MatScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 1794 c->singlemalloc = PETSC_TRUE; 1795 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1796 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)));CHKERRQ(ierr); 1797 if (mbs > 0) { 1798 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1799 if (cpvalues == MAT_COPY_VALUES) { 1800 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1801 } else { 1802 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1803 } 1804 } 1805 1806 c->roworiented = a->roworiented; 1807 c->nonew = a->nonew; 1808 1809 if (a->diag) { 1810 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 1811 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1812 for (i=0; i<mbs; i++) { 1813 c->diag[i] = a->diag[i]; 1814 } 1815 } else c->diag = 0; 1816 c->nz = a->nz; 1817 c->maxnz = a->maxnz; 1818 c->solve_work = 0; 1819 c->mult_work = 0; 1820 c->free_a = PETSC_TRUE; 1821 c->free_ij = PETSC_TRUE; 1822 *B = C; 1823 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 1824 PetscFunctionReturn(0); 1825 } 1826 1827 #undef __FUNCT__ 1828 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1829 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer, MatType type,Mat *A) 1830 { 1831 Mat_SeqSBAIJ *a; 1832 Mat B; 1833 PetscErrorCode ierr; 1834 int fd; 1835 PetscMPIInt size; 1836 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 1837 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1838 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 1839 PetscInt *masked,nmask,tmp,bs2,ishift; 1840 PetscScalar *aa; 1841 MPI_Comm comm = ((PetscObject)viewer)->comm; 1842 1843 PetscFunctionBegin; 1844 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1845 bs2 = bs*bs; 1846 1847 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1848 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1849 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1850 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1851 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1852 M = header[1]; N = header[2]; nz = header[3]; 1853 1854 if (header[3] < 0) { 1855 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1856 } 1857 1858 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1859 1860 /* 1861 This code adds extra rows to make sure the number of rows is 1862 divisible by the blocksize 1863 */ 1864 mbs = M/bs; 1865 extra_rows = bs - M + bs*(mbs); 1866 if (extra_rows == bs) extra_rows = 0; 1867 else mbs++; 1868 if (extra_rows) { 1869 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 1870 } 1871 1872 /* read in row lengths */ 1873 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1874 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1875 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1876 1877 /* read in column indices */ 1878 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1879 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1880 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1881 1882 /* loop over row lengths determining block row lengths */ 1883 ierr = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr); 1884 ierr = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1885 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 1886 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1887 masked = mask + mbs; 1888 rowcount = 0; nzcount = 0; 1889 for (i=0; i<mbs; i++) { 1890 nmask = 0; 1891 for (j=0; j<bs; j++) { 1892 kmax = rowlengths[rowcount]; 1893 for (k=0; k<kmax; k++) { 1894 tmp = jj[nzcount++]/bs; /* block col. index */ 1895 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1896 } 1897 rowcount++; 1898 } 1899 s_browlengths[i] += nmask; 1900 1901 /* zero out the mask elements we set */ 1902 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1903 } 1904 1905 /* create our matrix */ 1906 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 1907 ierr = MatSetSizes(B,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 1908 ierr = MatSetType(B,type);CHKERRQ(ierr); 1909 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr); 1910 a = (Mat_SeqSBAIJ*)B->data; 1911 1912 /* set matrix "i" values */ 1913 a->i[0] = 0; 1914 for (i=1; i<= mbs; i++) { 1915 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1916 a->ilen[i-1] = s_browlengths[i-1]; 1917 } 1918 a->nz = a->i[mbs]; 1919 1920 /* read in nonzero values */ 1921 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1922 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1923 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1924 1925 /* set "a" and "j" values into matrix */ 1926 nzcount = 0; jcount = 0; 1927 for (i=0; i<mbs; i++) { 1928 nzcountb = nzcount; 1929 nmask = 0; 1930 for (j=0; j<bs; j++) { 1931 kmax = rowlengths[i*bs+j]; 1932 for (k=0; k<kmax; k++) { 1933 tmp = jj[nzcount++]/bs; /* block col. index */ 1934 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1935 } 1936 } 1937 /* sort the masked values */ 1938 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1939 1940 /* set "j" values into matrix */ 1941 maskcount = 1; 1942 for (j=0; j<nmask; j++) { 1943 a->j[jcount++] = masked[j]; 1944 mask[masked[j]] = maskcount++; 1945 } 1946 1947 /* set "a" values into matrix */ 1948 ishift = bs2*a->i[i]; 1949 for (j=0; j<bs; j++) { 1950 kmax = rowlengths[i*bs+j]; 1951 for (k=0; k<kmax; k++) { 1952 tmp = jj[nzcountb]/bs ; /* block col. index */ 1953 if (tmp >= i){ 1954 block = mask[tmp] - 1; 1955 point = jj[nzcountb] - bs*tmp; 1956 idx = ishift + bs2*block + j + bs*point; 1957 a->a[idx] = aa[nzcountb]; 1958 } 1959 nzcountb++; 1960 } 1961 } 1962 /* zero out the mask elements we set */ 1963 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1964 } 1965 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1966 1967 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1968 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1969 ierr = PetscFree(aa);CHKERRQ(ierr); 1970 ierr = PetscFree(jj);CHKERRQ(ierr); 1971 ierr = PetscFree(mask);CHKERRQ(ierr); 1972 1973 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1974 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1975 ierr = MatView_Private(B);CHKERRQ(ierr); 1976 *A = B; 1977 PetscFunctionReturn(0); 1978 } 1979 1980 #undef __FUNCT__ 1981 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1982 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1983 { 1984 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1985 MatScalar *aa=a->a,*v,*v1; 1986 PetscScalar *x,*b,*t,sum,d; 1987 PetscErrorCode ierr; 1988 PetscInt m=a->mbs,bs=A->rmap.bs,*ai=a->i,*aj=a->j; 1989 PetscInt nz,nz1,*vj,*vj1,i; 1990 1991 PetscFunctionBegin; 1992 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 1993 1994 its = its*lits; 1995 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1996 1997 if (bs > 1) 1998 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1999 2000 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2001 if (xx != bb) { 2002 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2003 } else { 2004 b = x; 2005 } 2006 2007 if (!a->relax_work) { 2008 ierr = PetscMalloc(m*sizeof(PetscScalar),&a->relax_work);CHKERRQ(ierr); 2009 } 2010 t = a->relax_work; 2011 2012 if (flag & SOR_ZERO_INITIAL_GUESS) { 2013 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2014 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2015 2016 for (i=0; i<m; i++){ 2017 d = *(aa + ai[i]); /* diag[i] */ 2018 v = aa + ai[i] + 1; 2019 vj = aj + ai[i] + 1; 2020 nz = ai[i+1] - ai[i] - 1; 2021 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2022 x[i] = omega*t[i]/d; 2023 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 2024 } 2025 } 2026 2027 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2028 if (!(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)){ 2029 t = b; 2030 } 2031 2032 for (i=m-1; i>=0; i--){ 2033 d = *(aa + ai[i]); 2034 v = aa + ai[i] + 1; 2035 vj = aj + ai[i] + 1; 2036 nz = ai[i+1] - ai[i] - 1; 2037 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2038 sum = t[i]; 2039 while (nz--) sum -= x[*vj++]*(*v++); 2040 x[i] = (1-omega)*x[i] + omega*sum/d; 2041 } 2042 t = a->relax_work; 2043 } 2044 its--; 2045 } 2046 2047 while (its--) { 2048 /* 2049 forward sweep: 2050 for i=0,...,m-1: 2051 sum[i] = (b[i] - U(i,:)x )/d[i]; 2052 x[i] = (1-omega)x[i] + omega*sum[i]; 2053 b = b - x[i]*U^T(i,:); 2054 2055 */ 2056 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2057 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2058 2059 for (i=0; i<m; i++){ 2060 d = *(aa + ai[i]); /* diag[i] */ 2061 v = aa + ai[i] + 1; v1=v; 2062 vj = aj + ai[i] + 1; vj1=vj; 2063 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2064 sum = t[i]; 2065 ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr); 2066 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2067 x[i] = (1-omega)*x[i] + omega*sum/d; 2068 while (nz--) t[*vj++] -= x[i]*(*v++); 2069 } 2070 } 2071 2072 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2073 /* 2074 backward sweep: 2075 b = b - x[i]*U^T(i,:), i=0,...,n-2 2076 for i=m-1,...,0: 2077 sum[i] = (b[i] - U(i,:)x )/d[i]; 2078 x[i] = (1-omega)x[i] + omega*sum[i]; 2079 */ 2080 /* if there was a forward sweep done above then I thing the next two for loops are not needed */ 2081 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2082 2083 for (i=0; i<m-1; i++){ /* update rhs */ 2084 v = aa + ai[i] + 1; 2085 vj = aj + ai[i] + 1; 2086 nz = ai[i+1] - ai[i] - 1; 2087 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2088 while (nz--) t[*vj++] -= x[i]*(*v++); 2089 } 2090 for (i=m-1; i>=0; i--){ 2091 d = *(aa + ai[i]); 2092 v = aa + ai[i] + 1; 2093 vj = aj + ai[i] + 1; 2094 nz = ai[i+1] - ai[i] - 1; 2095 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2096 sum = t[i]; 2097 while (nz--) sum -= x[*vj++]*(*v++); 2098 x[i] = (1-omega)*x[i] + omega*sum/d; 2099 } 2100 } 2101 } 2102 2103 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2104 if (bb != xx) { 2105 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2106 } 2107 PetscFunctionReturn(0); 2108 } 2109 2110 #undef __FUNCT__ 2111 #define __FUNCT__ "MatCreateSeqSBAIJWithArrays" 2112 /*@ 2113 MatCreateSeqSBAIJWithArrays - Creates an sequential SBAIJ matrix using matrix elements 2114 (upper triangular entries in CSR format) provided by the user. 2115 2116 Collective on MPI_Comm 2117 2118 Input Parameters: 2119 + comm - must be an MPI communicator of size 1 2120 . bs - size of block 2121 . m - number of rows 2122 . n - number of columns 2123 . i - row indices 2124 . j - column indices 2125 - a - matrix values 2126 2127 Output Parameter: 2128 . mat - the matrix 2129 2130 Level: intermediate 2131 2132 Notes: 2133 The i, j, and a arrays are not copied by this routine, the user must free these arrays 2134 once the matrix is destroyed 2135 2136 You cannot set new nonzero locations into this matrix, that will generate an error. 2137 2138 The i and j indices are 0 based 2139 2140 .seealso: MatCreate(), MatCreateMPISBAIJ(), MatCreateSeqSBAIJ() 2141 2142 @*/ 2143 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 2144 { 2145 PetscErrorCode ierr; 2146 PetscInt ii; 2147 Mat_SeqSBAIJ *sbaij; 2148 2149 PetscFunctionBegin; 2150 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 2151 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2152 2153 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2154 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 2155 ierr = MatSetType(*mat,MATSEQSBAIJ);CHKERRQ(ierr); 2156 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 2157 sbaij = (Mat_SeqSBAIJ*)(*mat)->data; 2158 ierr = PetscMalloc2(m,PetscInt,&sbaij->imax,m,PetscInt,&sbaij->ilen);CHKERRQ(ierr); 2159 2160 sbaij->i = i; 2161 sbaij->j = j; 2162 sbaij->a = a; 2163 sbaij->singlemalloc = PETSC_FALSE; 2164 sbaij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 2165 sbaij->free_a = PETSC_FALSE; 2166 sbaij->free_ij = PETSC_FALSE; 2167 2168 for (ii=0; ii<m; ii++) { 2169 sbaij->ilen[ii] = sbaij->imax[ii] = i[ii+1] - i[ii]; 2170 #if defined(PETSC_USE_DEBUG) 2171 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 2172 #endif 2173 } 2174 #if defined(PETSC_USE_DEBUG) 2175 for (ii=0; ii<sbaij->i[m]; ii++) { 2176 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 2177 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 2178 } 2179 #endif 2180 2181 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2182 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2183 PetscFunctionReturn(0); 2184 } 2185 2186 2187 2188 2189 2190