1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/inline/spops.h" 9 #include "src/mat/impls/sbaij/seq/sbaij.h" 10 11 #define CHUNKSIZE 10 12 13 /* 14 Checks for missing diagonals 15 */ 16 #undef __FUNCT__ 17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A,PetscTruth *missing,PetscInt *dd) 19 { 20 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 21 PetscErrorCode ierr; 22 PetscInt *diag,*jj = a->j,i; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 *missing = PETSC_FALSE; 28 for (i=0; i<a->mbs; i++) { 29 if (jj[diag[i]] != i) { 30 *missing = PETSC_TRUE; 31 if (dd) *dd = i; 32 break; 33 } 34 } 35 PetscFunctionReturn(0); 36 } 37 38 #undef __FUNCT__ 39 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 40 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 41 { 42 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 43 PetscErrorCode ierr; 44 PetscInt i; 45 46 PetscFunctionBegin; 47 if (!a->diag) { 48 ierr = PetscMalloc(a->mbs*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 49 } 50 for (i=0; i<a->mbs; i++) a->diag[i] = a->i[i]; 51 PetscFunctionReturn(0); 52 } 53 54 #undef __FUNCT__ 55 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 56 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 57 { 58 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 59 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs; 60 PetscErrorCode ierr; 61 62 PetscFunctionBegin; 63 *nn = n; 64 if (!ia) PetscFunctionReturn(0); 65 if (!blockcompressed) { 66 /* malloc & create the natural set of indices */ 67 ierr = PetscMalloc2((n+1)*bs,PetscInt,ia,nz*bs,PetscInt,ja);CHKERRQ(ierr); 68 for (i=0; i<n+1; i++) { 69 for (j=0; j<bs; j++) { 70 *ia[i*bs+j] = a->i[i]*bs+j+oshift; 71 } 72 } 73 for (i=0; i<nz; i++) { 74 for (j=0; j<bs; j++) { 75 *ja[i*bs+j] = a->j[i]*bs+j+oshift; 76 } 77 } 78 } else { /* blockcompressed */ 79 if (oshift == 1) { 80 /* temporarily add 1 to i and j indices */ 81 for (i=0; i<nz; i++) a->j[i]++; 82 for (i=0; i<n+1; i++) a->i[i]++; 83 } 84 *ia = a->i; *ja = a->j; 85 } 86 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 92 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 93 { 94 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 95 PetscInt i,n = a->mbs,nz = a->i[n]; 96 PetscErrorCode ierr; 97 98 PetscFunctionBegin; 99 if (!ia) PetscFunctionReturn(0); 100 101 if (!blockcompressed) { 102 ierr = PetscFree2(*ia,*ja);CHKERRQ(ierr); 103 } else if (oshift == 1) { /* blockcompressed */ 104 for (i=0; i<nz; i++) a->j[i]--; 105 for (i=0; i<n+1; i++) a->i[i]--; 106 } 107 108 PetscFunctionReturn(0); 109 } 110 111 #undef __FUNCT__ 112 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 113 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 114 { 115 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 116 PetscErrorCode ierr; 117 118 PetscFunctionBegin; 119 #if defined(PETSC_USE_LOG) 120 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->rmap.N,a->nz); 121 #endif 122 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 123 if (a->row) {ierr = ISDestroy(a->row);CHKERRQ(ierr);} 124 if (a->col){ierr = ISDestroy(a->col);CHKERRQ(ierr);} 125 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 126 ierr = PetscFree(a->diag);CHKERRQ(ierr); 127 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 128 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 129 ierr = PetscFree(a->relax_work);CHKERRQ(ierr); 130 ierr = PetscFree(a->solves_work);CHKERRQ(ierr); 131 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 132 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 133 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 134 135 ierr = PetscFree(a->inew);CHKERRQ(ierr); 136 ierr = PetscFree(a);CHKERRQ(ierr); 137 138 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 139 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 140 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 141 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 142 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 143 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 144 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 145 PetscFunctionReturn(0); 146 } 147 148 #undef __FUNCT__ 149 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 150 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op,PetscTruth flg) 151 { 152 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 153 PetscErrorCode ierr; 154 155 PetscFunctionBegin; 156 switch (op) { 157 case MAT_ROW_ORIENTED: 158 a->roworiented = flg; 159 break; 160 case MAT_KEEP_ZEROED_ROWS: 161 a->keepzeroedrows = flg; 162 break; 163 case MAT_NEW_NONZERO_LOCATIONS: 164 a->nonew = (flg ? 0 : 1); 165 break; 166 case MAT_NEW_NONZERO_LOCATION_ERR: 167 a->nonew = (flg ? -1 : 0); 168 break; 169 case MAT_NEW_NONZERO_ALLOCATION_ERR: 170 a->nonew = (flg ? -2 : 0); 171 break; 172 case MAT_NEW_DIAGONALS: 173 case MAT_IGNORE_OFF_PROC_ENTRIES: 174 case MAT_USE_HASH_TABLE: 175 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 176 break; 177 case MAT_HERMITIAN: 178 if (flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 179 case MAT_SYMMETRIC: 180 case MAT_STRUCTURALLY_SYMMETRIC: 181 case MAT_SYMMETRY_ETERNAL: 182 if (!flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 183 ierr = PetscInfo1(A,"Option %s not relevent\n",MatOptions[op]);CHKERRQ(ierr); 184 break; 185 case MAT_IGNORE_LOWER_TRIANGULAR: 186 a->ignore_ltriangular = flg; 187 break; 188 case MAT_ERROR_LOWER_TRIANGULAR: 189 a->ignore_ltriangular = flg; 190 break; 191 case MAT_GETROW_UPPERTRIANGULAR: 192 a->getrow_utriangular = flg; 193 break; 194 default: 195 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 196 } 197 PetscFunctionReturn(0); 198 } 199 200 #undef __FUNCT__ 201 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 202 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v) 203 { 204 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 205 PetscErrorCode ierr; 206 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 207 MatScalar *aa,*aa_i; 208 PetscScalar *v_i; 209 210 PetscFunctionBegin; 211 if (A && !a->getrow_utriangular) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format. Getting the upper triangular part of row, run with -mat_getrow_uppertriangular, call MatSetOption(mat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE) or MatGetRowUpperTriangular()"); 212 /* Get the upper triangular part of the row */ 213 bs = A->rmap.bs; 214 ai = a->i; 215 aj = a->j; 216 aa = a->a; 217 bs2 = a->bs2; 218 219 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 220 221 bn = row/bs; /* Block number */ 222 bp = row % bs; /* Block position */ 223 M = ai[bn+1] - ai[bn]; 224 *ncols = bs*M; 225 226 if (v) { 227 *v = 0; 228 if (*ncols) { 229 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 230 for (i=0; i<M; i++) { /* for each block in the block row */ 231 v_i = *v + i*bs; 232 aa_i = aa + bs2*(ai[bn] + i); 233 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 234 } 235 } 236 } 237 238 if (cols) { 239 *cols = 0; 240 if (*ncols) { 241 ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr); 242 for (i=0; i<M; i++) { /* for each block in the block row */ 243 cols_i = *cols + i*bs; 244 itmp = bs*aj[ai[bn] + i]; 245 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 246 } 247 } 248 } 249 250 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 251 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 252 #ifdef column_search 253 v_i = *v + M*bs; 254 cols_i = *cols + M*bs; 255 for (i=0; i<bn; i++){ /* for each block row */ 256 M = ai[i+1] - ai[i]; 257 for (j=0; j<M; j++){ 258 itmp = aj[ai[i] + j]; /* block column value */ 259 if (itmp == bn){ 260 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 261 for (k=0; k<bs; k++) { 262 *cols_i++ = i*bs+k; 263 *v_i++ = aa_i[k]; 264 } 265 *ncols += bs; 266 break; 267 } 268 } 269 } 270 #endif 271 PetscFunctionReturn(0); 272 } 273 274 #undef __FUNCT__ 275 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 276 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 277 { 278 PetscErrorCode ierr; 279 280 PetscFunctionBegin; 281 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 282 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 283 PetscFunctionReturn(0); 284 } 285 286 #undef __FUNCT__ 287 #define __FUNCT__ "MatGetRowUpperTriangular_SeqSBAIJ" 288 PetscErrorCode MatGetRowUpperTriangular_SeqSBAIJ(Mat A) 289 { 290 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 291 292 PetscFunctionBegin; 293 a->getrow_utriangular = PETSC_TRUE; 294 PetscFunctionReturn(0); 295 } 296 #undef __FUNCT__ 297 #define __FUNCT__ "MatRestoreRowUpperTriangular_SeqSBAIJ" 298 PetscErrorCode MatRestoreRowUpperTriangular_SeqSBAIJ(Mat A) 299 { 300 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 301 302 PetscFunctionBegin; 303 a->getrow_utriangular = PETSC_FALSE; 304 PetscFunctionReturn(0); 305 } 306 307 #undef __FUNCT__ 308 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 309 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,MatReuse reuse,Mat *B) 310 { 311 PetscErrorCode ierr; 312 PetscFunctionBegin; 313 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 314 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 315 } 316 PetscFunctionReturn(0); 317 } 318 319 #undef __FUNCT__ 320 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 321 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 322 { 323 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 324 PetscErrorCode ierr; 325 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 326 const char *name; 327 PetscViewerFormat format; 328 329 PetscFunctionBegin; 330 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 331 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 332 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 333 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 334 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 335 Mat aij; 336 337 if (A->factor && bs>1){ 338 ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored with bs>1. MatView() with PETSC_VIEWER_ASCII_MATLAB is not supported and ignored!\n");CHKERRQ(ierr); 339 PetscFunctionReturn(0); 340 } 341 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 342 ierr = MatView(aij,viewer);CHKERRQ(ierr); 343 ierr = MatDestroy(aij);CHKERRQ(ierr); 344 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 345 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 346 for (i=0; i<a->mbs; i++) { 347 for (j=0; j<bs; j++) { 348 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 349 for (k=a->i[i]; k<a->i[i+1]; k++) { 350 for (l=0; l<bs; l++) { 351 #if defined(PETSC_USE_COMPLEX) 352 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 353 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 354 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 355 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 356 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 357 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 358 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 359 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 360 } 361 #else 362 if (a->a[bs2*k + l*bs + j] != 0.0) { 363 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 364 } 365 #endif 366 } 367 } 368 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 369 } 370 } 371 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 372 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 373 PetscFunctionReturn(0); 374 } else { 375 if (A->factor && bs>1){ 376 ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored. MatView_SeqSBAIJ_ASCII() may not display complete or logically correct entries!\n");CHKERRQ(ierr); 377 } 378 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 379 for (i=0; i<a->mbs; i++) { 380 for (j=0; j<bs; j++) { 381 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 382 for (k=a->i[i]; k<a->i[i+1]; k++) { 383 for (l=0; l<bs; l++) { 384 #if defined(PETSC_USE_COMPLEX) 385 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 386 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 387 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 388 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 389 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 390 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 391 } else { 392 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 393 } 394 #else 395 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 396 #endif 397 } 398 } 399 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 400 } 401 } 402 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 403 } 404 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 405 PetscFunctionReturn(0); 406 } 407 408 #undef __FUNCT__ 409 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 410 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 411 { 412 Mat A = (Mat) Aa; 413 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 414 PetscErrorCode ierr; 415 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 416 PetscMPIInt rank; 417 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 418 MatScalar *aa; 419 MPI_Comm comm; 420 PetscViewer viewer; 421 422 PetscFunctionBegin; 423 /* 424 This is nasty. If this is called from an originally parallel matrix 425 then all processes call this,but only the first has the matrix so the 426 rest should return immediately. 427 */ 428 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 429 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 430 if (rank) PetscFunctionReturn(0); 431 432 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 433 434 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 435 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 436 437 /* loop over matrix elements drawing boxes */ 438 color = PETSC_DRAW_BLUE; 439 for (i=0,row=0; i<mbs; i++,row+=bs) { 440 for (j=a->i[i]; j<a->i[i+1]; j++) { 441 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 442 x_l = a->j[j]*bs; x_r = x_l + 1.0; 443 aa = a->a + j*bs2; 444 for (k=0; k<bs; k++) { 445 for (l=0; l<bs; l++) { 446 if (PetscRealPart(*aa++) >= 0.) continue; 447 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 448 } 449 } 450 } 451 } 452 color = PETSC_DRAW_CYAN; 453 for (i=0,row=0; i<mbs; i++,row+=bs) { 454 for (j=a->i[i]; j<a->i[i+1]; j++) { 455 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 456 x_l = a->j[j]*bs; x_r = x_l + 1.0; 457 aa = a->a + j*bs2; 458 for (k=0; k<bs; k++) { 459 for (l=0; l<bs; l++) { 460 if (PetscRealPart(*aa++) != 0.) continue; 461 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 462 } 463 } 464 } 465 } 466 467 color = PETSC_DRAW_RED; 468 for (i=0,row=0; i<mbs; i++,row+=bs) { 469 for (j=a->i[i]; j<a->i[i+1]; j++) { 470 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 471 x_l = a->j[j]*bs; x_r = x_l + 1.0; 472 aa = a->a + j*bs2; 473 for (k=0; k<bs; k++) { 474 for (l=0; l<bs; l++) { 475 if (PetscRealPart(*aa++) <= 0.) continue; 476 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 477 } 478 } 479 } 480 } 481 PetscFunctionReturn(0); 482 } 483 484 #undef __FUNCT__ 485 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 486 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 487 { 488 PetscErrorCode ierr; 489 PetscReal xl,yl,xr,yr,w,h; 490 PetscDraw draw; 491 PetscTruth isnull; 492 493 PetscFunctionBegin; 494 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 495 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 496 497 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 498 xr = A->rmap.N; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 499 xr += w; yr += h; xl = -w; yl = -h; 500 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 501 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 502 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 503 PetscFunctionReturn(0); 504 } 505 506 #undef __FUNCT__ 507 #define __FUNCT__ "MatView_SeqSBAIJ" 508 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 509 { 510 PetscErrorCode ierr; 511 PetscTruth iascii,isdraw; 512 513 PetscFunctionBegin; 514 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 515 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 516 if (iascii){ 517 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 518 } else if (isdraw) { 519 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 520 } else { 521 Mat B; 522 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 523 ierr = MatView(B,viewer);CHKERRQ(ierr); 524 ierr = MatDestroy(B);CHKERRQ(ierr); 525 } 526 PetscFunctionReturn(0); 527 } 528 529 530 #undef __FUNCT__ 531 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 532 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 533 { 534 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 535 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 536 PetscInt *ai = a->i,*ailen = a->ilen; 537 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 538 MatScalar *ap,*aa = a->a; 539 540 PetscFunctionBegin; 541 for (k=0; k<m; k++) { /* loop over rows */ 542 row = im[k]; brow = row/bs; 543 if (row < 0) {v += n; continue;} /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); */ 544 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 545 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 546 nrow = ailen[brow]; 547 for (l=0; l<n; l++) { /* loop over columns */ 548 if (in[l] < 0) {v++; continue;} /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); */ 549 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 550 col = in[l] ; 551 bcol = col/bs; 552 cidx = col%bs; 553 ridx = row%bs; 554 high = nrow; 555 low = 0; /* assume unsorted */ 556 while (high-low > 5) { 557 t = (low+high)/2; 558 if (rp[t] > bcol) high = t; 559 else low = t; 560 } 561 for (i=low; i<high; i++) { 562 if (rp[i] > bcol) break; 563 if (rp[i] == bcol) { 564 *v++ = ap[bs2*i+bs*cidx+ridx]; 565 goto finished; 566 } 567 } 568 *v++ = 0.0; 569 finished:; 570 } 571 } 572 PetscFunctionReturn(0); 573 } 574 575 576 #undef __FUNCT__ 577 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 578 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 579 { 580 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 581 PetscErrorCode ierr; 582 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 583 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 584 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 585 PetscTruth roworiented=a->roworiented; 586 const PetscScalar *value = v; 587 MatScalar *ap,*aa = a->a,*bap; 588 589 PetscFunctionBegin; 590 if (roworiented) { 591 stepval = (n-1)*bs; 592 } else { 593 stepval = (m-1)*bs; 594 } 595 for (k=0; k<m; k++) { /* loop over added rows */ 596 row = im[k]; 597 if (row < 0) continue; 598 #if defined(PETSC_USE_DEBUG) 599 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 600 #endif 601 rp = aj + ai[row]; 602 ap = aa + bs2*ai[row]; 603 rmax = imax[row]; 604 nrow = ailen[row]; 605 low = 0; 606 high = nrow; 607 for (l=0; l<n; l++) { /* loop over added columns */ 608 if (in[l] < 0) continue; 609 col = in[l]; 610 #if defined(PETSC_USE_DEBUG) 611 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 612 #endif 613 if (col < row) continue; /* ignore lower triangular block */ 614 if (roworiented) { 615 value = v + k*(stepval+bs)*bs + l*bs; 616 } else { 617 value = v + l*(stepval+bs)*bs + k*bs; 618 } 619 if (col <= lastcol) low = 0; else high = nrow; 620 lastcol = col; 621 while (high-low > 7) { 622 t = (low+high)/2; 623 if (rp[t] > col) high = t; 624 else low = t; 625 } 626 for (i=low; i<high; i++) { 627 if (rp[i] > col) break; 628 if (rp[i] == col) { 629 bap = ap + bs2*i; 630 if (roworiented) { 631 if (is == ADD_VALUES) { 632 for (ii=0; ii<bs; ii++,value+=stepval) { 633 for (jj=ii; jj<bs2; jj+=bs) { 634 bap[jj] += *value++; 635 } 636 } 637 } else { 638 for (ii=0; ii<bs; ii++,value+=stepval) { 639 for (jj=ii; jj<bs2; jj+=bs) { 640 bap[jj] = *value++; 641 } 642 } 643 } 644 } else { 645 if (is == ADD_VALUES) { 646 for (ii=0; ii<bs; ii++,value+=stepval) { 647 for (jj=0; jj<bs; jj++) { 648 *bap++ += *value++; 649 } 650 } 651 } else { 652 for (ii=0; ii<bs; ii++,value+=stepval) { 653 for (jj=0; jj<bs; jj++) { 654 *bap++ = *value++; 655 } 656 } 657 } 658 } 659 goto noinsert2; 660 } 661 } 662 if (nonew == 1) goto noinsert2; 663 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 664 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 665 N = nrow++ - 1; high++; 666 /* shift up all the later entries in this row */ 667 for (ii=N; ii>=i; ii--) { 668 rp[ii+1] = rp[ii]; 669 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 670 } 671 if (N >= i) { 672 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 673 } 674 rp[i] = col; 675 bap = ap + bs2*i; 676 if (roworiented) { 677 for (ii=0; ii<bs; ii++,value+=stepval) { 678 for (jj=ii; jj<bs2; jj+=bs) { 679 bap[jj] = *value++; 680 } 681 } 682 } else { 683 for (ii=0; ii<bs; ii++,value+=stepval) { 684 for (jj=0; jj<bs; jj++) { 685 *bap++ = *value++; 686 } 687 } 688 } 689 noinsert2:; 690 low = i; 691 } 692 ailen[row] = nrow; 693 } 694 PetscFunctionReturn(0); 695 } 696 697 #undef __FUNCT__ 698 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 699 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 700 { 701 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 702 PetscErrorCode ierr; 703 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 704 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 705 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 706 MatScalar *aa = a->a,*ap; 707 708 PetscFunctionBegin; 709 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 710 711 if (m) rmax = ailen[0]; 712 for (i=1; i<mbs; i++) { 713 /* move each row back by the amount of empty slots (fshift) before it*/ 714 fshift += imax[i-1] - ailen[i-1]; 715 rmax = PetscMax(rmax,ailen[i]); 716 if (fshift) { 717 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 718 N = ailen[i]; 719 for (j=0; j<N; j++) { 720 ip[j-fshift] = ip[j]; 721 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 722 } 723 } 724 ai[i] = ai[i-1] + ailen[i-1]; 725 } 726 if (mbs) { 727 fshift += imax[mbs-1] - ailen[mbs-1]; 728 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 729 } 730 /* reset ilen and imax for each row */ 731 for (i=0; i<mbs; i++) { 732 ailen[i] = imax[i] = ai[i+1] - ai[i]; 733 } 734 a->nz = ai[mbs]; 735 736 /* diagonals may have moved, reset it */ 737 if (a->diag) { 738 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 739 } 740 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->rmap.N,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 741 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 742 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 743 a->reallocs = 0; 744 A->info.nz_unneeded = (PetscReal)fshift*bs2; 745 PetscFunctionReturn(0); 746 } 747 748 /* 749 This function returns an array of flags which indicate the locations of contiguous 750 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 751 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 752 Assume: sizes should be long enough to hold all the values. 753 */ 754 #undef __FUNCT__ 755 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 756 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 757 { 758 PetscInt i,j,k,row; 759 PetscTruth flg; 760 761 PetscFunctionBegin; 762 for (i=0,j=0; i<n; j++) { 763 row = idx[i]; 764 if (row%bs!=0) { /* Not the begining of a block */ 765 sizes[j] = 1; 766 i++; 767 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 768 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 769 i++; 770 } else { /* Begining of the block, so check if the complete block exists */ 771 flg = PETSC_TRUE; 772 for (k=1; k<bs; k++) { 773 if (row+k != idx[i+k]) { /* break in the block */ 774 flg = PETSC_FALSE; 775 break; 776 } 777 } 778 if (flg) { /* No break in the bs */ 779 sizes[j] = bs; 780 i+= bs; 781 } else { 782 sizes[j] = 1; 783 i++; 784 } 785 } 786 } 787 *bs_max = j; 788 PetscFunctionReturn(0); 789 } 790 791 792 /* Only add/insert a(i,j) with i<=j (blocks). 793 Any a(i,j) with i>j input by user is ingored. 794 */ 795 796 #undef __FUNCT__ 797 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 798 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 799 { 800 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 801 PetscErrorCode ierr; 802 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 803 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 804 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 805 PetscInt ridx,cidx,bs2=a->bs2; 806 MatScalar *ap,value,*aa=a->a,*bap; 807 808 PetscFunctionBegin; 809 for (k=0; k<m; k++) { /* loop over added rows */ 810 row = im[k]; /* row number */ 811 brow = row/bs; /* block row number */ 812 if (row < 0) continue; 813 #if defined(PETSC_USE_DEBUG) 814 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 815 #endif 816 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 817 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 818 rmax = imax[brow]; /* maximum space allocated for this row */ 819 nrow = ailen[brow]; /* actual length of this row */ 820 low = 0; 821 822 for (l=0; l<n; l++) { /* loop over added columns */ 823 if (in[l] < 0) continue; 824 #if defined(PETSC_USE_DEBUG) 825 if (in[l] >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->rmap.N-1); 826 #endif 827 col = in[l]; 828 bcol = col/bs; /* block col number */ 829 830 if (brow > bcol) { 831 if (a->ignore_ltriangular){ 832 continue; /* ignore lower triangular values */ 833 } else { 834 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 835 } 836 } 837 838 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 839 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 840 /* element value a(k,l) */ 841 if (roworiented) { 842 value = v[l + k*n]; 843 } else { 844 value = v[k + l*m]; 845 } 846 847 /* move pointer bap to a(k,l) quickly and add/insert value */ 848 if (col <= lastcol) low = 0; high = nrow; 849 lastcol = col; 850 while (high-low > 7) { 851 t = (low+high)/2; 852 if (rp[t] > bcol) high = t; 853 else low = t; 854 } 855 for (i=low; i<high; i++) { 856 if (rp[i] > bcol) break; 857 if (rp[i] == bcol) { 858 bap = ap + bs2*i + bs*cidx + ridx; 859 if (is == ADD_VALUES) *bap += value; 860 else *bap = value; 861 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 862 if (brow == bcol && ridx < cidx){ 863 bap = ap + bs2*i + bs*ridx + cidx; 864 if (is == ADD_VALUES) *bap += value; 865 else *bap = value; 866 } 867 goto noinsert1; 868 } 869 } 870 871 if (nonew == 1) goto noinsert1; 872 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 873 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 874 875 N = nrow++ - 1; high++; 876 /* shift up all the later entries in this row */ 877 for (ii=N; ii>=i; ii--) { 878 rp[ii+1] = rp[ii]; 879 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 880 } 881 if (N>=i) { 882 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 883 } 884 rp[i] = bcol; 885 ap[bs2*i + bs*cidx + ridx] = value; 886 noinsert1:; 887 low = i; 888 } 889 } /* end of loop over added columns */ 890 ailen[brow] = nrow; 891 } /* end of loop over added rows */ 892 PetscFunctionReturn(0); 893 } 894 895 #undef __FUNCT__ 896 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 897 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 898 { 899 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 900 Mat outA; 901 PetscErrorCode ierr; 902 PetscTruth row_identity; 903 904 PetscFunctionBegin; 905 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels=0 is supported for in-place icc"); 906 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 907 if (!row_identity) SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported"); 908 if (inA->rmap.bs != 1) SETERRQ1(PETSC_ERR_SUP,"Matrix block size %D is not supported",inA->rmap.bs); /* Need to replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR()! */ 909 910 outA = inA; 911 inA->factor = MAT_FACTOR_CHOLESKY; 912 913 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 914 /* 915 Blocksize < 8 have a special faster factorization/solver 916 for ICC(0) factorization with natural ordering 917 */ 918 switch (inA->rmap.bs){ /* Note: row_identity = PETSC_TRUE! */ 919 case 1: 920 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 921 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 922 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 923 inA->ops->solves = MatSolves_SeqSBAIJ_1; 924 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 925 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 926 ierr = PetscInfo(inA,"Using special in-place natural ordering solvetrans BS=1\n");CHKERRQ(ierr); 927 break; 928 case 2: 929 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 930 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 931 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 932 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering; 933 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering; 934 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr); 935 break; 936 case 3: 937 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 938 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 939 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 940 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_3_NaturalOrdering; 941 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering; 942 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr); 943 break; 944 case 4: 945 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 946 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 947 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 948 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_4_NaturalOrdering; 949 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering; 950 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr); 951 break; 952 case 5: 953 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 954 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 955 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 956 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_5_NaturalOrdering; 957 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering; 958 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr); 959 break; 960 case 6: 961 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 962 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 963 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 964 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_6_NaturalOrdering; 965 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering; 966 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr); 967 break; 968 case 7: 969 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 970 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 971 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 972 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_7_NaturalOrdering; 973 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering; 974 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr); 975 break; 976 default: 977 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 978 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering; 979 inA->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 980 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering; 981 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering; 982 break; 983 } 984 985 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 986 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 987 a->row = row; 988 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 989 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 990 a->col = row; 991 992 /* Create the invert permutation so that it can be used in MatCholeskyFactorNumeric() */ 993 if (a->icol) {ierr = ISInvertPermutation(row,PETSC_DECIDE, &a->icol);CHKERRQ(ierr);} 994 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 995 996 if (!a->solve_work) { 997 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 998 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 999 } 1000 1001 ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1002 PetscFunctionReturn(0); 1003 } 1004 1005 EXTERN_C_BEGIN 1006 #undef __FUNCT__ 1007 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1008 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices) 1009 { 1010 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1011 PetscInt i,nz,n; 1012 1013 PetscFunctionBegin; 1014 nz = baij->maxnz; 1015 n = mat->cmap.n; 1016 for (i=0; i<nz; i++) { 1017 baij->j[i] = indices[i]; 1018 } 1019 baij->nz = nz; 1020 for (i=0; i<n; i++) { 1021 baij->ilen[i] = baij->imax[i]; 1022 } 1023 PetscFunctionReturn(0); 1024 } 1025 EXTERN_C_END 1026 1027 #undef __FUNCT__ 1028 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1029 /*@ 1030 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1031 in the matrix. 1032 1033 Input Parameters: 1034 + mat - the SeqSBAIJ matrix 1035 - indices - the column indices 1036 1037 Level: advanced 1038 1039 Notes: 1040 This can be called if you have precomputed the nonzero structure of the 1041 matrix and want to provide it to the matrix object to improve the performance 1042 of the MatSetValues() operation. 1043 1044 You MUST have set the correct numbers of nonzeros per row in the call to 1045 MatCreateSeqSBAIJ(), and the columns indices MUST be sorted. 1046 1047 MUST be called before any calls to MatSetValues() 1048 1049 .seealso: MatCreateSeqSBAIJ 1050 @*/ 1051 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1052 { 1053 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1054 1055 PetscFunctionBegin; 1056 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1057 PetscValidPointer(indices,2); 1058 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1059 if (f) { 1060 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1061 } else { 1062 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1063 } 1064 PetscFunctionReturn(0); 1065 } 1066 1067 #undef __FUNCT__ 1068 #define __FUNCT__ "MatCopy_SeqSBAIJ" 1069 PetscErrorCode MatCopy_SeqSBAIJ(Mat A,Mat B,MatStructure str) 1070 { 1071 PetscErrorCode ierr; 1072 1073 PetscFunctionBegin; 1074 /* If the two matrices have the same copy implementation, use fast copy. */ 1075 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1076 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1077 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1078 1079 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 1080 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1081 } 1082 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 1083 } else { 1084 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1085 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1086 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1087 } 1088 PetscFunctionReturn(0); 1089 } 1090 1091 #undef __FUNCT__ 1092 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1093 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1094 { 1095 PetscErrorCode ierr; 1096 1097 PetscFunctionBegin; 1098 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 1099 PetscFunctionReturn(0); 1100 } 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1104 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1105 { 1106 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1107 PetscFunctionBegin; 1108 *array = a->a; 1109 PetscFunctionReturn(0); 1110 } 1111 1112 #undef __FUNCT__ 1113 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1114 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1115 { 1116 PetscFunctionBegin; 1117 PetscFunctionReturn(0); 1118 } 1119 1120 #include "petscblaslapack.h" 1121 #undef __FUNCT__ 1122 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1123 PetscErrorCode MatAXPY_SeqSBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1124 { 1125 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1126 PetscErrorCode ierr; 1127 PetscInt i,bs=Y->rmap.bs,bs2,j; 1128 PetscBLASInt one = 1,bnz = PetscBLASIntCast(x->nz); 1129 1130 PetscFunctionBegin; 1131 if (str == SAME_NONZERO_PATTERN) { 1132 PetscScalar alpha = a; 1133 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1134 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1135 if (y->xtoy && y->XtoY != X) { 1136 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1137 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1138 } 1139 if (!y->xtoy) { /* get xtoy */ 1140 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1141 y->XtoY = X; 1142 } 1143 bs2 = bs*bs; 1144 for (i=0; i<x->nz; i++) { 1145 j = 0; 1146 while (j < bs2){ 1147 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 1148 j++; 1149 } 1150 } 1151 ierr = PetscInfo3(Y,"ratio of nnz_s(X)/nnz_s(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 1152 } else { 1153 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1154 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1155 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1156 } 1157 PetscFunctionReturn(0); 1158 } 1159 1160 #undef __FUNCT__ 1161 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1162 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1163 { 1164 PetscFunctionBegin; 1165 *flg = PETSC_TRUE; 1166 PetscFunctionReturn(0); 1167 } 1168 1169 #undef __FUNCT__ 1170 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1171 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1172 { 1173 PetscFunctionBegin; 1174 *flg = PETSC_TRUE; 1175 PetscFunctionReturn(0); 1176 } 1177 1178 #undef __FUNCT__ 1179 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1180 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1181 { 1182 PetscFunctionBegin; 1183 *flg = PETSC_FALSE; 1184 PetscFunctionReturn(0); 1185 } 1186 1187 #undef __FUNCT__ 1188 #define __FUNCT__ "MatRealPart_SeqSBAIJ" 1189 PetscErrorCode MatRealPart_SeqSBAIJ(Mat A) 1190 { 1191 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1192 PetscInt i,nz = a->bs2*a->i[a->mbs]; 1193 MatScalar *aa = a->a; 1194 1195 PetscFunctionBegin; 1196 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 1197 PetscFunctionReturn(0); 1198 } 1199 1200 #undef __FUNCT__ 1201 #define __FUNCT__ "MatImaginaryPart_SeqSBAIJ" 1202 PetscErrorCode MatImaginaryPart_SeqSBAIJ(Mat A) 1203 { 1204 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1205 PetscInt i,nz = a->bs2*a->i[a->mbs]; 1206 MatScalar *aa = a->a; 1207 1208 PetscFunctionBegin; 1209 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 1210 PetscFunctionReturn(0); 1211 } 1212 1213 /* -------------------------------------------------------------------*/ 1214 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1215 MatGetRow_SeqSBAIJ, 1216 MatRestoreRow_SeqSBAIJ, 1217 MatMult_SeqSBAIJ_N, 1218 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1219 MatMult_SeqSBAIJ_N, /* transpose versions are same as non-transpose versions */ 1220 MatMultAdd_SeqSBAIJ_N, 1221 MatSolve_SeqSBAIJ_N, 1222 0, 1223 0, 1224 /*10*/ 0, 1225 0, 1226 MatCholeskyFactor_SeqSBAIJ, 1227 MatRelax_SeqSBAIJ, 1228 MatTranspose_SeqSBAIJ, 1229 /*15*/ MatGetInfo_SeqSBAIJ, 1230 MatEqual_SeqSBAIJ, 1231 MatGetDiagonal_SeqSBAIJ, 1232 MatDiagonalScale_SeqSBAIJ, 1233 MatNorm_SeqSBAIJ, 1234 /*20*/ 0, 1235 MatAssemblyEnd_SeqSBAIJ, 1236 0, 1237 MatSetOption_SeqSBAIJ, 1238 MatZeroEntries_SeqSBAIJ, 1239 /*25*/ 0, 1240 0, 1241 0, 1242 MatCholeskyFactorSymbolic_SeqSBAIJ, 1243 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1244 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1245 0, 1246 MatICCFactorSymbolic_SeqSBAIJ, 1247 MatGetArray_SeqSBAIJ, 1248 MatRestoreArray_SeqSBAIJ, 1249 /*35*/ MatDuplicate_SeqSBAIJ, 1250 MatForwardSolve_SeqSBAIJ_N, 1251 MatBackwardSolve_SeqSBAIJ_N, 1252 0, 1253 MatICCFactor_SeqSBAIJ, 1254 /*40*/ MatAXPY_SeqSBAIJ, 1255 MatGetSubMatrices_SeqSBAIJ, 1256 MatIncreaseOverlap_SeqSBAIJ, 1257 MatGetValues_SeqSBAIJ, 1258 MatCopy_SeqSBAIJ, 1259 /*45*/ 0, 1260 MatScale_SeqSBAIJ, 1261 0, 1262 0, 1263 0, 1264 /*50*/ 0, 1265 MatGetRowIJ_SeqSBAIJ, 1266 MatRestoreRowIJ_SeqSBAIJ, 1267 0, 1268 0, 1269 /*55*/ 0, 1270 0, 1271 0, 1272 0, 1273 MatSetValuesBlocked_SeqSBAIJ, 1274 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1275 0, 1276 0, 1277 0, 1278 0, 1279 /*65*/ 0, 1280 0, 1281 0, 1282 0, 1283 0, 1284 /*70*/ MatGetRowMaxAbs_SeqSBAIJ, 1285 0, 1286 0, 1287 0, 1288 0, 1289 /*75*/ 0, 1290 0, 1291 0, 1292 0, 1293 0, 1294 /*80*/ 0, 1295 0, 1296 0, 1297 #if !defined(PETSC_USE_COMPLEX) 1298 MatGetInertia_SeqSBAIJ, 1299 #else 1300 0, 1301 #endif 1302 MatLoad_SeqSBAIJ, 1303 /*85*/ MatIsSymmetric_SeqSBAIJ, 1304 MatIsHermitian_SeqSBAIJ, 1305 MatIsStructurallySymmetric_SeqSBAIJ, 1306 0, 1307 0, 1308 /*90*/ 0, 1309 0, 1310 0, 1311 0, 1312 0, 1313 /*95*/ 0, 1314 0, 1315 0, 1316 0, 1317 0, 1318 /*100*/0, 1319 0, 1320 0, 1321 0, 1322 0, 1323 /*105*/0, 1324 MatRealPart_SeqSBAIJ, 1325 MatImaginaryPart_SeqSBAIJ, 1326 MatGetRowUpperTriangular_SeqSBAIJ, 1327 MatRestoreRowUpperTriangular_SeqSBAIJ, 1328 /*110*/0, 1329 0, 1330 0, 1331 0, 1332 MatMissingDiagonal_SeqSBAIJ 1333 }; 1334 1335 EXTERN_C_BEGIN 1336 #undef __FUNCT__ 1337 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1338 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat) 1339 { 1340 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1341 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 1342 PetscErrorCode ierr; 1343 1344 PetscFunctionBegin; 1345 if (aij->nonew != 1) { 1346 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1347 } 1348 1349 /* allocate space for values if not already there */ 1350 if (!aij->saved_values) { 1351 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1352 } 1353 1354 /* copy values over */ 1355 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1356 PetscFunctionReturn(0); 1357 } 1358 EXTERN_C_END 1359 1360 EXTERN_C_BEGIN 1361 #undef __FUNCT__ 1362 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1363 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat) 1364 { 1365 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1366 PetscErrorCode ierr; 1367 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 1368 1369 PetscFunctionBegin; 1370 if (aij->nonew != 1) { 1371 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1372 } 1373 if (!aij->saved_values) { 1374 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1375 } 1376 1377 /* copy values over */ 1378 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1379 PetscFunctionReturn(0); 1380 } 1381 EXTERN_C_END 1382 1383 EXTERN_C_BEGIN 1384 #undef __FUNCT__ 1385 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1386 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 1387 { 1388 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1389 PetscErrorCode ierr; 1390 PetscInt i,mbs,bs2; 1391 PetscTruth skipallocation = PETSC_FALSE,flg; 1392 1393 PetscFunctionBegin; 1394 B->preallocated = PETSC_TRUE; 1395 ierr = PetscOptionsGetInt(((PetscObject)B)->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1396 B->rmap.bs = B->cmap.bs = bs; 1397 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 1398 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 1399 1400 mbs = B->rmap.N/bs; 1401 bs2 = bs*bs; 1402 1403 if (mbs*bs != B->rmap.N) { 1404 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1405 } 1406 1407 if (nz == MAT_SKIP_ALLOCATION) { 1408 skipallocation = PETSC_TRUE; 1409 nz = 0; 1410 } 1411 1412 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1413 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1414 if (nnz) { 1415 for (i=0; i<mbs; i++) { 1416 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1417 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1418 } 1419 } 1420 1421 ierr = PetscOptionsHasName(((PetscObject)B)->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1422 if (!flg) { 1423 switch (bs) { 1424 case 1: 1425 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1426 B->ops->solve = MatSolve_SeqSBAIJ_1; 1427 B->ops->solves = MatSolves_SeqSBAIJ_1; 1428 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1429 B->ops->mult = MatMult_SeqSBAIJ_1; 1430 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1431 B->ops->multtranspose = MatMult_SeqSBAIJ_1; 1432 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_1; 1433 break; 1434 case 2: 1435 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1436 B->ops->solve = MatSolve_SeqSBAIJ_2; 1437 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1438 B->ops->mult = MatMult_SeqSBAIJ_2; 1439 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1440 B->ops->multtranspose = MatMult_SeqSBAIJ_2; 1441 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_2; 1442 break; 1443 case 3: 1444 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1445 B->ops->solve = MatSolve_SeqSBAIJ_3; 1446 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1447 B->ops->mult = MatMult_SeqSBAIJ_3; 1448 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1449 B->ops->multtranspose = MatMult_SeqSBAIJ_3; 1450 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_3; 1451 break; 1452 case 4: 1453 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1454 B->ops->solve = MatSolve_SeqSBAIJ_4; 1455 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1456 B->ops->mult = MatMult_SeqSBAIJ_4; 1457 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1458 B->ops->multtranspose = MatMult_SeqSBAIJ_4; 1459 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_4; 1460 break; 1461 case 5: 1462 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1463 B->ops->solve = MatSolve_SeqSBAIJ_5; 1464 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1465 B->ops->mult = MatMult_SeqSBAIJ_5; 1466 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1467 B->ops->multtranspose = MatMult_SeqSBAIJ_5; 1468 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_5; 1469 break; 1470 case 6: 1471 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1472 B->ops->solve = MatSolve_SeqSBAIJ_6; 1473 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1474 B->ops->mult = MatMult_SeqSBAIJ_6; 1475 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1476 B->ops->multtranspose = MatMult_SeqSBAIJ_6; 1477 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_6; 1478 break; 1479 case 7: 1480 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1481 B->ops->solve = MatSolve_SeqSBAIJ_7; 1482 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1483 B->ops->mult = MatMult_SeqSBAIJ_7; 1484 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1485 B->ops->multtranspose = MatMult_SeqSBAIJ_7; 1486 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_7; 1487 break; 1488 } 1489 } 1490 1491 b->mbs = mbs; 1492 b->nbs = mbs; 1493 if (!skipallocation) { 1494 if (!b->imax) { 1495 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 1496 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 1497 } 1498 if (!nnz) { 1499 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1500 else if (nz <= 0) nz = 1; 1501 for (i=0; i<mbs; i++) { 1502 b->imax[i] = nz; 1503 } 1504 nz = nz*mbs; /* total nz */ 1505 } else { 1506 nz = 0; 1507 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1508 } 1509 /* b->ilen will count nonzeros in each block row so far. */ 1510 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1511 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1512 1513 /* allocate the matrix space */ 1514 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 1515 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 1516 ierr = PetscLogObjectMemory(B,(B->rmap.N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 1517 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1518 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1519 b->singlemalloc = PETSC_TRUE; 1520 1521 /* pointer to beginning of each row */ 1522 b->i[0] = 0; 1523 for (i=1; i<mbs+1; i++) { 1524 b->i[i] = b->i[i-1] + b->imax[i-1]; 1525 } 1526 b->free_a = PETSC_TRUE; 1527 b->free_ij = PETSC_TRUE; 1528 } else { 1529 b->free_a = PETSC_FALSE; 1530 b->free_ij = PETSC_FALSE; 1531 } 1532 1533 B->rmap.bs = bs; 1534 b->bs2 = bs2; 1535 b->nz = 0; 1536 b->maxnz = nz*bs2; 1537 1538 b->inew = 0; 1539 b->jnew = 0; 1540 b->anew = 0; 1541 b->a2anew = 0; 1542 b->permute = PETSC_FALSE; 1543 PetscFunctionReturn(0); 1544 } 1545 EXTERN_C_END 1546 1547 EXTERN_C_BEGIN 1548 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 1549 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat, MatType,MatReuse,Mat*); 1550 EXTERN_C_END 1551 1552 1553 EXTERN_C_BEGIN 1554 #undef __FUNCT__ 1555 #define __FUNCT__ "MatGetFactor_seqsbaij_petsc" 1556 PetscErrorCode MatGetFactor_seqsbaij_petsc(Mat A,MatFactorType ftype,Mat *B) 1557 { 1558 PetscInt n = A->rmap.n; 1559 PetscErrorCode ierr; 1560 1561 PetscFunctionBegin; 1562 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 1563 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 1564 if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 1565 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 1566 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1567 } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported"); 1568 PetscFunctionReturn(0); 1569 } 1570 EXTERN_C_END 1571 1572 EXTERN_C_BEGIN 1573 #if defined(PETSC_HAVE_MUMPS) 1574 extern PetscErrorCode MatGetFactor_seqsbaij_mumps(Mat,MatFactorType,Mat*); 1575 #endif 1576 #if defined(PETSC_HAVE_SPOOLES) 1577 extern PetscErrorCode MatGetFactor_seqsbaij_spooles(Mat,MatFactorType,Mat*); 1578 #endif 1579 EXTERN_C_END 1580 1581 /*MC 1582 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1583 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1584 1585 Options Database Keys: 1586 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1587 1588 Level: beginner 1589 1590 .seealso: MatCreateSeqSBAIJ 1591 M*/ 1592 1593 EXTERN_C_BEGIN 1594 #undef __FUNCT__ 1595 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1596 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B) 1597 { 1598 Mat_SeqSBAIJ *b; 1599 PetscErrorCode ierr; 1600 PetscMPIInt size; 1601 PetscTruth flg; 1602 1603 PetscFunctionBegin; 1604 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 1605 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1606 1607 ierr = PetscNewLog(B,Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1608 B->data = (void*)b; 1609 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1610 B->ops->destroy = MatDestroy_SeqSBAIJ; 1611 B->ops->view = MatView_SeqSBAIJ; 1612 B->mapping = 0; 1613 b->row = 0; 1614 b->icol = 0; 1615 b->reallocs = 0; 1616 b->saved_values = 0; 1617 1618 1619 b->roworiented = PETSC_TRUE; 1620 b->nonew = 0; 1621 b->diag = 0; 1622 b->solve_work = 0; 1623 b->mult_work = 0; 1624 B->spptr = 0; 1625 b->keepzeroedrows = PETSC_FALSE; 1626 b->xtoy = 0; 1627 b->XtoY = 0; 1628 1629 b->inew = 0; 1630 b->jnew = 0; 1631 b->anew = 0; 1632 b->a2anew = 0; 1633 b->permute = PETSC_FALSE; 1634 1635 b->ignore_ltriangular = PETSC_FALSE; 1636 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr); 1637 if (flg) b->ignore_ltriangular = PETSC_TRUE; 1638 1639 b->getrow_utriangular = PETSC_FALSE; 1640 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_getrow_uppertriangular",&flg);CHKERRQ(ierr); 1641 if (flg) b->getrow_utriangular = PETSC_TRUE; 1642 1643 #if defined(PETSC_HAVE_SPOOLES) 1644 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqsbaij_spooles_C", 1645 "MatGetFactor_seqsbaij_spooles", 1646 MatGetFactor_seqsbaij_spooles);CHKERRQ(ierr); 1647 #endif 1648 #if defined(PETSC_HAVE_MUMPS) 1649 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqsbaij_mumps_C", 1650 "MatGetFactor_seqsbaij_mumps", 1651 MatGetFactor_seqsbaij_mumps);CHKERRQ(ierr); 1652 #endif 1653 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqsbaij_petsc_C", 1654 "MatGetFactor_seqsbaij_petsc", 1655 MatGetFactor_seqsbaij_petsc);CHKERRQ(ierr); 1656 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1657 "MatStoreValues_SeqSBAIJ", 1658 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1659 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1660 "MatRetrieveValues_SeqSBAIJ", 1661 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1662 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1663 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1664 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1665 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1666 "MatConvert_SeqSBAIJ_SeqAIJ", 1667 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1668 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1669 "MatConvert_SeqSBAIJ_SeqBAIJ", 1670 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1671 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1672 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1673 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1674 1675 B->symmetric = PETSC_TRUE; 1676 B->structurally_symmetric = PETSC_TRUE; 1677 B->symmetric_set = PETSC_TRUE; 1678 B->structurally_symmetric_set = PETSC_TRUE; 1679 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSBAIJ);CHKERRQ(ierr); 1680 PetscFunctionReturn(0); 1681 } 1682 EXTERN_C_END 1683 1684 #undef __FUNCT__ 1685 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1686 /*@C 1687 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1688 compressed row) format. For good matrix assembly performance the 1689 user should preallocate the matrix storage by setting the parameter nz 1690 (or the array nnz). By setting these parameters accurately, performance 1691 during matrix assembly can be increased by more than a factor of 50. 1692 1693 Collective on Mat 1694 1695 Input Parameters: 1696 + A - the symmetric matrix 1697 . bs - size of block 1698 . nz - number of block nonzeros per block row (same for all rows) 1699 - nnz - array containing the number of block nonzeros in the upper triangular plus 1700 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1701 1702 Options Database Keys: 1703 . -mat_no_unroll - uses code that does not unroll the loops in the 1704 block calculations (much slower) 1705 . -mat_block_size - size of the blocks to use 1706 1707 Level: intermediate 1708 1709 Notes: 1710 Specify the preallocated storage with either nz or nnz (not both). 1711 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1712 allocation. For additional details, see the users manual chapter on 1713 matrices. 1714 1715 You can call MatGetInfo() to get information on how effective the preallocation was; 1716 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 1717 You can also run with the option -info and look for messages with the string 1718 malloc in them to see if additional memory allocation was needed. 1719 1720 If the nnz parameter is given then the nz parameter is ignored 1721 1722 1723 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1724 @*/ 1725 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 1726 { 1727 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 1728 1729 PetscFunctionBegin; 1730 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1731 if (f) { 1732 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1733 } 1734 PetscFunctionReturn(0); 1735 } 1736 1737 #undef __FUNCT__ 1738 #define __FUNCT__ "MatCreateSeqSBAIJ" 1739 /*@C 1740 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1741 compressed row) format. For good matrix assembly performance the 1742 user should preallocate the matrix storage by setting the parameter nz 1743 (or the array nnz). By setting these parameters accurately, performance 1744 during matrix assembly can be increased by more than a factor of 50. 1745 1746 Collective on MPI_Comm 1747 1748 Input Parameters: 1749 + comm - MPI communicator, set to PETSC_COMM_SELF 1750 . bs - size of block 1751 . m - number of rows, or number of columns 1752 . nz - number of block nonzeros per block row (same for all rows) 1753 - nnz - array containing the number of block nonzeros in the upper triangular plus 1754 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1755 1756 Output Parameter: 1757 . A - the symmetric matrix 1758 1759 Options Database Keys: 1760 . -mat_no_unroll - uses code that does not unroll the loops in the 1761 block calculations (much slower) 1762 . -mat_block_size - size of the blocks to use 1763 1764 Level: intermediate 1765 1766 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 1767 MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely 1768 true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles. 1769 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 1770 1771 Notes: 1772 The number of rows and columns must be divisible by blocksize. 1773 This matrix type does not support complex Hermitian operation. 1774 1775 Specify the preallocated storage with either nz or nnz (not both). 1776 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1777 allocation. For additional details, see the users manual chapter on 1778 matrices. 1779 1780 If the nnz parameter is given then the nz parameter is ignored 1781 1782 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1783 @*/ 1784 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1785 { 1786 PetscErrorCode ierr; 1787 1788 PetscFunctionBegin; 1789 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1790 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1791 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1792 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 1793 PetscFunctionReturn(0); 1794 } 1795 1796 #undef __FUNCT__ 1797 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1798 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1799 { 1800 Mat C; 1801 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1802 PetscErrorCode ierr; 1803 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1804 1805 PetscFunctionBegin; 1806 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1807 1808 *B = 0; 1809 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1810 ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 1811 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1812 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1813 c = (Mat_SeqSBAIJ*)C->data; 1814 1815 C->preallocated = PETSC_TRUE; 1816 C->factor = A->factor; 1817 c->row = 0; 1818 c->icol = 0; 1819 c->saved_values = 0; 1820 c->keepzeroedrows = a->keepzeroedrows; 1821 C->assembled = PETSC_TRUE; 1822 1823 ierr = PetscMapCopy(((PetscObject)A)->comm,&A->rmap,&C->rmap);CHKERRQ(ierr); 1824 ierr = PetscMapCopy(((PetscObject)A)->comm,&A->cmap,&C->cmap);CHKERRQ(ierr); 1825 c->bs2 = a->bs2; 1826 c->mbs = a->mbs; 1827 c->nbs = a->nbs; 1828 1829 ierr = PetscMalloc2((mbs+1),PetscInt,&c->imax,(mbs+1),PetscInt,&c->ilen);CHKERRQ(ierr); 1830 for (i=0; i<mbs; i++) { 1831 c->imax[i] = a->imax[i]; 1832 c->ilen[i] = a->ilen[i]; 1833 } 1834 1835 /* allocate the matrix space */ 1836 ierr = PetscMalloc3(bs2*nz,MatScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 1837 c->singlemalloc = PETSC_TRUE; 1838 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1839 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)));CHKERRQ(ierr); 1840 if (mbs > 0) { 1841 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1842 if (cpvalues == MAT_COPY_VALUES) { 1843 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1844 } else { 1845 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1846 } 1847 } 1848 1849 c->roworiented = a->roworiented; 1850 c->nonew = a->nonew; 1851 1852 if (a->diag) { 1853 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 1854 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1855 for (i=0; i<mbs; i++) { 1856 c->diag[i] = a->diag[i]; 1857 } 1858 } else c->diag = 0; 1859 c->nz = a->nz; 1860 c->maxnz = a->maxnz; 1861 c->solve_work = 0; 1862 c->mult_work = 0; 1863 c->free_a = PETSC_TRUE; 1864 c->free_ij = PETSC_TRUE; 1865 *B = C; 1866 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 1867 PetscFunctionReturn(0); 1868 } 1869 1870 #undef __FUNCT__ 1871 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1872 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer, const MatType type,Mat *A) 1873 { 1874 Mat_SeqSBAIJ *a; 1875 Mat B; 1876 PetscErrorCode ierr; 1877 int fd; 1878 PetscMPIInt size; 1879 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 1880 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1881 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 1882 PetscInt *masked,nmask,tmp,bs2,ishift; 1883 PetscScalar *aa; 1884 MPI_Comm comm = ((PetscObject)viewer)->comm; 1885 1886 PetscFunctionBegin; 1887 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1888 bs2 = bs*bs; 1889 1890 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1891 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1892 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1893 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1894 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1895 M = header[1]; N = header[2]; nz = header[3]; 1896 1897 if (header[3] < 0) { 1898 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1899 } 1900 1901 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1902 1903 /* 1904 This code adds extra rows to make sure the number of rows is 1905 divisible by the blocksize 1906 */ 1907 mbs = M/bs; 1908 extra_rows = bs - M + bs*(mbs); 1909 if (extra_rows == bs) extra_rows = 0; 1910 else mbs++; 1911 if (extra_rows) { 1912 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 1913 } 1914 1915 /* read in row lengths */ 1916 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1917 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1918 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1919 1920 /* read in column indices */ 1921 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1922 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1923 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1924 1925 /* loop over row lengths determining block row lengths */ 1926 ierr = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr); 1927 ierr = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1928 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 1929 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1930 masked = mask + mbs; 1931 rowcount = 0; nzcount = 0; 1932 for (i=0; i<mbs; i++) { 1933 nmask = 0; 1934 for (j=0; j<bs; j++) { 1935 kmax = rowlengths[rowcount]; 1936 for (k=0; k<kmax; k++) { 1937 tmp = jj[nzcount++]/bs; /* block col. index */ 1938 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1939 } 1940 rowcount++; 1941 } 1942 s_browlengths[i] += nmask; 1943 1944 /* zero out the mask elements we set */ 1945 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1946 } 1947 1948 /* create our matrix */ 1949 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 1950 ierr = MatSetSizes(B,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 1951 ierr = MatSetType(B,type);CHKERRQ(ierr); 1952 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr); 1953 a = (Mat_SeqSBAIJ*)B->data; 1954 1955 /* set matrix "i" values */ 1956 a->i[0] = 0; 1957 for (i=1; i<= mbs; i++) { 1958 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1959 a->ilen[i-1] = s_browlengths[i-1]; 1960 } 1961 a->nz = a->i[mbs]; 1962 1963 /* read in nonzero values */ 1964 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1965 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1966 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1967 1968 /* set "a" and "j" values into matrix */ 1969 nzcount = 0; jcount = 0; 1970 for (i=0; i<mbs; i++) { 1971 nzcountb = nzcount; 1972 nmask = 0; 1973 for (j=0; j<bs; j++) { 1974 kmax = rowlengths[i*bs+j]; 1975 for (k=0; k<kmax; k++) { 1976 tmp = jj[nzcount++]/bs; /* block col. index */ 1977 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1978 } 1979 } 1980 /* sort the masked values */ 1981 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1982 1983 /* set "j" values into matrix */ 1984 maskcount = 1; 1985 for (j=0; j<nmask; j++) { 1986 a->j[jcount++] = masked[j]; 1987 mask[masked[j]] = maskcount++; 1988 } 1989 1990 /* set "a" values into matrix */ 1991 ishift = bs2*a->i[i]; 1992 for (j=0; j<bs; j++) { 1993 kmax = rowlengths[i*bs+j]; 1994 for (k=0; k<kmax; k++) { 1995 tmp = jj[nzcountb]/bs ; /* block col. index */ 1996 if (tmp >= i){ 1997 block = mask[tmp] - 1; 1998 point = jj[nzcountb] - bs*tmp; 1999 idx = ishift + bs2*block + j + bs*point; 2000 a->a[idx] = aa[nzcountb]; 2001 } 2002 nzcountb++; 2003 } 2004 } 2005 /* zero out the mask elements we set */ 2006 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2007 } 2008 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2009 2010 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2011 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 2012 ierr = PetscFree(aa);CHKERRQ(ierr); 2013 ierr = PetscFree(jj);CHKERRQ(ierr); 2014 ierr = PetscFree(mask);CHKERRQ(ierr); 2015 2016 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2017 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2018 ierr = MatView_Private(B);CHKERRQ(ierr); 2019 *A = B; 2020 PetscFunctionReturn(0); 2021 } 2022 2023 #undef __FUNCT__ 2024 #define __FUNCT__ "MatRelax_SeqSBAIJ" 2025 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2026 { 2027 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 2028 MatScalar *aa=a->a,*v,*v1; 2029 PetscScalar *x,*b,*t,sum,d; 2030 PetscErrorCode ierr; 2031 PetscInt m=a->mbs,bs=A->rmap.bs,*ai=a->i,*aj=a->j; 2032 PetscInt nz,nz1,*vj,*vj1,i; 2033 2034 PetscFunctionBegin; 2035 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 2036 2037 its = its*lits; 2038 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2039 2040 if (bs > 1) 2041 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2042 2043 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2044 if (xx != bb) { 2045 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2046 } else { 2047 b = x; 2048 } 2049 2050 if (!a->relax_work) { 2051 ierr = PetscMalloc(m*sizeof(PetscScalar),&a->relax_work);CHKERRQ(ierr); 2052 } 2053 t = a->relax_work; 2054 2055 if (flag & SOR_ZERO_INITIAL_GUESS) { 2056 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2057 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2058 2059 for (i=0; i<m; i++){ 2060 d = *(aa + ai[i]); /* diag[i] */ 2061 v = aa + ai[i] + 1; 2062 vj = aj + ai[i] + 1; 2063 nz = ai[i+1] - ai[i] - 1; 2064 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2065 x[i] = omega*t[i]/d; 2066 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 2067 } 2068 } 2069 2070 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2071 if (!(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)){ 2072 t = b; 2073 } 2074 2075 for (i=m-1; i>=0; i--){ 2076 d = *(aa + ai[i]); 2077 v = aa + ai[i] + 1; 2078 vj = aj + ai[i] + 1; 2079 nz = ai[i+1] - ai[i] - 1; 2080 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2081 sum = t[i]; 2082 while (nz--) sum -= x[*vj++]*(*v++); 2083 x[i] = (1-omega)*x[i] + omega*sum/d; 2084 } 2085 t = a->relax_work; 2086 } 2087 its--; 2088 } 2089 2090 while (its--) { 2091 /* 2092 forward sweep: 2093 for i=0,...,m-1: 2094 sum[i] = (b[i] - U(i,:)x )/d[i]; 2095 x[i] = (1-omega)x[i] + omega*sum[i]; 2096 b = b - x[i]*U^T(i,:); 2097 2098 */ 2099 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2100 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2101 2102 for (i=0; i<m; i++){ 2103 d = *(aa + ai[i]); /* diag[i] */ 2104 v = aa + ai[i] + 1; v1=v; 2105 vj = aj + ai[i] + 1; vj1=vj; 2106 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2107 sum = t[i]; 2108 ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr); 2109 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2110 x[i] = (1-omega)*x[i] + omega*sum/d; 2111 while (nz--) t[*vj++] -= x[i]*(*v++); 2112 } 2113 } 2114 2115 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2116 /* 2117 backward sweep: 2118 b = b - x[i]*U^T(i,:), i=0,...,n-2 2119 for i=m-1,...,0: 2120 sum[i] = (b[i] - U(i,:)x )/d[i]; 2121 x[i] = (1-omega)x[i] + omega*sum[i]; 2122 */ 2123 /* if there was a forward sweep done above then I thing the next two for loops are not needed */ 2124 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2125 2126 for (i=0; i<m-1; i++){ /* update rhs */ 2127 v = aa + ai[i] + 1; 2128 vj = aj + ai[i] + 1; 2129 nz = ai[i+1] - ai[i] - 1; 2130 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2131 while (nz--) t[*vj++] -= x[i]*(*v++); 2132 } 2133 for (i=m-1; i>=0; i--){ 2134 d = *(aa + ai[i]); 2135 v = aa + ai[i] + 1; 2136 vj = aj + ai[i] + 1; 2137 nz = ai[i+1] - ai[i] - 1; 2138 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2139 sum = t[i]; 2140 while (nz--) sum -= x[*vj++]*(*v++); 2141 x[i] = (1-omega)*x[i] + omega*sum/d; 2142 } 2143 } 2144 } 2145 2146 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2147 if (bb != xx) { 2148 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2149 } 2150 PetscFunctionReturn(0); 2151 } 2152 2153 #undef __FUNCT__ 2154 #define __FUNCT__ "MatCreateSeqSBAIJWithArrays" 2155 /*@ 2156 MatCreateSeqSBAIJWithArrays - Creates an sequential SBAIJ matrix using matrix elements 2157 (upper triangular entries in CSR format) provided by the user. 2158 2159 Collective on MPI_Comm 2160 2161 Input Parameters: 2162 + comm - must be an MPI communicator of size 1 2163 . bs - size of block 2164 . m - number of rows 2165 . n - number of columns 2166 . i - row indices 2167 . j - column indices 2168 - a - matrix values 2169 2170 Output Parameter: 2171 . mat - the matrix 2172 2173 Level: intermediate 2174 2175 Notes: 2176 The i, j, and a arrays are not copied by this routine, the user must free these arrays 2177 once the matrix is destroyed 2178 2179 You cannot set new nonzero locations into this matrix, that will generate an error. 2180 2181 The i and j indices are 0 based 2182 2183 .seealso: MatCreate(), MatCreateMPISBAIJ(), MatCreateSeqSBAIJ() 2184 2185 @*/ 2186 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 2187 { 2188 PetscErrorCode ierr; 2189 PetscInt ii; 2190 Mat_SeqSBAIJ *sbaij; 2191 2192 PetscFunctionBegin; 2193 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 2194 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2195 2196 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2197 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 2198 ierr = MatSetType(*mat,MATSEQSBAIJ);CHKERRQ(ierr); 2199 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 2200 sbaij = (Mat_SeqSBAIJ*)(*mat)->data; 2201 ierr = PetscMalloc2(m,PetscInt,&sbaij->imax,m,PetscInt,&sbaij->ilen);CHKERRQ(ierr); 2202 2203 sbaij->i = i; 2204 sbaij->j = j; 2205 sbaij->a = a; 2206 sbaij->singlemalloc = PETSC_FALSE; 2207 sbaij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 2208 sbaij->free_a = PETSC_FALSE; 2209 sbaij->free_ij = PETSC_FALSE; 2210 2211 for (ii=0; ii<m; ii++) { 2212 sbaij->ilen[ii] = sbaij->imax[ii] = i[ii+1] - i[ii]; 2213 #if defined(PETSC_USE_DEBUG) 2214 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 2215 #endif 2216 } 2217 #if defined(PETSC_USE_DEBUG) 2218 for (ii=0; ii<sbaij->i[m]; ii++) { 2219 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 2220 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 2221 } 2222 #endif 2223 2224 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2225 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2226 PetscFunctionReturn(0); 2227 } 2228 2229 2230 2231 2232 2233