1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "petscsys.h" /*I "petscmat.h" I*/ 11 12 /* UGLY, ugly, ugly 13 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 14 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 15 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 16 converts the entries into single precision and then calls ..._MatScalar() to put them 17 into the single precision data structures. 18 */ 19 #if defined(PETSC_USE_MAT_SINGLE) 20 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 21 #else 22 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 23 #endif 24 #if defined(PETSC_HAVE_DSCPACK) 25 EXTERN int MatUseDSCPACK_MPIBAIJ(Mat); 26 #endif 27 28 #define CHUNKSIZE 10 29 30 /* 31 Checks for missing diagonals 32 */ 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 35 int MatMissingDiagonal_SeqBAIJ(Mat A) 36 { 37 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 38 int *diag,*jj = a->j,i,ierr; 39 40 PetscFunctionBegin; 41 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 42 diag = a->diag; 43 for (i=0; i<a->mbs; i++) { 44 if (jj[diag[i]] != i) { 45 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 46 } 47 } 48 PetscFunctionReturn(0); 49 } 50 51 #undef __FUNCT__ 52 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 53 int MatMarkDiagonal_SeqBAIJ(Mat A) 54 { 55 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 56 int i,j,*diag,m = a->mbs,ierr; 57 58 PetscFunctionBegin; 59 if (a->diag) PetscFunctionReturn(0); 60 61 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 62 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 63 for (i=0; i<m; i++) { 64 diag[i] = a->i[i+1]; 65 for (j=a->i[i]; j<a->i[i+1]; j++) { 66 if (a->j[j] == i) { 67 diag[i] = j; 68 break; 69 } 70 } 71 } 72 a->diag = diag; 73 PetscFunctionReturn(0); 74 } 75 76 77 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 78 79 #undef __FUNCT__ 80 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 81 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 82 { 83 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 84 int ierr,n = a->mbs,i; 85 86 PetscFunctionBegin; 87 *nn = n; 88 if (!ia) PetscFunctionReturn(0); 89 if (symmetric) { 90 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 91 } else if (oshift == 1) { 92 /* temporarily add 1 to i and j indices */ 93 int nz = a->i[n]; 94 for (i=0; i<nz; i++) a->j[i]++; 95 for (i=0; i<n+1; i++) a->i[i]++; 96 *ia = a->i; *ja = a->j; 97 } else { 98 *ia = a->i; *ja = a->j; 99 } 100 101 PetscFunctionReturn(0); 102 } 103 104 #undef __FUNCT__ 105 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 106 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 107 { 108 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 109 int i,n = a->mbs,ierr; 110 111 PetscFunctionBegin; 112 if (!ia) PetscFunctionReturn(0); 113 if (symmetric) { 114 ierr = PetscFree(*ia);CHKERRQ(ierr); 115 ierr = PetscFree(*ja);CHKERRQ(ierr); 116 } else if (oshift == 1) { 117 int nz = a->i[n]-1; 118 for (i=0; i<nz; i++) a->j[i]--; 119 for (i=0; i<n+1; i++) a->i[i]--; 120 } 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 126 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 127 { 128 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 129 130 PetscFunctionBegin; 131 *bs = baij->bs; 132 PetscFunctionReturn(0); 133 } 134 135 136 #undef __FUNCT__ 137 #define __FUNCT__ "MatDestroy_SeqBAIJ" 138 int MatDestroy_SeqBAIJ(Mat A) 139 { 140 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 141 int ierr; 142 143 PetscFunctionBegin; 144 #if defined(PETSC_USE_LOG) 145 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 146 #endif 147 ierr = PetscFree(a->a);CHKERRQ(ierr); 148 if (!a->singlemalloc) { 149 ierr = PetscFree(a->i);CHKERRQ(ierr); 150 ierr = PetscFree(a->j);CHKERRQ(ierr); 151 } 152 if (a->row) { 153 ierr = ISDestroy(a->row);CHKERRQ(ierr); 154 } 155 if (a->col) { 156 ierr = ISDestroy(a->col);CHKERRQ(ierr); 157 } 158 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 159 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 160 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 161 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 162 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 163 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 164 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 165 #if defined(PETSC_USE_MAT_SINGLE) 166 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 167 #endif 168 ierr = PetscFree(a);CHKERRQ(ierr); 169 PetscFunctionReturn(0); 170 } 171 172 #undef __FUNCT__ 173 #define __FUNCT__ "MatSetOption_SeqBAIJ" 174 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 175 { 176 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 177 178 PetscFunctionBegin; 179 switch (op) { 180 case MAT_ROW_ORIENTED: 181 a->roworiented = PETSC_TRUE; 182 break; 183 case MAT_COLUMN_ORIENTED: 184 a->roworiented = PETSC_FALSE; 185 break; 186 case MAT_COLUMNS_SORTED: 187 a->sorted = PETSC_TRUE; 188 break; 189 case MAT_COLUMNS_UNSORTED: 190 a->sorted = PETSC_FALSE; 191 break; 192 case MAT_KEEP_ZEROED_ROWS: 193 a->keepzeroedrows = PETSC_TRUE; 194 break; 195 case MAT_NO_NEW_NONZERO_LOCATIONS: 196 a->nonew = 1; 197 break; 198 case MAT_NEW_NONZERO_LOCATION_ERR: 199 a->nonew = -1; 200 break; 201 case MAT_NEW_NONZERO_ALLOCATION_ERR: 202 a->nonew = -2; 203 break; 204 case MAT_YES_NEW_NONZERO_LOCATIONS: 205 a->nonew = 0; 206 break; 207 case MAT_ROWS_SORTED: 208 case MAT_ROWS_UNSORTED: 209 case MAT_YES_NEW_DIAGONALS: 210 case MAT_IGNORE_OFF_PROC_ENTRIES: 211 case MAT_USE_HASH_TABLE: 212 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 213 break; 214 case MAT_NO_NEW_DIAGONALS: 215 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 216 case MAT_USE_SINGLE_PRECISION_SOLVES: 217 if (a->bs==4) { 218 PetscTruth sse_enabled_local,sse_enabled_global; 219 int ierr; 220 221 sse_enabled_local = PETSC_FALSE; 222 sse_enabled_global = PETSC_FALSE; 223 224 ierr = PetscSSEIsEnabled(A->comm,&sse_enabled_local,&sse_enabled_global);CHKERRQ(ierr); 225 #if defined(PETSC_HAVE_SSE) 226 if (sse_enabled_local) { 227 a->single_precision_solves = PETSC_TRUE; 228 A->ops->solve = MatSolve_SeqBAIJ_Update; 229 A->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 230 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES set\n"); 231 break; 232 } else { 233 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 234 } 235 #else 236 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 237 #endif 238 } 239 break; 240 default: 241 SETERRQ(PETSC_ERR_SUP,"unknown option"); 242 } 243 PetscFunctionReturn(0); 244 } 245 246 #undef __FUNCT__ 247 #define __FUNCT__ "MatGetRow_SeqBAIJ" 248 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 249 { 250 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 251 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 252 MatScalar *aa,*aa_i; 253 PetscScalar *v_i; 254 255 PetscFunctionBegin; 256 bs = a->bs; 257 ai = a->i; 258 aj = a->j; 259 aa = a->a; 260 bs2 = a->bs2; 261 262 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 263 264 bn = row/bs; /* Block number */ 265 bp = row % bs; /* Block Position */ 266 M = ai[bn+1] - ai[bn]; 267 *nz = bs*M; 268 269 if (v) { 270 *v = 0; 271 if (*nz) { 272 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 273 for (i=0; i<M; i++) { /* for each block in the block row */ 274 v_i = *v + i*bs; 275 aa_i = aa + bs2*(ai[bn] + i); 276 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 277 } 278 } 279 } 280 281 if (idx) { 282 *idx = 0; 283 if (*nz) { 284 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 285 for (i=0; i<M; i++) { /* for each block in the block row */ 286 idx_i = *idx + i*bs; 287 itmp = bs*aj[ai[bn] + i]; 288 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 289 } 290 } 291 } 292 PetscFunctionReturn(0); 293 } 294 295 #undef __FUNCT__ 296 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 297 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 298 { 299 int ierr; 300 301 PetscFunctionBegin; 302 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 303 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 304 PetscFunctionReturn(0); 305 } 306 307 #undef __FUNCT__ 308 #define __FUNCT__ "MatTranspose_SeqBAIJ" 309 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 310 { 311 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 312 Mat C; 313 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 314 int *rows,*cols,bs2=a->bs2; 315 PetscScalar *array; 316 317 PetscFunctionBegin; 318 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 319 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 320 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 321 322 #if defined(PETSC_USE_MAT_SINGLE) 323 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 324 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 325 #else 326 array = a->a; 327 #endif 328 329 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 330 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 331 ierr = PetscFree(col);CHKERRQ(ierr); 332 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 333 cols = rows + bs; 334 for (i=0; i<mbs; i++) { 335 cols[0] = i*bs; 336 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 337 len = ai[i+1] - ai[i]; 338 for (j=0; j<len; j++) { 339 rows[0] = (*aj++)*bs; 340 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 341 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 342 array += bs2; 343 } 344 } 345 ierr = PetscFree(rows);CHKERRQ(ierr); 346 #if defined(PETSC_USE_MAT_SINGLE) 347 ierr = PetscFree(array); 348 #endif 349 350 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 351 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 352 353 if (B) { 354 *B = C; 355 } else { 356 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 357 } 358 PetscFunctionReturn(0); 359 } 360 361 #undef __FUNCT__ 362 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 363 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 364 { 365 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 366 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 367 PetscScalar *aa; 368 FILE *file; 369 370 PetscFunctionBegin; 371 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 372 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 373 col_lens[0] = MAT_FILE_COOKIE; 374 375 col_lens[1] = A->m; 376 col_lens[2] = A->n; 377 col_lens[3] = a->nz*bs2; 378 379 /* store lengths of each row and write (including header) to file */ 380 count = 0; 381 for (i=0; i<a->mbs; i++) { 382 for (j=0; j<bs; j++) { 383 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 384 } 385 } 386 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 387 ierr = PetscFree(col_lens);CHKERRQ(ierr); 388 389 /* store column indices (zero start index) */ 390 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 391 count = 0; 392 for (i=0; i<a->mbs; i++) { 393 for (j=0; j<bs; j++) { 394 for (k=a->i[i]; k<a->i[i+1]; k++) { 395 for (l=0; l<bs; l++) { 396 jj[count++] = bs*a->j[k] + l; 397 } 398 } 399 } 400 } 401 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 402 ierr = PetscFree(jj);CHKERRQ(ierr); 403 404 /* store nonzero values */ 405 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 406 count = 0; 407 for (i=0; i<a->mbs; i++) { 408 for (j=0; j<bs; j++) { 409 for (k=a->i[i]; k<a->i[i+1]; k++) { 410 for (l=0; l<bs; l++) { 411 aa[count++] = a->a[bs2*k + l*bs + j]; 412 } 413 } 414 } 415 } 416 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 417 ierr = PetscFree(aa);CHKERRQ(ierr); 418 419 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 420 if (file) { 421 fprintf(file,"-matload_block_size %d\n",a->bs); 422 } 423 PetscFunctionReturn(0); 424 } 425 426 extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer); 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 430 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 431 { 432 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 433 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 434 PetscViewerFormat format; 435 436 PetscFunctionBegin; 437 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 438 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) { 439 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 440 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 441 Mat aij; 442 ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr); 443 ierr = MatView(aij,viewer);CHKERRQ(ierr); 444 ierr = MatDestroy(aij);CHKERRQ(ierr); 445 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 446 #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX) 447 ierr = MatMPIBAIJFactorInfo_DSCPACK(A,viewer);CHKERRQ(ierr); 448 #endif 449 PetscFunctionReturn(0); 450 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 451 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 452 for (i=0; i<a->mbs; i++) { 453 for (j=0; j<bs; j++) { 454 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 455 for (k=a->i[i]; k<a->i[i+1]; k++) { 456 for (l=0; l<bs; l++) { 457 #if defined(PETSC_USE_COMPLEX) 458 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 459 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l, 460 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 461 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 462 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l, 463 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 464 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 465 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 466 } 467 #else 468 if (a->a[bs2*k + l*bs + j] != 0.0) { 469 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 470 } 471 #endif 472 } 473 } 474 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 475 } 476 } 477 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 478 } else { 479 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 480 for (i=0; i<a->mbs; i++) { 481 for (j=0; j<bs; j++) { 482 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 483 for (k=a->i[i]; k<a->i[i+1]; k++) { 484 for (l=0; l<bs; l++) { 485 #if defined(PETSC_USE_COMPLEX) 486 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 487 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 488 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 489 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 490 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 491 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 492 } else { 493 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 494 } 495 #else 496 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 497 #endif 498 } 499 } 500 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 501 } 502 } 503 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 504 } 505 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 506 PetscFunctionReturn(0); 507 } 508 509 #undef __FUNCT__ 510 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 511 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 512 { 513 Mat A = (Mat) Aa; 514 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 515 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 516 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 517 MatScalar *aa; 518 PetscViewer viewer; 519 520 PetscFunctionBegin; 521 522 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 523 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 524 525 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 526 527 /* loop over matrix elements drawing boxes */ 528 color = PETSC_DRAW_BLUE; 529 for (i=0,row=0; i<mbs; i++,row+=bs) { 530 for (j=a->i[i]; j<a->i[i+1]; j++) { 531 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 532 x_l = a->j[j]*bs; x_r = x_l + 1.0; 533 aa = a->a + j*bs2; 534 for (k=0; k<bs; k++) { 535 for (l=0; l<bs; l++) { 536 if (PetscRealPart(*aa++) >= 0.) continue; 537 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 538 } 539 } 540 } 541 } 542 color = PETSC_DRAW_CYAN; 543 for (i=0,row=0; i<mbs; i++,row+=bs) { 544 for (j=a->i[i]; j<a->i[i+1]; j++) { 545 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 546 x_l = a->j[j]*bs; x_r = x_l + 1.0; 547 aa = a->a + j*bs2; 548 for (k=0; k<bs; k++) { 549 for (l=0; l<bs; l++) { 550 if (PetscRealPart(*aa++) != 0.) continue; 551 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 552 } 553 } 554 } 555 } 556 557 color = PETSC_DRAW_RED; 558 for (i=0,row=0; i<mbs; i++,row+=bs) { 559 for (j=a->i[i]; j<a->i[i+1]; j++) { 560 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 561 x_l = a->j[j]*bs; x_r = x_l + 1.0; 562 aa = a->a + j*bs2; 563 for (k=0; k<bs; k++) { 564 for (l=0; l<bs; l++) { 565 if (PetscRealPart(*aa++) <= 0.) continue; 566 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 567 } 568 } 569 } 570 } 571 PetscFunctionReturn(0); 572 } 573 574 #undef __FUNCT__ 575 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 576 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 577 { 578 int ierr; 579 PetscReal xl,yl,xr,yr,w,h; 580 PetscDraw draw; 581 PetscTruth isnull; 582 583 PetscFunctionBegin; 584 585 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 586 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 587 588 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 589 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 590 xr += w; yr += h; xl = -w; yl = -h; 591 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 592 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 593 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 594 PetscFunctionReturn(0); 595 } 596 597 #undef __FUNCT__ 598 #define __FUNCT__ "MatView_SeqBAIJ" 599 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 600 { 601 int ierr; 602 PetscTruth issocket,isascii,isbinary,isdraw; 603 604 PetscFunctionBegin; 605 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 606 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 607 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 608 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 609 if (issocket) { 610 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 611 } else if (isascii){ 612 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 613 } else if (isbinary) { 614 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 615 } else if (isdraw) { 616 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 617 } else { 618 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 619 } 620 PetscFunctionReturn(0); 621 } 622 623 624 #undef __FUNCT__ 625 #define __FUNCT__ "MatGetValues_SeqBAIJ" 626 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 627 { 628 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 629 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 630 int *ai = a->i,*ailen = a->ilen; 631 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 632 MatScalar *ap,*aa = a->a,zero = 0.0; 633 634 PetscFunctionBegin; 635 for (k=0; k<m; k++) { /* loop over rows */ 636 row = im[k]; brow = row/bs; 637 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 638 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 639 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 640 nrow = ailen[brow]; 641 for (l=0; l<n; l++) { /* loop over columns */ 642 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 643 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 644 col = in[l] ; 645 bcol = col/bs; 646 cidx = col%bs; 647 ridx = row%bs; 648 high = nrow; 649 low = 0; /* assume unsorted */ 650 while (high-low > 5) { 651 t = (low+high)/2; 652 if (rp[t] > bcol) high = t; 653 else low = t; 654 } 655 for (i=low; i<high; i++) { 656 if (rp[i] > bcol) break; 657 if (rp[i] == bcol) { 658 *v++ = ap[bs2*i+bs*cidx+ridx]; 659 goto finished; 660 } 661 } 662 *v++ = zero; 663 finished:; 664 } 665 } 666 PetscFunctionReturn(0); 667 } 668 669 #if defined(PETSC_USE_MAT_SINGLE) 670 #undef __FUNCT__ 671 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 672 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 673 { 674 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 675 int ierr,i,N = m*n*b->bs2; 676 MatScalar *vsingle; 677 678 PetscFunctionBegin; 679 if (N > b->setvalueslen) { 680 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 681 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 682 b->setvalueslen = N; 683 } 684 vsingle = b->setvaluescopy; 685 for (i=0; i<N; i++) { 686 vsingle[i] = v[i]; 687 } 688 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 689 PetscFunctionReturn(0); 690 } 691 #endif 692 693 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 696 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is) 697 { 698 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 699 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 700 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 701 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 702 PetscTruth roworiented=a->roworiented; 703 MatScalar *value = v,*ap,*aa = a->a,*bap; 704 705 PetscFunctionBegin; 706 if (roworiented) { 707 stepval = (n-1)*bs; 708 } else { 709 stepval = (m-1)*bs; 710 } 711 for (k=0; k<m; k++) { /* loop over added rows */ 712 row = im[k]; 713 if (row < 0) continue; 714 #if defined(PETSC_USE_BOPT_g) 715 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 716 #endif 717 rp = aj + ai[row]; 718 ap = aa + bs2*ai[row]; 719 rmax = imax[row]; 720 nrow = ailen[row]; 721 low = 0; 722 for (l=0; l<n; l++) { /* loop over added columns */ 723 if (in[l] < 0) continue; 724 #if defined(PETSC_USE_BOPT_g) 725 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 726 #endif 727 col = in[l]; 728 if (roworiented) { 729 value = v + k*(stepval+bs)*bs + l*bs; 730 } else { 731 value = v + l*(stepval+bs)*bs + k*bs; 732 } 733 if (!sorted) low = 0; high = nrow; 734 while (high-low > 7) { 735 t = (low+high)/2; 736 if (rp[t] > col) high = t; 737 else low = t; 738 } 739 for (i=low; i<high; i++) { 740 if (rp[i] > col) break; 741 if (rp[i] == col) { 742 bap = ap + bs2*i; 743 if (roworiented) { 744 if (is == ADD_VALUES) { 745 for (ii=0; ii<bs; ii++,value+=stepval) { 746 for (jj=ii; jj<bs2; jj+=bs) { 747 bap[jj] += *value++; 748 } 749 } 750 } else { 751 for (ii=0; ii<bs; ii++,value+=stepval) { 752 for (jj=ii; jj<bs2; jj+=bs) { 753 bap[jj] = *value++; 754 } 755 } 756 } 757 } else { 758 if (is == ADD_VALUES) { 759 for (ii=0; ii<bs; ii++,value+=stepval) { 760 for (jj=0; jj<bs; jj++) { 761 *bap++ += *value++; 762 } 763 } 764 } else { 765 for (ii=0; ii<bs; ii++,value+=stepval) { 766 for (jj=0; jj<bs; jj++) { 767 *bap++ = *value++; 768 } 769 } 770 } 771 } 772 goto noinsert2; 773 } 774 } 775 if (nonew == 1) goto noinsert2; 776 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 777 if (nrow >= rmax) { 778 /* there is no extra room in row, therefore enlarge */ 779 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 780 MatScalar *new_a; 781 782 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 783 784 /* malloc new storage space */ 785 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 786 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 787 new_j = (int*)(new_a + bs2*new_nz); 788 new_i = new_j + new_nz; 789 790 /* copy over old data into new slots */ 791 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 792 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 793 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 794 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 795 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 796 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 797 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 798 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 799 /* free up old matrix storage */ 800 ierr = PetscFree(a->a);CHKERRQ(ierr); 801 if (!a->singlemalloc) { 802 ierr = PetscFree(a->i);CHKERRQ(ierr); 803 ierr = PetscFree(a->j);CHKERRQ(ierr); 804 } 805 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 806 a->singlemalloc = PETSC_TRUE; 807 808 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 809 rmax = imax[row] = imax[row] + CHUNKSIZE; 810 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 811 a->maxnz += bs2*CHUNKSIZE; 812 a->reallocs++; 813 a->nz++; 814 } 815 N = nrow++ - 1; 816 /* shift up all the later entries in this row */ 817 for (ii=N; ii>=i; ii--) { 818 rp[ii+1] = rp[ii]; 819 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 820 } 821 if (N >= i) { 822 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 823 } 824 rp[i] = col; 825 bap = ap + bs2*i; 826 if (roworiented) { 827 for (ii=0; ii<bs; ii++,value+=stepval) { 828 for (jj=ii; jj<bs2; jj+=bs) { 829 bap[jj] = *value++; 830 } 831 } 832 } else { 833 for (ii=0; ii<bs; ii++,value+=stepval) { 834 for (jj=0; jj<bs; jj++) { 835 *bap++ = *value++; 836 } 837 } 838 } 839 noinsert2:; 840 low = i; 841 } 842 ailen[row] = nrow; 843 } 844 PetscFunctionReturn(0); 845 } 846 847 /* EXTERN int MatUseDSCPACK_MPIBAIJ(Mat); */ 848 #undef __FUNCT__ 849 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 850 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 851 { 852 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 853 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 854 int m = A->m,*ip,N,*ailen = a->ilen; 855 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 856 MatScalar *aa = a->a,*ap; 857 #if defined(PETSC_HAVE_DSCPACK) 858 PetscTruth flag; 859 #endif 860 861 PetscFunctionBegin; 862 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 863 864 if (m) rmax = ailen[0]; 865 for (i=1; i<mbs; i++) { 866 /* move each row back by the amount of empty slots (fshift) before it*/ 867 fshift += imax[i-1] - ailen[i-1]; 868 rmax = PetscMax(rmax,ailen[i]); 869 if (fshift) { 870 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 871 N = ailen[i]; 872 for (j=0; j<N; j++) { 873 ip[j-fshift] = ip[j]; 874 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 875 } 876 } 877 ai[i] = ai[i-1] + ailen[i-1]; 878 } 879 if (mbs) { 880 fshift += imax[mbs-1] - ailen[mbs-1]; 881 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 882 } 883 /* reset ilen and imax for each row */ 884 for (i=0; i<mbs; i++) { 885 ailen[i] = imax[i] = ai[i+1] - ai[i]; 886 } 887 a->nz = ai[mbs]; 888 889 /* diagonals may have moved, so kill the diagonal pointers */ 890 if (fshift && a->diag) { 891 ierr = PetscFree(a->diag);CHKERRQ(ierr); 892 PetscLogObjectMemory(A,-(mbs+1)*sizeof(int)); 893 a->diag = 0; 894 } 895 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 896 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 897 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 898 a->reallocs = 0; 899 A->info.nz_unneeded = (PetscReal)fshift*bs2; 900 #if defined(PETSC_HAVE_DSCPACK) 901 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_baij_dscpack",&flag);CHKERRQ(ierr); 902 if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(A);CHKERRQ(ierr); } 903 #endif 904 905 906 PetscFunctionReturn(0); 907 } 908 909 910 911 /* 912 This function returns an array of flags which indicate the locations of contiguous 913 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 914 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 915 Assume: sizes should be long enough to hold all the values. 916 */ 917 #undef __FUNCT__ 918 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 919 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 920 { 921 int i,j,k,row; 922 PetscTruth flg; 923 924 PetscFunctionBegin; 925 for (i=0,j=0; i<n; j++) { 926 row = idx[i]; 927 if (row%bs!=0) { /* Not the begining of a block */ 928 sizes[j] = 1; 929 i++; 930 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 931 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 932 i++; 933 } else { /* Begining of the block, so check if the complete block exists */ 934 flg = PETSC_TRUE; 935 for (k=1; k<bs; k++) { 936 if (row+k != idx[i+k]) { /* break in the block */ 937 flg = PETSC_FALSE; 938 break; 939 } 940 } 941 if (flg == PETSC_TRUE) { /* No break in the bs */ 942 sizes[j] = bs; 943 i+= bs; 944 } else { 945 sizes[j] = 1; 946 i++; 947 } 948 } 949 } 950 *bs_max = j; 951 PetscFunctionReturn(0); 952 } 953 954 #undef __FUNCT__ 955 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 956 int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag) 957 { 958 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 959 int ierr,i,j,k,count,is_n,*is_idx,*rows; 960 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 961 PetscScalar zero = 0.0; 962 MatScalar *aa; 963 964 PetscFunctionBegin; 965 /* Make a copy of the IS and sort it */ 966 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 967 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 968 969 /* allocate memory for rows,sizes */ 970 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 971 sizes = rows + is_n; 972 973 /* copy IS values to rows, and sort them */ 974 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 975 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 976 if (baij->keepzeroedrows) { 977 for (i=0; i<is_n; i++) { sizes[i] = 1; } 978 bs_max = is_n; 979 } else { 980 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 981 } 982 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 983 984 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 985 row = rows[j]; 986 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 987 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 988 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 989 if (sizes[i] == bs && !baij->keepzeroedrows) { 990 if (diag) { 991 if (baij->ilen[row/bs] > 0) { 992 baij->ilen[row/bs] = 1; 993 baij->j[baij->i[row/bs]] = row/bs; 994 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 995 } 996 /* Now insert all the diagonal values for this bs */ 997 for (k=0; k<bs; k++) { 998 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 999 } 1000 } else { /* (!diag) */ 1001 baij->ilen[row/bs] = 0; 1002 } /* end (!diag) */ 1003 } else { /* (sizes[i] != bs) */ 1004 #if defined (PETSC_USE_DEBUG) 1005 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 1006 #endif 1007 for (k=0; k<count; k++) { 1008 aa[0] = zero; 1009 aa += bs; 1010 } 1011 if (diag) { 1012 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1013 } 1014 } 1015 } 1016 1017 ierr = PetscFree(rows);CHKERRQ(ierr); 1018 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1019 PetscFunctionReturn(0); 1020 } 1021 1022 #undef __FUNCT__ 1023 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1024 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 1025 { 1026 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1027 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1028 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1029 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1030 int ridx,cidx,bs2=a->bs2,ierr; 1031 PetscTruth roworiented=a->roworiented; 1032 MatScalar *ap,value,*aa=a->a,*bap; 1033 1034 PetscFunctionBegin; 1035 for (k=0; k<m; k++) { /* loop over added rows */ 1036 row = im[k]; brow = row/bs; 1037 if (row < 0) continue; 1038 #if defined(PETSC_USE_BOPT_g) 1039 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 1040 #endif 1041 rp = aj + ai[brow]; 1042 ap = aa + bs2*ai[brow]; 1043 rmax = imax[brow]; 1044 nrow = ailen[brow]; 1045 low = 0; 1046 for (l=0; l<n; l++) { /* loop over added columns */ 1047 if (in[l] < 0) continue; 1048 #if defined(PETSC_USE_BOPT_g) 1049 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n); 1050 #endif 1051 col = in[l]; bcol = col/bs; 1052 ridx = row % bs; cidx = col % bs; 1053 if (roworiented) { 1054 value = v[l + k*n]; 1055 } else { 1056 value = v[k + l*m]; 1057 } 1058 if (!sorted) low = 0; high = nrow; 1059 while (high-low > 7) { 1060 t = (low+high)/2; 1061 if (rp[t] > bcol) high = t; 1062 else low = t; 1063 } 1064 for (i=low; i<high; i++) { 1065 if (rp[i] > bcol) break; 1066 if (rp[i] == bcol) { 1067 bap = ap + bs2*i + bs*cidx + ridx; 1068 if (is == ADD_VALUES) *bap += value; 1069 else *bap = value; 1070 goto noinsert1; 1071 } 1072 } 1073 if (nonew == 1) goto noinsert1; 1074 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1075 if (nrow >= rmax) { 1076 /* there is no extra room in row, therefore enlarge */ 1077 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1078 MatScalar *new_a; 1079 1080 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1081 1082 /* Malloc new storage space */ 1083 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1084 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1085 new_j = (int*)(new_a + bs2*new_nz); 1086 new_i = new_j + new_nz; 1087 1088 /* copy over old data into new slots */ 1089 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1090 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1091 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1092 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1093 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1094 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1095 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1096 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1097 /* free up old matrix storage */ 1098 ierr = PetscFree(a->a);CHKERRQ(ierr); 1099 if (!a->singlemalloc) { 1100 ierr = PetscFree(a->i);CHKERRQ(ierr); 1101 ierr = PetscFree(a->j);CHKERRQ(ierr); 1102 } 1103 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1104 a->singlemalloc = PETSC_TRUE; 1105 1106 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1107 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1108 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1109 a->maxnz += bs2*CHUNKSIZE; 1110 a->reallocs++; 1111 a->nz++; 1112 } 1113 N = nrow++ - 1; 1114 /* shift up all the later entries in this row */ 1115 for (ii=N; ii>=i; ii--) { 1116 rp[ii+1] = rp[ii]; 1117 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1118 } 1119 if (N>=i) { 1120 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1121 } 1122 rp[i] = bcol; 1123 ap[bs2*i + bs*cidx + ridx] = value; 1124 noinsert1:; 1125 low = i; 1126 } 1127 ailen[brow] = nrow; 1128 } 1129 PetscFunctionReturn(0); 1130 } 1131 1132 1133 #undef __FUNCT__ 1134 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1135 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1136 { 1137 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1138 Mat outA; 1139 int ierr; 1140 PetscTruth row_identity,col_identity; 1141 1142 PetscFunctionBegin; 1143 if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1144 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1145 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1146 if (!row_identity || !col_identity) { 1147 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1148 } 1149 1150 outA = inA; 1151 inA->factor = FACTOR_LU; 1152 1153 if (!a->diag) { 1154 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1155 } 1156 1157 a->row = row; 1158 a->col = col; 1159 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1160 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1161 1162 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1163 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1164 PetscLogObjectParent(inA,a->icol); 1165 1166 /* 1167 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1168 for ILU(0) factorization with natural ordering 1169 */ 1170 if (a->bs < 8) { 1171 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1172 } else { 1173 if (!a->solve_work) { 1174 ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1175 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1176 } 1177 } 1178 1179 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1180 1181 PetscFunctionReturn(0); 1182 } 1183 #undef __FUNCT__ 1184 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1185 int MatPrintHelp_SeqBAIJ(Mat A) 1186 { 1187 static PetscTruth called = PETSC_FALSE; 1188 MPI_Comm comm = A->comm; 1189 int ierr; 1190 1191 PetscFunctionBegin; 1192 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1193 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1194 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1195 PetscFunctionReturn(0); 1196 } 1197 1198 EXTERN_C_BEGIN 1199 #undef __FUNCT__ 1200 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1201 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1202 { 1203 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1204 int i,nz,nbs; 1205 1206 PetscFunctionBegin; 1207 nz = baij->maxnz/baij->bs2; 1208 nbs = baij->nbs; 1209 for (i=0; i<nz; i++) { 1210 baij->j[i] = indices[i]; 1211 } 1212 baij->nz = nz; 1213 for (i=0; i<nbs; i++) { 1214 baij->ilen[i] = baij->imax[i]; 1215 } 1216 1217 PetscFunctionReturn(0); 1218 } 1219 EXTERN_C_END 1220 1221 #undef __FUNCT__ 1222 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1223 /*@ 1224 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1225 in the matrix. 1226 1227 Input Parameters: 1228 + mat - the SeqBAIJ matrix 1229 - indices - the column indices 1230 1231 Level: advanced 1232 1233 Notes: 1234 This can be called if you have precomputed the nonzero structure of the 1235 matrix and want to provide it to the matrix object to improve the performance 1236 of the MatSetValues() operation. 1237 1238 You MUST have set the correct numbers of nonzeros per row in the call to 1239 MatCreateSeqBAIJ(). 1240 1241 MUST be called before any calls to MatSetValues(); 1242 1243 @*/ 1244 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1245 { 1246 int ierr,(*f)(Mat,int *); 1247 1248 PetscFunctionBegin; 1249 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1250 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1251 if (f) { 1252 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1253 } else { 1254 SETERRQ(1,"Wrong type of matrix to set column indices"); 1255 } 1256 PetscFunctionReturn(0); 1257 } 1258 1259 #undef __FUNCT__ 1260 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1261 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1262 { 1263 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1264 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1265 PetscReal atmp; 1266 PetscScalar *x,zero = 0.0; 1267 MatScalar *aa; 1268 int ncols,brow,krow,kcol; 1269 1270 PetscFunctionBegin; 1271 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1272 bs = a->bs; 1273 aa = a->a; 1274 ai = a->i; 1275 aj = a->j; 1276 mbs = a->mbs; 1277 1278 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1279 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1280 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1281 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1282 for (i=0; i<mbs; i++) { 1283 ncols = ai[1] - ai[0]; ai++; 1284 brow = bs*i; 1285 for (j=0; j<ncols; j++){ 1286 /* bcol = bs*(*aj); */ 1287 for (kcol=0; kcol<bs; kcol++){ 1288 for (krow=0; krow<bs; krow++){ 1289 atmp = PetscAbsScalar(*aa); aa++; 1290 row = brow + krow; /* row index */ 1291 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1292 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1293 } 1294 } 1295 aj++; 1296 } 1297 } 1298 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1299 PetscFunctionReturn(0); 1300 } 1301 1302 #undef __FUNCT__ 1303 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1304 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1305 { 1306 int ierr; 1307 1308 PetscFunctionBegin; 1309 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1310 PetscFunctionReturn(0); 1311 } 1312 1313 #undef __FUNCT__ 1314 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1315 int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array) 1316 { 1317 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1318 PetscFunctionBegin; 1319 *array = a->a; 1320 PetscFunctionReturn(0); 1321 } 1322 1323 #undef __FUNCT__ 1324 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1325 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array) 1326 { 1327 PetscFunctionBegin; 1328 PetscFunctionReturn(0); 1329 } 1330 1331 /* -------------------------------------------------------------------*/ 1332 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1333 MatGetRow_SeqBAIJ, 1334 MatRestoreRow_SeqBAIJ, 1335 MatMult_SeqBAIJ_N, 1336 MatMultAdd_SeqBAIJ_N, 1337 MatMultTranspose_SeqBAIJ, 1338 MatMultTransposeAdd_SeqBAIJ, 1339 MatSolve_SeqBAIJ_N, 1340 0, 1341 0, 1342 0, 1343 MatLUFactor_SeqBAIJ, 1344 0, 1345 0, 1346 MatTranspose_SeqBAIJ, 1347 MatGetInfo_SeqBAIJ, 1348 MatEqual_SeqBAIJ, 1349 MatGetDiagonal_SeqBAIJ, 1350 MatDiagonalScale_SeqBAIJ, 1351 MatNorm_SeqBAIJ, 1352 0, 1353 MatAssemblyEnd_SeqBAIJ, 1354 0, 1355 MatSetOption_SeqBAIJ, 1356 MatZeroEntries_SeqBAIJ, 1357 MatZeroRows_SeqBAIJ, 1358 MatLUFactorSymbolic_SeqBAIJ, 1359 MatLUFactorNumeric_SeqBAIJ_N, 1360 0, 1361 0, 1362 MatSetUpPreallocation_SeqBAIJ, 1363 MatILUFactorSymbolic_SeqBAIJ, 1364 0, 1365 MatGetArray_SeqBAIJ, 1366 MatRestoreArray_SeqBAIJ, 1367 MatDuplicate_SeqBAIJ, 1368 0, 1369 0, 1370 MatILUFactor_SeqBAIJ, 1371 0, 1372 0, 1373 MatGetSubMatrices_SeqBAIJ, 1374 MatIncreaseOverlap_SeqBAIJ, 1375 MatGetValues_SeqBAIJ, 1376 0, 1377 MatPrintHelp_SeqBAIJ, 1378 MatScale_SeqBAIJ, 1379 0, 1380 0, 1381 0, 1382 MatGetBlockSize_SeqBAIJ, 1383 MatGetRowIJ_SeqBAIJ, 1384 MatRestoreRowIJ_SeqBAIJ, 1385 0, 1386 0, 1387 0, 1388 0, 1389 0, 1390 0, 1391 MatSetValuesBlocked_SeqBAIJ, 1392 MatGetSubMatrix_SeqBAIJ, 1393 MatDestroy_SeqBAIJ, 1394 MatView_SeqBAIJ, 1395 MatGetPetscMaps_Petsc, 1396 0, 1397 0, 1398 0, 1399 0, 1400 0, 1401 0, 1402 MatGetRowMax_SeqBAIJ, 1403 MatConvert_Basic}; 1404 1405 EXTERN_C_BEGIN 1406 #undef __FUNCT__ 1407 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 1408 int MatStoreValues_SeqBAIJ(Mat mat) 1409 { 1410 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1411 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1412 int ierr; 1413 1414 PetscFunctionBegin; 1415 if (aij->nonew != 1) { 1416 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1417 } 1418 1419 /* allocate space for values if not already there */ 1420 if (!aij->saved_values) { 1421 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1422 } 1423 1424 /* copy values over */ 1425 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1426 PetscFunctionReturn(0); 1427 } 1428 EXTERN_C_END 1429 1430 EXTERN_C_BEGIN 1431 #undef __FUNCT__ 1432 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 1433 int MatRetrieveValues_SeqBAIJ(Mat mat) 1434 { 1435 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1436 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1437 1438 PetscFunctionBegin; 1439 if (aij->nonew != 1) { 1440 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1441 } 1442 if (!aij->saved_values) { 1443 SETERRQ(1,"Must call MatStoreValues(A);first"); 1444 } 1445 1446 /* copy values over */ 1447 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1448 PetscFunctionReturn(0); 1449 } 1450 EXTERN_C_END 1451 1452 EXTERN_C_BEGIN 1453 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1454 EXTERN_C_END 1455 1456 EXTERN_C_BEGIN 1457 #undef __FUNCT__ 1458 #define __FUNCT__ "MatCreate_SeqBAIJ" 1459 int MatCreate_SeqBAIJ(Mat B) 1460 { 1461 int ierr,size; 1462 Mat_SeqBAIJ *b; 1463 1464 PetscFunctionBegin; 1465 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1466 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1467 1468 B->m = B->M = PetscMax(B->m,B->M); 1469 B->n = B->N = PetscMax(B->n,B->N); 1470 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1471 B->data = (void*)b; 1472 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1473 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1474 B->factor = 0; 1475 B->lupivotthreshold = 1.0; 1476 B->mapping = 0; 1477 b->row = 0; 1478 b->col = 0; 1479 b->icol = 0; 1480 b->reallocs = 0; 1481 b->saved_values = 0; 1482 #if defined(PETSC_USE_MAT_SINGLE) 1483 b->setvalueslen = 0; 1484 b->setvaluescopy = PETSC_NULL; 1485 #endif 1486 b->single_precision_solves = PETSC_FALSE; 1487 1488 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 1489 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1490 1491 b->sorted = PETSC_FALSE; 1492 b->roworiented = PETSC_TRUE; 1493 b->nonew = 0; 1494 b->diag = 0; 1495 b->solve_work = 0; 1496 b->mult_work = 0; 1497 B->spptr = 0; 1498 B->info.nz_unneeded = (PetscReal)b->maxnz; 1499 b->keepzeroedrows = PETSC_FALSE; 1500 1501 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1502 "MatStoreValues_SeqBAIJ", 1503 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1504 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1505 "MatRetrieveValues_SeqBAIJ", 1506 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1507 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1508 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1509 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1510 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C", 1511 "MatConvert_SeqBAIJ_SeqAIJ", 1512 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 1513 PetscFunctionReturn(0); 1514 } 1515 EXTERN_C_END 1516 1517 #undef __FUNCT__ 1518 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 1519 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1520 { 1521 Mat C; 1522 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1523 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1524 1525 PetscFunctionBegin; 1526 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1527 1528 *B = 0; 1529 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1530 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1531 c = (Mat_SeqBAIJ*)C->data; 1532 1533 c->bs = a->bs; 1534 c->bs2 = a->bs2; 1535 c->mbs = a->mbs; 1536 c->nbs = a->nbs; 1537 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1538 1539 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1540 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1541 for (i=0; i<mbs; i++) { 1542 c->imax[i] = a->imax[i]; 1543 c->ilen[i] = a->ilen[i]; 1544 } 1545 1546 /* allocate the matrix space */ 1547 c->singlemalloc = PETSC_TRUE; 1548 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1549 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1550 c->j = (int*)(c->a + nz*bs2); 1551 c->i = c->j + nz; 1552 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1553 if (mbs > 0) { 1554 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1555 if (cpvalues == MAT_COPY_VALUES) { 1556 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1557 } else { 1558 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1559 } 1560 } 1561 1562 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1563 c->sorted = a->sorted; 1564 c->roworiented = a->roworiented; 1565 c->nonew = a->nonew; 1566 1567 if (a->diag) { 1568 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1569 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1570 for (i=0; i<mbs; i++) { 1571 c->diag[i] = a->diag[i]; 1572 } 1573 } else c->diag = 0; 1574 c->nz = a->nz; 1575 c->maxnz = a->maxnz; 1576 c->solve_work = 0; 1577 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1578 c->mult_work = 0; 1579 C->preallocated = PETSC_TRUE; 1580 C->assembled = PETSC_TRUE; 1581 *B = C; 1582 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1583 PetscFunctionReturn(0); 1584 } 1585 1586 EXTERN_C_BEGIN 1587 #undef __FUNCT__ 1588 #define __FUNCT__ "MatLoad_SeqBAIJ" 1589 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 1590 { 1591 Mat_SeqBAIJ *a; 1592 Mat B; 1593 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1594 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1595 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1596 int *masked,nmask,tmp,bs2,ishift; 1597 PetscScalar *aa; 1598 MPI_Comm comm = ((PetscObject)viewer)->comm; 1599 1600 PetscFunctionBegin; 1601 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1602 bs2 = bs*bs; 1603 1604 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1605 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1606 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1607 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1608 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1609 M = header[1]; N = header[2]; nz = header[3]; 1610 1611 if (header[3] < 0) { 1612 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1613 } 1614 1615 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1616 1617 /* 1618 This code adds extra rows to make sure the number of rows is 1619 divisible by the blocksize 1620 */ 1621 mbs = M/bs; 1622 extra_rows = bs - M + bs*(mbs); 1623 if (extra_rows == bs) extra_rows = 0; 1624 else mbs++; 1625 if (extra_rows) { 1626 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1627 } 1628 1629 /* read in row lengths */ 1630 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1631 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1632 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1633 1634 /* read in column indices */ 1635 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1636 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1637 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1638 1639 /* loop over row lengths determining block row lengths */ 1640 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1641 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1642 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1643 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1644 masked = mask + mbs; 1645 rowcount = 0; nzcount = 0; 1646 for (i=0; i<mbs; i++) { 1647 nmask = 0; 1648 for (j=0; j<bs; j++) { 1649 kmax = rowlengths[rowcount]; 1650 for (k=0; k<kmax; k++) { 1651 tmp = jj[nzcount++]/bs; 1652 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1653 } 1654 rowcount++; 1655 } 1656 browlengths[i] += nmask; 1657 /* zero out the mask elements we set */ 1658 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1659 } 1660 1661 /* create our matrix */ 1662 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1663 B = *A; 1664 a = (Mat_SeqBAIJ*)B->data; 1665 1666 /* set matrix "i" values */ 1667 a->i[0] = 0; 1668 for (i=1; i<= mbs; i++) { 1669 a->i[i] = a->i[i-1] + browlengths[i-1]; 1670 a->ilen[i-1] = browlengths[i-1]; 1671 } 1672 a->nz = 0; 1673 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1674 1675 /* read in nonzero values */ 1676 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1677 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1678 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1679 1680 /* set "a" and "j" values into matrix */ 1681 nzcount = 0; jcount = 0; 1682 for (i=0; i<mbs; i++) { 1683 nzcountb = nzcount; 1684 nmask = 0; 1685 for (j=0; j<bs; j++) { 1686 kmax = rowlengths[i*bs+j]; 1687 for (k=0; k<kmax; k++) { 1688 tmp = jj[nzcount++]/bs; 1689 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1690 } 1691 } 1692 /* sort the masked values */ 1693 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1694 1695 /* set "j" values into matrix */ 1696 maskcount = 1; 1697 for (j=0; j<nmask; j++) { 1698 a->j[jcount++] = masked[j]; 1699 mask[masked[j]] = maskcount++; 1700 } 1701 /* set "a" values into matrix */ 1702 ishift = bs2*a->i[i]; 1703 for (j=0; j<bs; j++) { 1704 kmax = rowlengths[i*bs+j]; 1705 for (k=0; k<kmax; k++) { 1706 tmp = jj[nzcountb]/bs ; 1707 block = mask[tmp] - 1; 1708 point = jj[nzcountb] - bs*tmp; 1709 idx = ishift + bs2*block + j + bs*point; 1710 a->a[idx] = (MatScalar)aa[nzcountb++]; 1711 } 1712 } 1713 /* zero out the mask elements we set */ 1714 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1715 } 1716 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1717 1718 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1719 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1720 ierr = PetscFree(aa);CHKERRQ(ierr); 1721 ierr = PetscFree(jj);CHKERRQ(ierr); 1722 ierr = PetscFree(mask);CHKERRQ(ierr); 1723 1724 B->assembled = PETSC_TRUE; 1725 1726 ierr = MatView_Private(B);CHKERRQ(ierr); 1727 PetscFunctionReturn(0); 1728 } 1729 EXTERN_C_END 1730 1731 #undef __FUNCT__ 1732 #define __FUNCT__ "MatCreateSeqBAIJ" 1733 /*@C 1734 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1735 compressed row) format. For good matrix assembly performance the 1736 user should preallocate the matrix storage by setting the parameter nz 1737 (or the array nnz). By setting these parameters accurately, performance 1738 during matrix assembly can be increased by more than a factor of 50. 1739 1740 Collective on MPI_Comm 1741 1742 Input Parameters: 1743 + comm - MPI communicator, set to PETSC_COMM_SELF 1744 . bs - size of block 1745 . m - number of rows 1746 . n - number of columns 1747 . nz - number of nonzero blocks per block row (same for all rows) 1748 - nnz - array containing the number of nonzero blocks in the various block rows 1749 (possibly different for each block row) or PETSC_NULL 1750 1751 Output Parameter: 1752 . A - the matrix 1753 1754 Options Database Keys: 1755 . -mat_no_unroll - uses code that does not unroll the loops in the 1756 block calculations (much slower) 1757 . -mat_block_size - size of the blocks to use 1758 1759 Level: intermediate 1760 1761 Notes: 1762 A nonzero block is any block that as 1 or more nonzeros in it 1763 1764 The block AIJ format is fully compatible with standard Fortran 77 1765 storage. That is, the stored row and column indices can begin at 1766 either one (as in Fortran) or zero. See the users' manual for details. 1767 1768 Specify the preallocated storage with either nz or nnz (not both). 1769 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1770 allocation. For additional details, see the users manual chapter on 1771 matrices. 1772 1773 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1774 @*/ 1775 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1776 { 1777 int ierr; 1778 1779 PetscFunctionBegin; 1780 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1781 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 1782 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1783 PetscFunctionReturn(0); 1784 } 1785 1786 #undef __FUNCT__ 1787 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 1788 /*@C 1789 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 1790 per row in the matrix. For good matrix assembly performance the 1791 user should preallocate the matrix storage by setting the parameter nz 1792 (or the array nnz). By setting these parameters accurately, performance 1793 during matrix assembly can be increased by more than a factor of 50. 1794 1795 Collective on MPI_Comm 1796 1797 Input Parameters: 1798 + A - the matrix 1799 . bs - size of block 1800 . nz - number of block nonzeros per block row (same for all rows) 1801 - nnz - array containing the number of block nonzeros in the various block rows 1802 (possibly different for each block row) or PETSC_NULL 1803 1804 Options Database Keys: 1805 . -mat_no_unroll - uses code that does not unroll the loops in the 1806 block calculations (much slower) 1807 . -mat_block_size - size of the blocks to use 1808 1809 Level: intermediate 1810 1811 Notes: 1812 The block AIJ format is fully compatible with standard Fortran 77 1813 storage. That is, the stored row and column indices can begin at 1814 either one (as in Fortran) or zero. See the users' manual for details. 1815 1816 Specify the preallocated storage with either nz or nnz (not both). 1817 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1818 allocation. For additional details, see the users manual chapter on 1819 matrices. 1820 1821 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1822 @*/ 1823 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1824 { 1825 Mat_SeqBAIJ *b; 1826 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1827 PetscTruth flg; 1828 1829 PetscFunctionBegin; 1830 ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr); 1831 if (!flg) PetscFunctionReturn(0); 1832 1833 B->preallocated = PETSC_TRUE; 1834 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1835 if (nnz && newbs != bs) { 1836 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1837 } 1838 bs = newbs; 1839 1840 mbs = B->m/bs; 1841 nbs = B->n/bs; 1842 bs2 = bs*bs; 1843 1844 if (mbs*bs!=B->m || nbs*bs!=B->n) { 1845 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1846 } 1847 1848 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1849 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1850 if (nnz) { 1851 for (i=0; i<mbs; i++) { 1852 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1853 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1854 } 1855 } 1856 1857 b = (Mat_SeqBAIJ*)B->data; 1858 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1859 B->ops->solve = MatSolve_SeqBAIJ_Update; 1860 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 1861 if (!flg) { 1862 switch (bs) { 1863 case 1: 1864 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1865 B->ops->mult = MatMult_SeqBAIJ_1; 1866 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1867 break; 1868 case 2: 1869 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1870 B->ops->mult = MatMult_SeqBAIJ_2; 1871 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1872 break; 1873 case 3: 1874 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1875 B->ops->mult = MatMult_SeqBAIJ_3; 1876 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1877 break; 1878 case 4: 1879 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1880 B->ops->mult = MatMult_SeqBAIJ_4; 1881 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1882 break; 1883 case 5: 1884 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1885 B->ops->mult = MatMult_SeqBAIJ_5; 1886 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1887 break; 1888 case 6: 1889 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 1890 B->ops->mult = MatMult_SeqBAIJ_6; 1891 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 1892 break; 1893 case 7: 1894 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 1895 B->ops->mult = MatMult_SeqBAIJ_7; 1896 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 1897 break; 1898 default: 1899 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 1900 B->ops->mult = MatMult_SeqBAIJ_N; 1901 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 1902 break; 1903 } 1904 } 1905 b->bs = bs; 1906 b->mbs = mbs; 1907 b->nbs = nbs; 1908 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1909 if (!nnz) { 1910 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1911 else if (nz <= 0) nz = 1; 1912 for (i=0; i<mbs; i++) b->imax[i] = nz; 1913 nz = nz*mbs; 1914 } else { 1915 nz = 0; 1916 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1917 } 1918 1919 /* allocate the matrix space */ 1920 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1921 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1922 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1923 b->j = (int*)(b->a + nz*bs2); 1924 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1925 b->i = b->j + nz; 1926 b->singlemalloc = PETSC_TRUE; 1927 1928 b->i[0] = 0; 1929 for (i=1; i<mbs+1; i++) { 1930 b->i[i] = b->i[i-1] + b->imax[i-1]; 1931 } 1932 1933 /* b->ilen will count nonzeros in each block row so far. */ 1934 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1935 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1936 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1937 1938 b->bs = bs; 1939 b->bs2 = bs2; 1940 b->mbs = mbs; 1941 b->nz = 0; 1942 b->maxnz = nz*bs2; 1943 B->info.nz_unneeded = (PetscReal)b->maxnz; 1944 PetscFunctionReturn(0); 1945 } 1946 1947