1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "petscsys.h" /*I "petscmat.h" I*/ 11 12 /* UGLY, ugly, ugly 13 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 14 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 15 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 16 converts the entries into single precision and then calls ..._MatScalar() to put them 17 into the single precision data structures. 18 */ 19 #if defined(PETSC_USE_MAT_SINGLE) 20 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 21 #else 22 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 23 #endif 24 #if defined(PETSC_HAVE_DSCPACK) 25 EXTERN int MatUseDSCPACK_MPIBAIJ(Mat); 26 #endif 27 28 #define CHUNKSIZE 10 29 30 /* 31 Checks for missing diagonals 32 */ 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 35 int MatMissingDiagonal_SeqBAIJ(Mat A) 36 { 37 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 38 int *diag,*jj = a->j,i,ierr; 39 40 PetscFunctionBegin; 41 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 42 diag = a->diag; 43 for (i=0; i<a->mbs; i++) { 44 if (jj[diag[i]] != i) { 45 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 46 } 47 } 48 PetscFunctionReturn(0); 49 } 50 51 #undef __FUNCT__ 52 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 53 int MatMarkDiagonal_SeqBAIJ(Mat A) 54 { 55 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 56 int i,j,*diag,m = a->mbs,ierr; 57 58 PetscFunctionBegin; 59 if (a->diag) PetscFunctionReturn(0); 60 61 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 62 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 63 for (i=0; i<m; i++) { 64 diag[i] = a->i[i+1]; 65 for (j=a->i[i]; j<a->i[i+1]; j++) { 66 if (a->j[j] == i) { 67 diag[i] = j; 68 break; 69 } 70 } 71 } 72 a->diag = diag; 73 PetscFunctionReturn(0); 74 } 75 76 77 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 78 79 #undef __FUNCT__ 80 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 81 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 82 { 83 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 84 int ierr,n = a->mbs,i; 85 86 PetscFunctionBegin; 87 *nn = n; 88 if (!ia) PetscFunctionReturn(0); 89 if (symmetric) { 90 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 91 } else if (oshift == 1) { 92 /* temporarily add 1 to i and j indices */ 93 int nz = a->i[n]; 94 for (i=0; i<nz; i++) a->j[i]++; 95 for (i=0; i<n+1; i++) a->i[i]++; 96 *ia = a->i; *ja = a->j; 97 } else { 98 *ia = a->i; *ja = a->j; 99 } 100 101 PetscFunctionReturn(0); 102 } 103 104 #undef __FUNCT__ 105 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 106 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 107 { 108 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 109 int i,n = a->mbs,ierr; 110 111 PetscFunctionBegin; 112 if (!ia) PetscFunctionReturn(0); 113 if (symmetric) { 114 ierr = PetscFree(*ia);CHKERRQ(ierr); 115 ierr = PetscFree(*ja);CHKERRQ(ierr); 116 } else if (oshift == 1) { 117 int nz = a->i[n]-1; 118 for (i=0; i<nz; i++) a->j[i]--; 119 for (i=0; i<n+1; i++) a->i[i]--; 120 } 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 126 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 127 { 128 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 129 130 PetscFunctionBegin; 131 *bs = baij->bs; 132 PetscFunctionReturn(0); 133 } 134 135 136 #undef __FUNCT__ 137 #define __FUNCT__ "MatDestroy_SeqBAIJ" 138 int MatDestroy_SeqBAIJ(Mat A) 139 { 140 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 141 int ierr; 142 143 PetscFunctionBegin; 144 #if defined(PETSC_USE_LOG) 145 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 146 #endif 147 ierr = PetscFree(a->a);CHKERRQ(ierr); 148 if (!a->singlemalloc) { 149 ierr = PetscFree(a->i);CHKERRQ(ierr); 150 ierr = PetscFree(a->j);CHKERRQ(ierr); 151 } 152 if (a->row) { 153 ierr = ISDestroy(a->row);CHKERRQ(ierr); 154 } 155 if (a->col) { 156 ierr = ISDestroy(a->col);CHKERRQ(ierr); 157 } 158 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 159 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 160 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 161 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 162 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 163 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 164 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 165 #if defined(PETSC_USE_MAT_SINGLE) 166 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 167 #endif 168 ierr = PetscFree(a);CHKERRQ(ierr); 169 PetscFunctionReturn(0); 170 } 171 172 #undef __FUNCT__ 173 #define __FUNCT__ "MatSetOption_SeqBAIJ" 174 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 175 { 176 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 177 178 PetscFunctionBegin; 179 switch (op) { 180 case MAT_ROW_ORIENTED: 181 a->roworiented = PETSC_TRUE; 182 break; 183 case MAT_COLUMN_ORIENTED: 184 a->roworiented = PETSC_FALSE; 185 break; 186 case MAT_COLUMNS_SORTED: 187 a->sorted = PETSC_TRUE; 188 break; 189 case MAT_COLUMNS_UNSORTED: 190 a->sorted = PETSC_FALSE; 191 break; 192 case MAT_KEEP_ZEROED_ROWS: 193 a->keepzeroedrows = PETSC_TRUE; 194 break; 195 case MAT_NO_NEW_NONZERO_LOCATIONS: 196 a->nonew = 1; 197 break; 198 case MAT_NEW_NONZERO_LOCATION_ERR: 199 a->nonew = -1; 200 break; 201 case MAT_NEW_NONZERO_ALLOCATION_ERR: 202 a->nonew = -2; 203 break; 204 case MAT_YES_NEW_NONZERO_LOCATIONS: 205 a->nonew = 0; 206 break; 207 case MAT_ROWS_SORTED: 208 case MAT_ROWS_UNSORTED: 209 case MAT_YES_NEW_DIAGONALS: 210 case MAT_IGNORE_OFF_PROC_ENTRIES: 211 case MAT_USE_HASH_TABLE: 212 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 213 break; 214 case MAT_NO_NEW_DIAGONALS: 215 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 216 case MAT_USE_SINGLE_PRECISION_SOLVES: 217 if (a->bs==4) { 218 PetscTruth sse_enabled_local,sse_enabled_global; 219 int ierr; 220 221 sse_enabled_local = PETSC_FALSE; 222 sse_enabled_global = PETSC_FALSE; 223 224 ierr = PetscSSEIsEnabled(A->comm,&sse_enabled_local,&sse_enabled_global);CHKERRQ(ierr); 225 #if defined(PETSC_HAVE_SSE) 226 if (sse_enabled_local) { 227 a->single_precision_solves = PETSC_TRUE; 228 A->ops->solve = MatSolve_SeqBAIJ_Update; 229 A->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 230 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES set\n"); 231 break; 232 } else { 233 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 234 } 235 #else 236 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 237 #endif 238 } 239 break; 240 default: 241 SETERRQ(PETSC_ERR_SUP,"unknown option"); 242 } 243 PetscFunctionReturn(0); 244 } 245 246 #undef __FUNCT__ 247 #define __FUNCT__ "MatGetRow_SeqBAIJ" 248 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 249 { 250 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 251 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 252 MatScalar *aa,*aa_i; 253 PetscScalar *v_i; 254 255 PetscFunctionBegin; 256 bs = a->bs; 257 ai = a->i; 258 aj = a->j; 259 aa = a->a; 260 bs2 = a->bs2; 261 262 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 263 264 bn = row/bs; /* Block number */ 265 bp = row % bs; /* Block Position */ 266 M = ai[bn+1] - ai[bn]; 267 *nz = bs*M; 268 269 if (v) { 270 *v = 0; 271 if (*nz) { 272 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 273 for (i=0; i<M; i++) { /* for each block in the block row */ 274 v_i = *v + i*bs; 275 aa_i = aa + bs2*(ai[bn] + i); 276 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 277 } 278 } 279 } 280 281 if (idx) { 282 *idx = 0; 283 if (*nz) { 284 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 285 for (i=0; i<M; i++) { /* for each block in the block row */ 286 idx_i = *idx + i*bs; 287 itmp = bs*aj[ai[bn] + i]; 288 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 289 } 290 } 291 } 292 PetscFunctionReturn(0); 293 } 294 295 #undef __FUNCT__ 296 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 297 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 298 { 299 int ierr; 300 301 PetscFunctionBegin; 302 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 303 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 304 PetscFunctionReturn(0); 305 } 306 307 #undef __FUNCT__ 308 #define __FUNCT__ "MatTranspose_SeqBAIJ" 309 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 310 { 311 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 312 Mat C; 313 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 314 int *rows,*cols,bs2=a->bs2; 315 PetscScalar *array; 316 317 PetscFunctionBegin; 318 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 319 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 320 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 321 322 #if defined(PETSC_USE_MAT_SINGLE) 323 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 324 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 325 #else 326 array = a->a; 327 #endif 328 329 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 330 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 331 ierr = PetscFree(col);CHKERRQ(ierr); 332 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 333 cols = rows + bs; 334 for (i=0; i<mbs; i++) { 335 cols[0] = i*bs; 336 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 337 len = ai[i+1] - ai[i]; 338 for (j=0; j<len; j++) { 339 rows[0] = (*aj++)*bs; 340 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 341 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 342 array += bs2; 343 } 344 } 345 ierr = PetscFree(rows);CHKERRQ(ierr); 346 #if defined(PETSC_USE_MAT_SINGLE) 347 ierr = PetscFree(array); 348 #endif 349 350 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 351 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 352 353 if (B) { 354 *B = C; 355 } else { 356 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 357 } 358 PetscFunctionReturn(0); 359 } 360 361 #undef __FUNCT__ 362 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 363 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 364 { 365 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 366 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 367 PetscScalar *aa; 368 FILE *file; 369 370 PetscFunctionBegin; 371 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 372 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 373 col_lens[0] = MAT_FILE_COOKIE; 374 375 col_lens[1] = A->m; 376 col_lens[2] = A->n; 377 col_lens[3] = a->nz*bs2; 378 379 /* store lengths of each row and write (including header) to file */ 380 count = 0; 381 for (i=0; i<a->mbs; i++) { 382 for (j=0; j<bs; j++) { 383 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 384 } 385 } 386 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 387 ierr = PetscFree(col_lens);CHKERRQ(ierr); 388 389 /* store column indices (zero start index) */ 390 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 391 count = 0; 392 for (i=0; i<a->mbs; i++) { 393 for (j=0; j<bs; j++) { 394 for (k=a->i[i]; k<a->i[i+1]; k++) { 395 for (l=0; l<bs; l++) { 396 jj[count++] = bs*a->j[k] + l; 397 } 398 } 399 } 400 } 401 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 402 ierr = PetscFree(jj);CHKERRQ(ierr); 403 404 /* store nonzero values */ 405 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 406 count = 0; 407 for (i=0; i<a->mbs; i++) { 408 for (j=0; j<bs; j++) { 409 for (k=a->i[i]; k<a->i[i+1]; k++) { 410 for (l=0; l<bs; l++) { 411 aa[count++] = a->a[bs2*k + l*bs + j]; 412 } 413 } 414 } 415 } 416 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 417 ierr = PetscFree(aa);CHKERRQ(ierr); 418 419 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 420 if (file) { 421 fprintf(file,"-matload_block_size %d\n",a->bs); 422 } 423 PetscFunctionReturn(0); 424 } 425 426 extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer); 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 430 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 431 { 432 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 433 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 434 PetscViewerFormat format; 435 436 PetscFunctionBegin; 437 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 438 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) { 439 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 440 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 441 Mat aij; 442 ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr); 443 ierr = MatView(aij,viewer);CHKERRQ(ierr); 444 ierr = MatDestroy(aij);CHKERRQ(ierr); 445 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 446 #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX) 447 ierr = MatMPIBAIJFactorInfo_DSCPACK(A,viewer);CHKERRQ(ierr); 448 #endif 449 PetscFunctionReturn(0); 450 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 451 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 452 for (i=0; i<a->mbs; i++) { 453 for (j=0; j<bs; j++) { 454 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 455 for (k=a->i[i]; k<a->i[i+1]; k++) { 456 for (l=0; l<bs; l++) { 457 #if defined(PETSC_USE_COMPLEX) 458 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 459 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l, 460 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 461 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 462 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l, 463 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 464 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 465 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 466 } 467 #else 468 if (a->a[bs2*k + l*bs + j] != 0.0) { 469 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 470 } 471 #endif 472 } 473 } 474 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 475 } 476 } 477 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 478 } else { 479 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 480 for (i=0; i<a->mbs; i++) { 481 for (j=0; j<bs; j++) { 482 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 483 for (k=a->i[i]; k<a->i[i+1]; k++) { 484 for (l=0; l<bs; l++) { 485 #if defined(PETSC_USE_COMPLEX) 486 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 487 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 488 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 489 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 490 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 491 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 492 } else { 493 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 494 } 495 #else 496 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 497 #endif 498 } 499 } 500 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 501 } 502 } 503 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 504 } 505 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 506 PetscFunctionReturn(0); 507 } 508 509 #undef __FUNCT__ 510 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 511 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 512 { 513 Mat A = (Mat) Aa; 514 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 515 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 516 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 517 MatScalar *aa; 518 PetscViewer viewer; 519 520 PetscFunctionBegin; 521 522 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 523 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 524 525 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 526 527 /* loop over matrix elements drawing boxes */ 528 color = PETSC_DRAW_BLUE; 529 for (i=0,row=0; i<mbs; i++,row+=bs) { 530 for (j=a->i[i]; j<a->i[i+1]; j++) { 531 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 532 x_l = a->j[j]*bs; x_r = x_l + 1.0; 533 aa = a->a + j*bs2; 534 for (k=0; k<bs; k++) { 535 for (l=0; l<bs; l++) { 536 if (PetscRealPart(*aa++) >= 0.) continue; 537 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 538 } 539 } 540 } 541 } 542 color = PETSC_DRAW_CYAN; 543 for (i=0,row=0; i<mbs; i++,row+=bs) { 544 for (j=a->i[i]; j<a->i[i+1]; j++) { 545 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 546 x_l = a->j[j]*bs; x_r = x_l + 1.0; 547 aa = a->a + j*bs2; 548 for (k=0; k<bs; k++) { 549 for (l=0; l<bs; l++) { 550 if (PetscRealPart(*aa++) != 0.) continue; 551 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 552 } 553 } 554 } 555 } 556 557 color = PETSC_DRAW_RED; 558 for (i=0,row=0; i<mbs; i++,row+=bs) { 559 for (j=a->i[i]; j<a->i[i+1]; j++) { 560 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 561 x_l = a->j[j]*bs; x_r = x_l + 1.0; 562 aa = a->a + j*bs2; 563 for (k=0; k<bs; k++) { 564 for (l=0; l<bs; l++) { 565 if (PetscRealPart(*aa++) <= 0.) continue; 566 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 567 } 568 } 569 } 570 } 571 PetscFunctionReturn(0); 572 } 573 574 #undef __FUNCT__ 575 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 576 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 577 { 578 int ierr; 579 PetscReal xl,yl,xr,yr,w,h; 580 PetscDraw draw; 581 PetscTruth isnull; 582 583 PetscFunctionBegin; 584 585 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 586 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 587 588 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 589 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 590 xr += w; yr += h; xl = -w; yl = -h; 591 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 592 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 593 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 594 PetscFunctionReturn(0); 595 } 596 597 #undef __FUNCT__ 598 #define __FUNCT__ "MatView_SeqBAIJ" 599 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 600 { 601 int ierr; 602 PetscTruth issocket,isascii,isbinary,isdraw; 603 604 PetscFunctionBegin; 605 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 606 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 607 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 608 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 609 if (issocket) { 610 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 611 } else if (isascii){ 612 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 613 } else if (isbinary) { 614 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 615 } else if (isdraw) { 616 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 617 } else { 618 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 619 } 620 PetscFunctionReturn(0); 621 } 622 623 624 #undef __FUNCT__ 625 #define __FUNCT__ "MatGetValues_SeqBAIJ" 626 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 627 { 628 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 629 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 630 int *ai = a->i,*ailen = a->ilen; 631 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 632 MatScalar *ap,*aa = a->a,zero = 0.0; 633 634 PetscFunctionBegin; 635 for (k=0; k<m; k++) { /* loop over rows */ 636 row = im[k]; brow = row/bs; 637 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 638 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 639 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 640 nrow = ailen[brow]; 641 for (l=0; l<n; l++) { /* loop over columns */ 642 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 643 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 644 col = in[l] ; 645 bcol = col/bs; 646 cidx = col%bs; 647 ridx = row%bs; 648 high = nrow; 649 low = 0; /* assume unsorted */ 650 while (high-low > 5) { 651 t = (low+high)/2; 652 if (rp[t] > bcol) high = t; 653 else low = t; 654 } 655 for (i=low; i<high; i++) { 656 if (rp[i] > bcol) break; 657 if (rp[i] == bcol) { 658 *v++ = ap[bs2*i+bs*cidx+ridx]; 659 goto finished; 660 } 661 } 662 *v++ = zero; 663 finished:; 664 } 665 } 666 PetscFunctionReturn(0); 667 } 668 669 #if defined(PETSC_USE_MAT_SINGLE) 670 #undef __FUNCT__ 671 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 672 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 673 { 674 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 675 int ierr,i,N = m*n*b->bs2; 676 MatScalar *vsingle; 677 678 PetscFunctionBegin; 679 if (N > b->setvalueslen) { 680 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 681 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 682 b->setvalueslen = N; 683 } 684 vsingle = b->setvaluescopy; 685 for (i=0; i<N; i++) { 686 vsingle[i] = v[i]; 687 } 688 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 689 PetscFunctionReturn(0); 690 } 691 #endif 692 693 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 696 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is) 697 { 698 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 699 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 700 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 701 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 702 PetscTruth roworiented=a->roworiented; 703 MatScalar *value = v,*ap,*aa = a->a,*bap; 704 705 PetscFunctionBegin; 706 if (roworiented) { 707 stepval = (n-1)*bs; 708 } else { 709 stepval = (m-1)*bs; 710 } 711 for (k=0; k<m; k++) { /* loop over added rows */ 712 row = im[k]; 713 if (row < 0) continue; 714 #if defined(PETSC_USE_BOPT_g) 715 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 716 #endif 717 rp = aj + ai[row]; 718 ap = aa + bs2*ai[row]; 719 rmax = imax[row]; 720 nrow = ailen[row]; 721 low = 0; 722 for (l=0; l<n; l++) { /* loop over added columns */ 723 if (in[l] < 0) continue; 724 #if defined(PETSC_USE_BOPT_g) 725 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 726 #endif 727 col = in[l]; 728 if (roworiented) { 729 value = v + k*(stepval+bs)*bs + l*bs; 730 } else { 731 value = v + l*(stepval+bs)*bs + k*bs; 732 } 733 if (!sorted) low = 0; high = nrow; 734 while (high-low > 7) { 735 t = (low+high)/2; 736 if (rp[t] > col) high = t; 737 else low = t; 738 } 739 for (i=low; i<high; i++) { 740 if (rp[i] > col) break; 741 if (rp[i] == col) { 742 bap = ap + bs2*i; 743 if (roworiented) { 744 if (is == ADD_VALUES) { 745 for (ii=0; ii<bs; ii++,value+=stepval) { 746 for (jj=ii; jj<bs2; jj+=bs) { 747 bap[jj] += *value++; 748 } 749 } 750 } else { 751 for (ii=0; ii<bs; ii++,value+=stepval) { 752 for (jj=ii; jj<bs2; jj+=bs) { 753 bap[jj] = *value++; 754 } 755 } 756 } 757 } else { 758 if (is == ADD_VALUES) { 759 for (ii=0; ii<bs; ii++,value+=stepval) { 760 for (jj=0; jj<bs; jj++) { 761 *bap++ += *value++; 762 } 763 } 764 } else { 765 for (ii=0; ii<bs; ii++,value+=stepval) { 766 for (jj=0; jj<bs; jj++) { 767 *bap++ = *value++; 768 } 769 } 770 } 771 } 772 goto noinsert2; 773 } 774 } 775 if (nonew == 1) goto noinsert2; 776 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 777 if (nrow >= rmax) { 778 /* there is no extra room in row, therefore enlarge */ 779 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 780 MatScalar *new_a; 781 782 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 783 784 /* malloc new storage space */ 785 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 786 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 787 new_j = (int*)(new_a + bs2*new_nz); 788 new_i = new_j + new_nz; 789 790 /* copy over old data into new slots */ 791 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 792 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 793 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 794 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 795 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 796 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 797 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 798 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 799 /* free up old matrix storage */ 800 ierr = PetscFree(a->a);CHKERRQ(ierr); 801 if (!a->singlemalloc) { 802 ierr = PetscFree(a->i);CHKERRQ(ierr); 803 ierr = PetscFree(a->j);CHKERRQ(ierr); 804 } 805 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 806 a->singlemalloc = PETSC_TRUE; 807 808 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 809 rmax = imax[row] = imax[row] + CHUNKSIZE; 810 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 811 a->maxnz += bs2*CHUNKSIZE; 812 a->reallocs++; 813 a->nz++; 814 } 815 N = nrow++ - 1; 816 /* shift up all the later entries in this row */ 817 for (ii=N; ii>=i; ii--) { 818 rp[ii+1] = rp[ii]; 819 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 820 } 821 if (N >= i) { 822 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 823 } 824 rp[i] = col; 825 bap = ap + bs2*i; 826 if (roworiented) { 827 for (ii=0; ii<bs; ii++,value+=stepval) { 828 for (jj=ii; jj<bs2; jj+=bs) { 829 bap[jj] = *value++; 830 } 831 } 832 } else { 833 for (ii=0; ii<bs; ii++,value+=stepval) { 834 for (jj=0; jj<bs; jj++) { 835 *bap++ = *value++; 836 } 837 } 838 } 839 noinsert2:; 840 low = i; 841 } 842 ailen[row] = nrow; 843 } 844 PetscFunctionReturn(0); 845 } 846 847 #undef __FUNCT__ 848 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 849 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 850 { 851 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 852 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 853 int m = A->m,*ip,N,*ailen = a->ilen; 854 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 855 MatScalar *aa = a->a,*ap; 856 #if defined(PETSC_HAVE_DSCPACK) 857 PetscTruth flag; 858 #endif 859 860 PetscFunctionBegin; 861 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 862 863 if (m) rmax = ailen[0]; 864 for (i=1; i<mbs; i++) { 865 /* move each row back by the amount of empty slots (fshift) before it*/ 866 fshift += imax[i-1] - ailen[i-1]; 867 rmax = PetscMax(rmax,ailen[i]); 868 if (fshift) { 869 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 870 N = ailen[i]; 871 for (j=0; j<N; j++) { 872 ip[j-fshift] = ip[j]; 873 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 874 } 875 } 876 ai[i] = ai[i-1] + ailen[i-1]; 877 } 878 if (mbs) { 879 fshift += imax[mbs-1] - ailen[mbs-1]; 880 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 881 } 882 /* reset ilen and imax for each row */ 883 for (i=0; i<mbs; i++) { 884 ailen[i] = imax[i] = ai[i+1] - ai[i]; 885 } 886 a->nz = ai[mbs]; 887 888 /* diagonals may have moved, so kill the diagonal pointers */ 889 if (fshift && a->diag) { 890 ierr = PetscFree(a->diag);CHKERRQ(ierr); 891 PetscLogObjectMemory(A,-(mbs+1)*sizeof(int)); 892 a->diag = 0; 893 } 894 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 895 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 896 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 897 a->reallocs = 0; 898 A->info.nz_unneeded = (PetscReal)fshift*bs2; 899 900 #if defined(PETSC_HAVE_DSCPACK) 901 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_baij_dscpack",&flag);CHKERRQ(ierr); 902 if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(A);CHKERRQ(ierr); } 903 #endif 904 905 PetscFunctionReturn(0); 906 } 907 908 909 910 /* 911 This function returns an array of flags which indicate the locations of contiguous 912 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 913 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 914 Assume: sizes should be long enough to hold all the values. 915 */ 916 #undef __FUNCT__ 917 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 918 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 919 { 920 int i,j,k,row; 921 PetscTruth flg; 922 923 PetscFunctionBegin; 924 for (i=0,j=0; i<n; j++) { 925 row = idx[i]; 926 if (row%bs!=0) { /* Not the begining of a block */ 927 sizes[j] = 1; 928 i++; 929 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 930 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 931 i++; 932 } else { /* Begining of the block, so check if the complete block exists */ 933 flg = PETSC_TRUE; 934 for (k=1; k<bs; k++) { 935 if (row+k != idx[i+k]) { /* break in the block */ 936 flg = PETSC_FALSE; 937 break; 938 } 939 } 940 if (flg == PETSC_TRUE) { /* No break in the bs */ 941 sizes[j] = bs; 942 i+= bs; 943 } else { 944 sizes[j] = 1; 945 i++; 946 } 947 } 948 } 949 *bs_max = j; 950 PetscFunctionReturn(0); 951 } 952 953 #undef __FUNCT__ 954 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 955 int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag) 956 { 957 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 958 int ierr,i,j,k,count,is_n,*is_idx,*rows; 959 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 960 PetscScalar zero = 0.0; 961 MatScalar *aa; 962 963 PetscFunctionBegin; 964 /* Make a copy of the IS and sort it */ 965 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 966 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 967 968 /* allocate memory for rows,sizes */ 969 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 970 sizes = rows + is_n; 971 972 /* copy IS values to rows, and sort them */ 973 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 974 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 975 if (baij->keepzeroedrows) { 976 for (i=0; i<is_n; i++) { sizes[i] = 1; } 977 bs_max = is_n; 978 } else { 979 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 980 } 981 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 982 983 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 984 row = rows[j]; 985 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 986 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 987 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 988 if (sizes[i] == bs && !baij->keepzeroedrows) { 989 if (diag) { 990 if (baij->ilen[row/bs] > 0) { 991 baij->ilen[row/bs] = 1; 992 baij->j[baij->i[row/bs]] = row/bs; 993 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 994 } 995 /* Now insert all the diagonal values for this bs */ 996 for (k=0; k<bs; k++) { 997 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 998 } 999 } else { /* (!diag) */ 1000 baij->ilen[row/bs] = 0; 1001 } /* end (!diag) */ 1002 } else { /* (sizes[i] != bs) */ 1003 #if defined (PETSC_USE_DEBUG) 1004 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 1005 #endif 1006 for (k=0; k<count; k++) { 1007 aa[0] = zero; 1008 aa += bs; 1009 } 1010 if (diag) { 1011 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1012 } 1013 } 1014 } 1015 1016 ierr = PetscFree(rows);CHKERRQ(ierr); 1017 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1018 PetscFunctionReturn(0); 1019 } 1020 1021 #undef __FUNCT__ 1022 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1023 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 1024 { 1025 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1026 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1027 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1028 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1029 int ridx,cidx,bs2=a->bs2,ierr; 1030 PetscTruth roworiented=a->roworiented; 1031 MatScalar *ap,value,*aa=a->a,*bap; 1032 1033 PetscFunctionBegin; 1034 for (k=0; k<m; k++) { /* loop over added rows */ 1035 row = im[k]; brow = row/bs; 1036 if (row < 0) continue; 1037 #if defined(PETSC_USE_BOPT_g) 1038 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 1039 #endif 1040 rp = aj + ai[brow]; 1041 ap = aa + bs2*ai[brow]; 1042 rmax = imax[brow]; 1043 nrow = ailen[brow]; 1044 low = 0; 1045 for (l=0; l<n; l++) { /* loop over added columns */ 1046 if (in[l] < 0) continue; 1047 #if defined(PETSC_USE_BOPT_g) 1048 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n); 1049 #endif 1050 col = in[l]; bcol = col/bs; 1051 ridx = row % bs; cidx = col % bs; 1052 if (roworiented) { 1053 value = v[l + k*n]; 1054 } else { 1055 value = v[k + l*m]; 1056 } 1057 if (!sorted) low = 0; high = nrow; 1058 while (high-low > 7) { 1059 t = (low+high)/2; 1060 if (rp[t] > bcol) high = t; 1061 else low = t; 1062 } 1063 for (i=low; i<high; i++) { 1064 if (rp[i] > bcol) break; 1065 if (rp[i] == bcol) { 1066 bap = ap + bs2*i + bs*cidx + ridx; 1067 if (is == ADD_VALUES) *bap += value; 1068 else *bap = value; 1069 goto noinsert1; 1070 } 1071 } 1072 if (nonew == 1) goto noinsert1; 1073 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1074 if (nrow >= rmax) { 1075 /* there is no extra room in row, therefore enlarge */ 1076 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1077 MatScalar *new_a; 1078 1079 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1080 1081 /* Malloc new storage space */ 1082 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1083 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1084 new_j = (int*)(new_a + bs2*new_nz); 1085 new_i = new_j + new_nz; 1086 1087 /* copy over old data into new slots */ 1088 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1089 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1090 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1091 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1092 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1093 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1094 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1095 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1096 /* free up old matrix storage */ 1097 ierr = PetscFree(a->a);CHKERRQ(ierr); 1098 if (!a->singlemalloc) { 1099 ierr = PetscFree(a->i);CHKERRQ(ierr); 1100 ierr = PetscFree(a->j);CHKERRQ(ierr); 1101 } 1102 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1103 a->singlemalloc = PETSC_TRUE; 1104 1105 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1106 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1107 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1108 a->maxnz += bs2*CHUNKSIZE; 1109 a->reallocs++; 1110 a->nz++; 1111 } 1112 N = nrow++ - 1; 1113 /* shift up all the later entries in this row */ 1114 for (ii=N; ii>=i; ii--) { 1115 rp[ii+1] = rp[ii]; 1116 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1117 } 1118 if (N>=i) { 1119 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1120 } 1121 rp[i] = bcol; 1122 ap[bs2*i + bs*cidx + ridx] = value; 1123 noinsert1:; 1124 low = i; 1125 } 1126 ailen[brow] = nrow; 1127 } 1128 PetscFunctionReturn(0); 1129 } 1130 1131 1132 #undef __FUNCT__ 1133 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1134 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1135 { 1136 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1137 Mat outA; 1138 int ierr; 1139 PetscTruth row_identity,col_identity; 1140 1141 PetscFunctionBegin; 1142 if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1143 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1144 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1145 if (!row_identity || !col_identity) { 1146 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1147 } 1148 1149 outA = inA; 1150 inA->factor = FACTOR_LU; 1151 1152 if (!a->diag) { 1153 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1154 } 1155 1156 a->row = row; 1157 a->col = col; 1158 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1159 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1160 1161 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1162 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1163 PetscLogObjectParent(inA,a->icol); 1164 1165 /* 1166 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1167 for ILU(0) factorization with natural ordering 1168 */ 1169 if (a->bs < 8) { 1170 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1171 } else { 1172 if (!a->solve_work) { 1173 ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1174 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1175 } 1176 } 1177 1178 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1179 1180 PetscFunctionReturn(0); 1181 } 1182 #undef __FUNCT__ 1183 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1184 int MatPrintHelp_SeqBAIJ(Mat A) 1185 { 1186 static PetscTruth called = PETSC_FALSE; 1187 MPI_Comm comm = A->comm; 1188 int ierr; 1189 1190 PetscFunctionBegin; 1191 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1192 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1193 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1194 PetscFunctionReturn(0); 1195 } 1196 1197 EXTERN_C_BEGIN 1198 #undef __FUNCT__ 1199 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1200 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1201 { 1202 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1203 int i,nz,nbs; 1204 1205 PetscFunctionBegin; 1206 nz = baij->maxnz/baij->bs2; 1207 nbs = baij->nbs; 1208 for (i=0; i<nz; i++) { 1209 baij->j[i] = indices[i]; 1210 } 1211 baij->nz = nz; 1212 for (i=0; i<nbs; i++) { 1213 baij->ilen[i] = baij->imax[i]; 1214 } 1215 1216 PetscFunctionReturn(0); 1217 } 1218 EXTERN_C_END 1219 1220 #undef __FUNCT__ 1221 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1222 /*@ 1223 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1224 in the matrix. 1225 1226 Input Parameters: 1227 + mat - the SeqBAIJ matrix 1228 - indices - the column indices 1229 1230 Level: advanced 1231 1232 Notes: 1233 This can be called if you have precomputed the nonzero structure of the 1234 matrix and want to provide it to the matrix object to improve the performance 1235 of the MatSetValues() operation. 1236 1237 You MUST have set the correct numbers of nonzeros per row in the call to 1238 MatCreateSeqBAIJ(). 1239 1240 MUST be called before any calls to MatSetValues(); 1241 1242 @*/ 1243 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1244 { 1245 int ierr,(*f)(Mat,int *); 1246 1247 PetscFunctionBegin; 1248 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1249 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1250 if (f) { 1251 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1252 } else { 1253 SETERRQ(1,"Wrong type of matrix to set column indices"); 1254 } 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNCT__ 1259 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1260 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1261 { 1262 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1263 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1264 PetscReal atmp; 1265 PetscScalar *x,zero = 0.0; 1266 MatScalar *aa; 1267 int ncols,brow,krow,kcol; 1268 1269 PetscFunctionBegin; 1270 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1271 bs = a->bs; 1272 aa = a->a; 1273 ai = a->i; 1274 aj = a->j; 1275 mbs = a->mbs; 1276 1277 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1278 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1279 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1280 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1281 for (i=0; i<mbs; i++) { 1282 ncols = ai[1] - ai[0]; ai++; 1283 brow = bs*i; 1284 for (j=0; j<ncols; j++){ 1285 /* bcol = bs*(*aj); */ 1286 for (kcol=0; kcol<bs; kcol++){ 1287 for (krow=0; krow<bs; krow++){ 1288 atmp = PetscAbsScalar(*aa); aa++; 1289 row = brow + krow; /* row index */ 1290 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1291 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1292 } 1293 } 1294 aj++; 1295 } 1296 } 1297 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1298 PetscFunctionReturn(0); 1299 } 1300 1301 #undef __FUNCT__ 1302 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1303 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1304 { 1305 int ierr; 1306 1307 PetscFunctionBegin; 1308 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1309 PetscFunctionReturn(0); 1310 } 1311 1312 #undef __FUNCT__ 1313 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1314 int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array) 1315 { 1316 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1317 PetscFunctionBegin; 1318 *array = a->a; 1319 PetscFunctionReturn(0); 1320 } 1321 1322 #undef __FUNCT__ 1323 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1324 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array) 1325 { 1326 PetscFunctionBegin; 1327 PetscFunctionReturn(0); 1328 } 1329 1330 /* -------------------------------------------------------------------*/ 1331 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1332 MatGetRow_SeqBAIJ, 1333 MatRestoreRow_SeqBAIJ, 1334 MatMult_SeqBAIJ_N, 1335 MatMultAdd_SeqBAIJ_N, 1336 MatMultTranspose_SeqBAIJ, 1337 MatMultTransposeAdd_SeqBAIJ, 1338 MatSolve_SeqBAIJ_N, 1339 0, 1340 0, 1341 0, 1342 MatLUFactor_SeqBAIJ, 1343 0, 1344 0, 1345 MatTranspose_SeqBAIJ, 1346 MatGetInfo_SeqBAIJ, 1347 MatEqual_SeqBAIJ, 1348 MatGetDiagonal_SeqBAIJ, 1349 MatDiagonalScale_SeqBAIJ, 1350 MatNorm_SeqBAIJ, 1351 0, 1352 MatAssemblyEnd_SeqBAIJ, 1353 0, 1354 MatSetOption_SeqBAIJ, 1355 MatZeroEntries_SeqBAIJ, 1356 MatZeroRows_SeqBAIJ, 1357 MatLUFactorSymbolic_SeqBAIJ, 1358 MatLUFactorNumeric_SeqBAIJ_N, 1359 0, 1360 0, 1361 MatSetUpPreallocation_SeqBAIJ, 1362 MatILUFactorSymbolic_SeqBAIJ, 1363 0, 1364 MatGetArray_SeqBAIJ, 1365 MatRestoreArray_SeqBAIJ, 1366 MatDuplicate_SeqBAIJ, 1367 0, 1368 0, 1369 MatILUFactor_SeqBAIJ, 1370 0, 1371 0, 1372 MatGetSubMatrices_SeqBAIJ, 1373 MatIncreaseOverlap_SeqBAIJ, 1374 MatGetValues_SeqBAIJ, 1375 0, 1376 MatPrintHelp_SeqBAIJ, 1377 MatScale_SeqBAIJ, 1378 0, 1379 0, 1380 0, 1381 MatGetBlockSize_SeqBAIJ, 1382 MatGetRowIJ_SeqBAIJ, 1383 MatRestoreRowIJ_SeqBAIJ, 1384 0, 1385 0, 1386 0, 1387 0, 1388 0, 1389 0, 1390 MatSetValuesBlocked_SeqBAIJ, 1391 MatGetSubMatrix_SeqBAIJ, 1392 MatDestroy_SeqBAIJ, 1393 MatView_SeqBAIJ, 1394 MatGetPetscMaps_Petsc, 1395 0, 1396 0, 1397 0, 1398 0, 1399 0, 1400 0, 1401 MatGetRowMax_SeqBAIJ, 1402 MatConvert_Basic}; 1403 1404 EXTERN_C_BEGIN 1405 #undef __FUNCT__ 1406 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 1407 int MatStoreValues_SeqBAIJ(Mat mat) 1408 { 1409 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1410 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1411 int ierr; 1412 1413 PetscFunctionBegin; 1414 if (aij->nonew != 1) { 1415 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1416 } 1417 1418 /* allocate space for values if not already there */ 1419 if (!aij->saved_values) { 1420 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1421 } 1422 1423 /* copy values over */ 1424 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1425 PetscFunctionReturn(0); 1426 } 1427 EXTERN_C_END 1428 1429 EXTERN_C_BEGIN 1430 #undef __FUNCT__ 1431 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 1432 int MatRetrieveValues_SeqBAIJ(Mat mat) 1433 { 1434 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1435 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1436 1437 PetscFunctionBegin; 1438 if (aij->nonew != 1) { 1439 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1440 } 1441 if (!aij->saved_values) { 1442 SETERRQ(1,"Must call MatStoreValues(A);first"); 1443 } 1444 1445 /* copy values over */ 1446 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1447 PetscFunctionReturn(0); 1448 } 1449 EXTERN_C_END 1450 1451 EXTERN_C_BEGIN 1452 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1453 EXTERN_C_END 1454 1455 EXTERN_C_BEGIN 1456 #undef __FUNCT__ 1457 #define __FUNCT__ "MatCreate_SeqBAIJ" 1458 int MatCreate_SeqBAIJ(Mat B) 1459 { 1460 int ierr,size; 1461 Mat_SeqBAIJ *b; 1462 1463 PetscFunctionBegin; 1464 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1465 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1466 1467 B->m = B->M = PetscMax(B->m,B->M); 1468 B->n = B->N = PetscMax(B->n,B->N); 1469 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1470 B->data = (void*)b; 1471 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1472 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1473 B->factor = 0; 1474 B->lupivotthreshold = 1.0; 1475 B->mapping = 0; 1476 b->row = 0; 1477 b->col = 0; 1478 b->icol = 0; 1479 b->reallocs = 0; 1480 b->saved_values = 0; 1481 #if defined(PETSC_USE_MAT_SINGLE) 1482 b->setvalueslen = 0; 1483 b->setvaluescopy = PETSC_NULL; 1484 #endif 1485 b->single_precision_solves = PETSC_FALSE; 1486 1487 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 1488 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1489 1490 b->sorted = PETSC_FALSE; 1491 b->roworiented = PETSC_TRUE; 1492 b->nonew = 0; 1493 b->diag = 0; 1494 b->solve_work = 0; 1495 b->mult_work = 0; 1496 B->spptr = 0; 1497 B->info.nz_unneeded = (PetscReal)b->maxnz; 1498 b->keepzeroedrows = PETSC_FALSE; 1499 1500 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1501 "MatStoreValues_SeqBAIJ", 1502 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1503 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1504 "MatRetrieveValues_SeqBAIJ", 1505 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1506 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1507 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1508 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1509 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C", 1510 "MatConvert_SeqBAIJ_SeqAIJ", 1511 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 1512 PetscFunctionReturn(0); 1513 } 1514 EXTERN_C_END 1515 1516 #undef __FUNCT__ 1517 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 1518 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1519 { 1520 Mat C; 1521 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1522 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1523 1524 PetscFunctionBegin; 1525 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1526 1527 *B = 0; 1528 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1529 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1530 c = (Mat_SeqBAIJ*)C->data; 1531 1532 c->bs = a->bs; 1533 c->bs2 = a->bs2; 1534 c->mbs = a->mbs; 1535 c->nbs = a->nbs; 1536 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1537 1538 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1539 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1540 for (i=0; i<mbs; i++) { 1541 c->imax[i] = a->imax[i]; 1542 c->ilen[i] = a->ilen[i]; 1543 } 1544 1545 /* allocate the matrix space */ 1546 c->singlemalloc = PETSC_TRUE; 1547 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1548 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1549 c->j = (int*)(c->a + nz*bs2); 1550 c->i = c->j + nz; 1551 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1552 if (mbs > 0) { 1553 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1554 if (cpvalues == MAT_COPY_VALUES) { 1555 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1556 } else { 1557 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1558 } 1559 } 1560 1561 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1562 c->sorted = a->sorted; 1563 c->roworiented = a->roworiented; 1564 c->nonew = a->nonew; 1565 1566 if (a->diag) { 1567 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1568 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1569 for (i=0; i<mbs; i++) { 1570 c->diag[i] = a->diag[i]; 1571 } 1572 } else c->diag = 0; 1573 c->nz = a->nz; 1574 c->maxnz = a->maxnz; 1575 c->solve_work = 0; 1576 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1577 c->mult_work = 0; 1578 C->preallocated = PETSC_TRUE; 1579 C->assembled = PETSC_TRUE; 1580 *B = C; 1581 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1582 PetscFunctionReturn(0); 1583 } 1584 1585 EXTERN_C_BEGIN 1586 #undef __FUNCT__ 1587 #define __FUNCT__ "MatLoad_SeqBAIJ" 1588 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 1589 { 1590 Mat_SeqBAIJ *a; 1591 Mat B; 1592 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1593 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1594 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1595 int *masked,nmask,tmp,bs2,ishift; 1596 PetscScalar *aa; 1597 MPI_Comm comm = ((PetscObject)viewer)->comm; 1598 1599 PetscFunctionBegin; 1600 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1601 bs2 = bs*bs; 1602 1603 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1604 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1605 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1606 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1607 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1608 M = header[1]; N = header[2]; nz = header[3]; 1609 1610 if (header[3] < 0) { 1611 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1612 } 1613 1614 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1615 1616 /* 1617 This code adds extra rows to make sure the number of rows is 1618 divisible by the blocksize 1619 */ 1620 mbs = M/bs; 1621 extra_rows = bs - M + bs*(mbs); 1622 if (extra_rows == bs) extra_rows = 0; 1623 else mbs++; 1624 if (extra_rows) { 1625 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1626 } 1627 1628 /* read in row lengths */ 1629 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1630 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1631 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1632 1633 /* read in column indices */ 1634 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1635 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1636 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1637 1638 /* loop over row lengths determining block row lengths */ 1639 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1640 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1641 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1642 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1643 masked = mask + mbs; 1644 rowcount = 0; nzcount = 0; 1645 for (i=0; i<mbs; i++) { 1646 nmask = 0; 1647 for (j=0; j<bs; j++) { 1648 kmax = rowlengths[rowcount]; 1649 for (k=0; k<kmax; k++) { 1650 tmp = jj[nzcount++]/bs; 1651 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1652 } 1653 rowcount++; 1654 } 1655 browlengths[i] += nmask; 1656 /* zero out the mask elements we set */ 1657 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1658 } 1659 1660 /* create our matrix */ 1661 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1662 B = *A; 1663 a = (Mat_SeqBAIJ*)B->data; 1664 1665 /* set matrix "i" values */ 1666 a->i[0] = 0; 1667 for (i=1; i<= mbs; i++) { 1668 a->i[i] = a->i[i-1] + browlengths[i-1]; 1669 a->ilen[i-1] = browlengths[i-1]; 1670 } 1671 a->nz = 0; 1672 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1673 1674 /* read in nonzero values */ 1675 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1676 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1677 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1678 1679 /* set "a" and "j" values into matrix */ 1680 nzcount = 0; jcount = 0; 1681 for (i=0; i<mbs; i++) { 1682 nzcountb = nzcount; 1683 nmask = 0; 1684 for (j=0; j<bs; j++) { 1685 kmax = rowlengths[i*bs+j]; 1686 for (k=0; k<kmax; k++) { 1687 tmp = jj[nzcount++]/bs; 1688 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1689 } 1690 } 1691 /* sort the masked values */ 1692 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1693 1694 /* set "j" values into matrix */ 1695 maskcount = 1; 1696 for (j=0; j<nmask; j++) { 1697 a->j[jcount++] = masked[j]; 1698 mask[masked[j]] = maskcount++; 1699 } 1700 /* set "a" values into matrix */ 1701 ishift = bs2*a->i[i]; 1702 for (j=0; j<bs; j++) { 1703 kmax = rowlengths[i*bs+j]; 1704 for (k=0; k<kmax; k++) { 1705 tmp = jj[nzcountb]/bs ; 1706 block = mask[tmp] - 1; 1707 point = jj[nzcountb] - bs*tmp; 1708 idx = ishift + bs2*block + j + bs*point; 1709 a->a[idx] = (MatScalar)aa[nzcountb++]; 1710 } 1711 } 1712 /* zero out the mask elements we set */ 1713 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1714 } 1715 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1716 1717 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1718 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1719 ierr = PetscFree(aa);CHKERRQ(ierr); 1720 ierr = PetscFree(jj);CHKERRQ(ierr); 1721 ierr = PetscFree(mask);CHKERRQ(ierr); 1722 1723 B->assembled = PETSC_TRUE; 1724 1725 ierr = MatView_Private(B);CHKERRQ(ierr); 1726 PetscFunctionReturn(0); 1727 } 1728 EXTERN_C_END 1729 1730 #undef __FUNCT__ 1731 #define __FUNCT__ "MatCreateSeqBAIJ" 1732 /*@C 1733 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1734 compressed row) format. For good matrix assembly performance the 1735 user should preallocate the matrix storage by setting the parameter nz 1736 (or the array nnz). By setting these parameters accurately, performance 1737 during matrix assembly can be increased by more than a factor of 50. 1738 1739 Collective on MPI_Comm 1740 1741 Input Parameters: 1742 + comm - MPI communicator, set to PETSC_COMM_SELF 1743 . bs - size of block 1744 . m - number of rows 1745 . n - number of columns 1746 . nz - number of nonzero blocks per block row (same for all rows) 1747 - nnz - array containing the number of nonzero blocks in the various block rows 1748 (possibly different for each block row) or PETSC_NULL 1749 1750 Output Parameter: 1751 . A - the matrix 1752 1753 Options Database Keys: 1754 . -mat_no_unroll - uses code that does not unroll the loops in the 1755 block calculations (much slower) 1756 . -mat_block_size - size of the blocks to use 1757 1758 Level: intermediate 1759 1760 Notes: 1761 A nonzero block is any block that as 1 or more nonzeros in it 1762 1763 The block AIJ format is fully compatible with standard Fortran 77 1764 storage. That is, the stored row and column indices can begin at 1765 either one (as in Fortran) or zero. See the users' manual for details. 1766 1767 Specify the preallocated storage with either nz or nnz (not both). 1768 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1769 allocation. For additional details, see the users manual chapter on 1770 matrices. 1771 1772 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1773 @*/ 1774 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1775 { 1776 int ierr; 1777 1778 PetscFunctionBegin; 1779 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1780 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 1781 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1782 PetscFunctionReturn(0); 1783 } 1784 1785 #undef __FUNCT__ 1786 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 1787 /*@C 1788 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 1789 per row in the matrix. For good matrix assembly performance the 1790 user should preallocate the matrix storage by setting the parameter nz 1791 (or the array nnz). By setting these parameters accurately, performance 1792 during matrix assembly can be increased by more than a factor of 50. 1793 1794 Collective on MPI_Comm 1795 1796 Input Parameters: 1797 + A - the matrix 1798 . bs - size of block 1799 . nz - number of block nonzeros per block row (same for all rows) 1800 - nnz - array containing the number of block nonzeros in the various block rows 1801 (possibly different for each block row) or PETSC_NULL 1802 1803 Options Database Keys: 1804 . -mat_no_unroll - uses code that does not unroll the loops in the 1805 block calculations (much slower) 1806 . -mat_block_size - size of the blocks to use 1807 1808 Level: intermediate 1809 1810 Notes: 1811 The block AIJ format is fully compatible with standard Fortran 77 1812 storage. That is, the stored row and column indices can begin at 1813 either one (as in Fortran) or zero. See the users' manual for details. 1814 1815 Specify the preallocated storage with either nz or nnz (not both). 1816 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1817 allocation. For additional details, see the users manual chapter on 1818 matrices. 1819 1820 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1821 @*/ 1822 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1823 { 1824 Mat_SeqBAIJ *b; 1825 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1826 PetscTruth flg; 1827 1828 PetscFunctionBegin; 1829 ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr); 1830 if (!flg) PetscFunctionReturn(0); 1831 1832 B->preallocated = PETSC_TRUE; 1833 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1834 if (nnz && newbs != bs) { 1835 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1836 } 1837 bs = newbs; 1838 1839 mbs = B->m/bs; 1840 nbs = B->n/bs; 1841 bs2 = bs*bs; 1842 1843 if (mbs*bs!=B->m || nbs*bs!=B->n) { 1844 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1845 } 1846 1847 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1848 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1849 if (nnz) { 1850 for (i=0; i<mbs; i++) { 1851 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1852 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1853 } 1854 } 1855 1856 b = (Mat_SeqBAIJ*)B->data; 1857 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1858 B->ops->solve = MatSolve_SeqBAIJ_Update; 1859 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 1860 if (!flg) { 1861 switch (bs) { 1862 case 1: 1863 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1864 B->ops->mult = MatMult_SeqBAIJ_1; 1865 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1866 break; 1867 case 2: 1868 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1869 B->ops->mult = MatMult_SeqBAIJ_2; 1870 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1871 break; 1872 case 3: 1873 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1874 B->ops->mult = MatMult_SeqBAIJ_3; 1875 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1876 break; 1877 case 4: 1878 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1879 B->ops->mult = MatMult_SeqBAIJ_4; 1880 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1881 break; 1882 case 5: 1883 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1884 B->ops->mult = MatMult_SeqBAIJ_5; 1885 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1886 break; 1887 case 6: 1888 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 1889 B->ops->mult = MatMult_SeqBAIJ_6; 1890 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 1891 break; 1892 case 7: 1893 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 1894 B->ops->mult = MatMult_SeqBAIJ_7; 1895 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 1896 break; 1897 default: 1898 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 1899 B->ops->mult = MatMult_SeqBAIJ_N; 1900 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 1901 break; 1902 } 1903 } 1904 b->bs = bs; 1905 b->mbs = mbs; 1906 b->nbs = nbs; 1907 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1908 if (!nnz) { 1909 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1910 else if (nz <= 0) nz = 1; 1911 for (i=0; i<mbs; i++) b->imax[i] = nz; 1912 nz = nz*mbs; 1913 } else { 1914 nz = 0; 1915 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1916 } 1917 1918 /* allocate the matrix space */ 1919 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1920 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1921 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1922 b->j = (int*)(b->a + nz*bs2); 1923 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1924 b->i = b->j + nz; 1925 b->singlemalloc = PETSC_TRUE; 1926 1927 b->i[0] = 0; 1928 for (i=1; i<mbs+1; i++) { 1929 b->i[i] = b->i[i-1] + b->imax[i-1]; 1930 } 1931 1932 /* b->ilen will count nonzeros in each block row so far. */ 1933 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1934 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1935 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1936 1937 b->bs = bs; 1938 b->bs2 = bs2; 1939 b->mbs = mbs; 1940 b->nz = 0; 1941 b->maxnz = nz*bs2; 1942 B->info.nz_unneeded = (PetscReal)b->maxnz; 1943 PetscFunctionReturn(0); 1944 } 1945 1946