1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "petscsys.h" /*I "petscmat.h" I*/ 11 12 /* UGLY, ugly, ugly 13 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 14 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 15 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 16 converts the entries into single precision and then calls ..._MatScalar() to put them 17 into the single precision data structures. 18 */ 19 #if defined(PETSC_USE_MAT_SINGLE) 20 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 21 #else 22 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 23 #endif 24 25 #define CHUNKSIZE 10 26 27 /* 28 Checks for missing diagonals 29 */ 30 #undef __FUNCT__ 31 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 32 int MatMissingDiagonal_SeqBAIJ(Mat A) 33 { 34 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 35 int *diag,*jj = a->j,i,ierr; 36 37 PetscFunctionBegin; 38 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 39 diag = a->diag; 40 for (i=0; i<a->mbs; i++) { 41 if (jj[diag[i]] != i) { 42 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 43 } 44 } 45 PetscFunctionReturn(0); 46 } 47 48 #undef __FUNCT__ 49 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 50 int MatMarkDiagonal_SeqBAIJ(Mat A) 51 { 52 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 53 int i,j,*diag,m = a->mbs,ierr; 54 55 PetscFunctionBegin; 56 if (a->diag) PetscFunctionReturn(0); 57 58 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 59 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 60 for (i=0; i<m; i++) { 61 diag[i] = a->i[i+1]; 62 for (j=a->i[i]; j<a->i[i+1]; j++) { 63 if (a->j[j] == i) { 64 diag[i] = j; 65 break; 66 } 67 } 68 } 69 a->diag = diag; 70 PetscFunctionReturn(0); 71 } 72 73 74 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 75 76 #undef __FUNCT__ 77 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 78 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 79 { 80 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 81 int ierr,n = a->mbs,i; 82 83 PetscFunctionBegin; 84 *nn = n; 85 if (!ia) PetscFunctionReturn(0); 86 if (symmetric) { 87 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 88 } else if (oshift == 1) { 89 /* temporarily add 1 to i and j indices */ 90 int nz = a->i[n]; 91 for (i=0; i<nz; i++) a->j[i]++; 92 for (i=0; i<n+1; i++) a->i[i]++; 93 *ia = a->i; *ja = a->j; 94 } else { 95 *ia = a->i; *ja = a->j; 96 } 97 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 103 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 104 { 105 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 106 int i,n = a->mbs,ierr; 107 108 PetscFunctionBegin; 109 if (!ia) PetscFunctionReturn(0); 110 if (symmetric) { 111 ierr = PetscFree(*ia);CHKERRQ(ierr); 112 ierr = PetscFree(*ja);CHKERRQ(ierr); 113 } else if (oshift == 1) { 114 int nz = a->i[n]-1; 115 for (i=0; i<nz; i++) a->j[i]--; 116 for (i=0; i<n+1; i++) a->i[i]--; 117 } 118 PetscFunctionReturn(0); 119 } 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 123 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 124 { 125 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 126 127 PetscFunctionBegin; 128 *bs = baij->bs; 129 PetscFunctionReturn(0); 130 } 131 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatDestroy_SeqBAIJ" 135 int MatDestroy_SeqBAIJ(Mat A) 136 { 137 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 138 int ierr; 139 140 PetscFunctionBegin; 141 #if defined(PETSC_USE_LOG) 142 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 143 #endif 144 ierr = PetscFree(a->a);CHKERRQ(ierr); 145 if (!a->singlemalloc) { 146 ierr = PetscFree(a->i);CHKERRQ(ierr); 147 ierr = PetscFree(a->j);CHKERRQ(ierr); 148 } 149 if (a->row) { 150 ierr = ISDestroy(a->row);CHKERRQ(ierr); 151 } 152 if (a->col) { 153 ierr = ISDestroy(a->col);CHKERRQ(ierr); 154 } 155 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 156 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 157 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 158 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 159 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 160 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 161 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 162 #if defined(PETSC_USE_MAT_SINGLE) 163 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 164 #endif 165 ierr = PetscFree(a);CHKERRQ(ierr); 166 PetscFunctionReturn(0); 167 } 168 169 #undef __FUNCT__ 170 #define __FUNCT__ "MatSetOption_SeqBAIJ" 171 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 172 { 173 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 174 175 PetscFunctionBegin; 176 switch (op) { 177 case MAT_ROW_ORIENTED: 178 a->roworiented = PETSC_TRUE; 179 break; 180 case MAT_COLUMN_ORIENTED: 181 a->roworiented = PETSC_FALSE; 182 break; 183 case MAT_COLUMNS_SORTED: 184 a->sorted = PETSC_TRUE; 185 break; 186 case MAT_COLUMNS_UNSORTED: 187 a->sorted = PETSC_FALSE; 188 break; 189 case MAT_KEEP_ZEROED_ROWS: 190 a->keepzeroedrows = PETSC_TRUE; 191 break; 192 case MAT_NO_NEW_NONZERO_LOCATIONS: 193 a->nonew = 1; 194 break; 195 case MAT_NEW_NONZERO_LOCATION_ERR: 196 a->nonew = -1; 197 break; 198 case MAT_NEW_NONZERO_ALLOCATION_ERR: 199 a->nonew = -2; 200 break; 201 case MAT_YES_NEW_NONZERO_LOCATIONS: 202 a->nonew = 0; 203 break; 204 case MAT_ROWS_SORTED: 205 case MAT_ROWS_UNSORTED: 206 case MAT_YES_NEW_DIAGONALS: 207 case MAT_IGNORE_OFF_PROC_ENTRIES: 208 case MAT_USE_HASH_TABLE: 209 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 210 break; 211 case MAT_NO_NEW_DIAGONALS: 212 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 213 break; 214 case MAT_USE_SINGLE_PRECISION_SOLVES: 215 if (a->bs==4) { 216 PetscTruth sse_enabled_local,sse_enabled_global; 217 int ierr; 218 219 sse_enabled_local = PETSC_FALSE; 220 sse_enabled_global = PETSC_FALSE; 221 222 ierr = PetscSSEIsEnabled(A->comm,&sse_enabled_local,&sse_enabled_global);CHKERRQ(ierr); 223 #if defined(PETSC_HAVE_SSE) 224 if (sse_enabled_local) { 225 a->single_precision_solves = PETSC_TRUE; 226 A->ops->solve = MatSolve_SeqBAIJ_Update; 227 A->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 228 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES set\n"); 229 break; 230 } else { 231 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 232 } 233 #else 234 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 235 #endif 236 } 237 break; 238 default: 239 SETERRQ(PETSC_ERR_SUP,"unknown option"); 240 } 241 PetscFunctionReturn(0); 242 } 243 244 #undef __FUNCT__ 245 #define __FUNCT__ "MatGetRow_SeqBAIJ" 246 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 247 { 248 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 249 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 250 MatScalar *aa,*aa_i; 251 PetscScalar *v_i; 252 253 PetscFunctionBegin; 254 bs = a->bs; 255 ai = a->i; 256 aj = a->j; 257 aa = a->a; 258 bs2 = a->bs2; 259 260 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 261 262 bn = row/bs; /* Block number */ 263 bp = row % bs; /* Block Position */ 264 M = ai[bn+1] - ai[bn]; 265 *nz = bs*M; 266 267 if (v) { 268 *v = 0; 269 if (*nz) { 270 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 271 for (i=0; i<M; i++) { /* for each block in the block row */ 272 v_i = *v + i*bs; 273 aa_i = aa + bs2*(ai[bn] + i); 274 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 275 } 276 } 277 } 278 279 if (idx) { 280 *idx = 0; 281 if (*nz) { 282 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 283 for (i=0; i<M; i++) { /* for each block in the block row */ 284 idx_i = *idx + i*bs; 285 itmp = bs*aj[ai[bn] + i]; 286 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 287 } 288 } 289 } 290 PetscFunctionReturn(0); 291 } 292 293 #undef __FUNCT__ 294 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 295 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 296 { 297 int ierr; 298 299 PetscFunctionBegin; 300 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 301 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 302 PetscFunctionReturn(0); 303 } 304 305 #undef __FUNCT__ 306 #define __FUNCT__ "MatTranspose_SeqBAIJ" 307 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 308 { 309 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 310 Mat C; 311 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 312 int *rows,*cols,bs2=a->bs2; 313 PetscScalar *array; 314 315 PetscFunctionBegin; 316 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 317 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 318 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 319 320 #if defined(PETSC_USE_MAT_SINGLE) 321 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 322 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 323 #else 324 array = a->a; 325 #endif 326 327 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 328 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 329 ierr = PetscFree(col);CHKERRQ(ierr); 330 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 331 cols = rows + bs; 332 for (i=0; i<mbs; i++) { 333 cols[0] = i*bs; 334 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 335 len = ai[i+1] - ai[i]; 336 for (j=0; j<len; j++) { 337 rows[0] = (*aj++)*bs; 338 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 339 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 340 array += bs2; 341 } 342 } 343 ierr = PetscFree(rows);CHKERRQ(ierr); 344 #if defined(PETSC_USE_MAT_SINGLE) 345 ierr = PetscFree(array); 346 #endif 347 348 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 349 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 350 351 if (B) { 352 *B = C; 353 } else { 354 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 355 } 356 PetscFunctionReturn(0); 357 } 358 359 #undef __FUNCT__ 360 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 361 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 362 { 363 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 364 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 365 PetscScalar *aa; 366 FILE *file; 367 368 PetscFunctionBegin; 369 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 370 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 371 col_lens[0] = MAT_FILE_COOKIE; 372 373 col_lens[1] = A->m; 374 col_lens[2] = A->n; 375 col_lens[3] = a->nz*bs2; 376 377 /* store lengths of each row and write (including header) to file */ 378 count = 0; 379 for (i=0; i<a->mbs; i++) { 380 for (j=0; j<bs; j++) { 381 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 382 } 383 } 384 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 385 ierr = PetscFree(col_lens);CHKERRQ(ierr); 386 387 /* store column indices (zero start index) */ 388 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 389 count = 0; 390 for (i=0; i<a->mbs; i++) { 391 for (j=0; j<bs; j++) { 392 for (k=a->i[i]; k<a->i[i+1]; k++) { 393 for (l=0; l<bs; l++) { 394 jj[count++] = bs*a->j[k] + l; 395 } 396 } 397 } 398 } 399 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 400 ierr = PetscFree(jj);CHKERRQ(ierr); 401 402 /* store nonzero values */ 403 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 404 count = 0; 405 for (i=0; i<a->mbs; i++) { 406 for (j=0; j<bs; j++) { 407 for (k=a->i[i]; k<a->i[i+1]; k++) { 408 for (l=0; l<bs; l++) { 409 aa[count++] = a->a[bs2*k + l*bs + j]; 410 } 411 } 412 } 413 } 414 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 415 ierr = PetscFree(aa);CHKERRQ(ierr); 416 417 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 418 if (file) { 419 fprintf(file,"-matload_block_size %d\n",a->bs); 420 } 421 PetscFunctionReturn(0); 422 } 423 424 #undef __FUNCT__ 425 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 426 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 427 { 428 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 429 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 430 PetscViewerFormat format; 431 432 PetscFunctionBegin; 433 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 434 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) { 435 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 436 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 437 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 438 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 439 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 440 for (i=0; i<a->mbs; i++) { 441 for (j=0; j<bs; j++) { 442 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 443 for (k=a->i[i]; k<a->i[i+1]; k++) { 444 for (l=0; l<bs; l++) { 445 #if defined(PETSC_USE_COMPLEX) 446 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 447 ierr = PetscViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l, 448 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 449 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 450 ierr = PetscViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l, 451 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 452 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 453 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 454 } 455 #else 456 if (a->a[bs2*k + l*bs + j] != 0.0) { 457 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 458 } 459 #endif 460 } 461 } 462 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 463 } 464 } 465 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 466 } else { 467 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 468 for (i=0; i<a->mbs; i++) { 469 for (j=0; j<bs; j++) { 470 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 471 for (k=a->i[i]; k<a->i[i+1]; k++) { 472 for (l=0; l<bs; l++) { 473 #if defined(PETSC_USE_COMPLEX) 474 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 475 ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 476 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 477 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 478 ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 479 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 480 } else { 481 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 482 } 483 #else 484 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 485 #endif 486 } 487 } 488 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 489 } 490 } 491 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 492 } 493 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 494 PetscFunctionReturn(0); 495 } 496 497 #undef __FUNCT__ 498 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 499 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 500 { 501 Mat A = (Mat) Aa; 502 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 503 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 504 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 505 MatScalar *aa; 506 PetscViewer viewer; 507 508 PetscFunctionBegin; 509 510 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 511 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 512 513 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 514 515 /* loop over matrix elements drawing boxes */ 516 color = PETSC_DRAW_BLUE; 517 for (i=0,row=0; i<mbs; i++,row+=bs) { 518 for (j=a->i[i]; j<a->i[i+1]; j++) { 519 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 520 x_l = a->j[j]*bs; x_r = x_l + 1.0; 521 aa = a->a + j*bs2; 522 for (k=0; k<bs; k++) { 523 for (l=0; l<bs; l++) { 524 if (PetscRealPart(*aa++) >= 0.) continue; 525 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 526 } 527 } 528 } 529 } 530 color = PETSC_DRAW_CYAN; 531 for (i=0,row=0; i<mbs; i++,row+=bs) { 532 for (j=a->i[i]; j<a->i[i+1]; j++) { 533 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 534 x_l = a->j[j]*bs; x_r = x_l + 1.0; 535 aa = a->a + j*bs2; 536 for (k=0; k<bs; k++) { 537 for (l=0; l<bs; l++) { 538 if (PetscRealPart(*aa++) != 0.) continue; 539 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 540 } 541 } 542 } 543 } 544 545 color = PETSC_DRAW_RED; 546 for (i=0,row=0; i<mbs; i++,row+=bs) { 547 for (j=a->i[i]; j<a->i[i+1]; j++) { 548 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 549 x_l = a->j[j]*bs; x_r = x_l + 1.0; 550 aa = a->a + j*bs2; 551 for (k=0; k<bs; k++) { 552 for (l=0; l<bs; l++) { 553 if (PetscRealPart(*aa++) <= 0.) continue; 554 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 555 } 556 } 557 } 558 } 559 PetscFunctionReturn(0); 560 } 561 562 #undef __FUNCT__ 563 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 564 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 565 { 566 int ierr; 567 PetscReal xl,yl,xr,yr,w,h; 568 PetscDraw draw; 569 PetscTruth isnull; 570 571 PetscFunctionBegin; 572 573 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 574 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 575 576 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 577 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 578 xr += w; yr += h; xl = -w; yl = -h; 579 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 580 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 581 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 582 PetscFunctionReturn(0); 583 } 584 585 #undef __FUNCT__ 586 #define __FUNCT__ "MatView_SeqBAIJ" 587 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 588 { 589 int ierr; 590 PetscTruth issocket,isascii,isbinary,isdraw; 591 592 PetscFunctionBegin; 593 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 594 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 595 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 596 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 597 if (issocket) { 598 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 599 } else if (isascii){ 600 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 601 } else if (isbinary) { 602 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 603 } else if (isdraw) { 604 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 605 } else { 606 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 607 } 608 PetscFunctionReturn(0); 609 } 610 611 612 #undef __FUNCT__ 613 #define __FUNCT__ "MatGetValues_SeqBAIJ" 614 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 615 { 616 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 617 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 618 int *ai = a->i,*ailen = a->ilen; 619 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 620 MatScalar *ap,*aa = a->a,zero = 0.0; 621 622 PetscFunctionBegin; 623 for (k=0; k<m; k++) { /* loop over rows */ 624 row = im[k]; brow = row/bs; 625 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 626 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 627 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 628 nrow = ailen[brow]; 629 for (l=0; l<n; l++) { /* loop over columns */ 630 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 631 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 632 col = in[l] ; 633 bcol = col/bs; 634 cidx = col%bs; 635 ridx = row%bs; 636 high = nrow; 637 low = 0; /* assume unsorted */ 638 while (high-low > 5) { 639 t = (low+high)/2; 640 if (rp[t] > bcol) high = t; 641 else low = t; 642 } 643 for (i=low; i<high; i++) { 644 if (rp[i] > bcol) break; 645 if (rp[i] == bcol) { 646 *v++ = ap[bs2*i+bs*cidx+ridx]; 647 goto finished; 648 } 649 } 650 *v++ = zero; 651 finished:; 652 } 653 } 654 PetscFunctionReturn(0); 655 } 656 657 #if defined(PETSC_USE_MAT_SINGLE) 658 #undef __FUNCT__ 659 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 660 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 661 { 662 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 663 int ierr,i,N = m*n*b->bs2; 664 MatScalar *vsingle; 665 666 PetscFunctionBegin; 667 if (N > b->setvalueslen) { 668 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 669 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 670 b->setvalueslen = N; 671 } 672 vsingle = b->setvaluescopy; 673 for (i=0; i<N; i++) { 674 vsingle[i] = v[i]; 675 } 676 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 677 PetscFunctionReturn(0); 678 } 679 #endif 680 681 682 #undef __FUNCT__ 683 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 684 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is) 685 { 686 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 687 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 688 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 689 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 690 PetscTruth roworiented=a->roworiented; 691 MatScalar *value = v,*ap,*aa = a->a,*bap; 692 693 PetscFunctionBegin; 694 if (roworiented) { 695 stepval = (n-1)*bs; 696 } else { 697 stepval = (m-1)*bs; 698 } 699 for (k=0; k<m; k++) { /* loop over added rows */ 700 row = im[k]; 701 if (row < 0) continue; 702 #if defined(PETSC_USE_BOPT_g) 703 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 704 #endif 705 rp = aj + ai[row]; 706 ap = aa + bs2*ai[row]; 707 rmax = imax[row]; 708 nrow = ailen[row]; 709 low = 0; 710 for (l=0; l<n; l++) { /* loop over added columns */ 711 if (in[l] < 0) continue; 712 #if defined(PETSC_USE_BOPT_g) 713 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 714 #endif 715 col = in[l]; 716 if (roworiented) { 717 value = v + k*(stepval+bs)*bs + l*bs; 718 } else { 719 value = v + l*(stepval+bs)*bs + k*bs; 720 } 721 if (!sorted) low = 0; high = nrow; 722 while (high-low > 7) { 723 t = (low+high)/2; 724 if (rp[t] > col) high = t; 725 else low = t; 726 } 727 for (i=low; i<high; i++) { 728 if (rp[i] > col) break; 729 if (rp[i] == col) { 730 bap = ap + bs2*i; 731 if (roworiented) { 732 if (is == ADD_VALUES) { 733 for (ii=0; ii<bs; ii++,value+=stepval) { 734 for (jj=ii; jj<bs2; jj+=bs) { 735 bap[jj] += *value++; 736 } 737 } 738 } else { 739 for (ii=0; ii<bs; ii++,value+=stepval) { 740 for (jj=ii; jj<bs2; jj+=bs) { 741 bap[jj] = *value++; 742 } 743 } 744 } 745 } else { 746 if (is == ADD_VALUES) { 747 for (ii=0; ii<bs; ii++,value+=stepval) { 748 for (jj=0; jj<bs; jj++) { 749 *bap++ += *value++; 750 } 751 } 752 } else { 753 for (ii=0; ii<bs; ii++,value+=stepval) { 754 for (jj=0; jj<bs; jj++) { 755 *bap++ = *value++; 756 } 757 } 758 } 759 } 760 goto noinsert2; 761 } 762 } 763 if (nonew == 1) goto noinsert2; 764 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 765 if (nrow >= rmax) { 766 /* there is no extra room in row, therefore enlarge */ 767 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 768 MatScalar *new_a; 769 770 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 771 772 /* malloc new storage space */ 773 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 774 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 775 new_j = (int*)(new_a + bs2*new_nz); 776 new_i = new_j + new_nz; 777 778 /* copy over old data into new slots */ 779 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 780 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 781 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 782 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 783 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 784 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 785 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 786 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 787 /* free up old matrix storage */ 788 ierr = PetscFree(a->a);CHKERRQ(ierr); 789 if (!a->singlemalloc) { 790 ierr = PetscFree(a->i);CHKERRQ(ierr); 791 ierr = PetscFree(a->j);CHKERRQ(ierr); 792 } 793 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 794 a->singlemalloc = PETSC_TRUE; 795 796 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 797 rmax = imax[row] = imax[row] + CHUNKSIZE; 798 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 799 a->maxnz += bs2*CHUNKSIZE; 800 a->reallocs++; 801 a->nz++; 802 } 803 N = nrow++ - 1; 804 /* shift up all the later entries in this row */ 805 for (ii=N; ii>=i; ii--) { 806 rp[ii+1] = rp[ii]; 807 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 808 } 809 if (N >= i) { 810 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 811 } 812 rp[i] = col; 813 bap = ap + bs2*i; 814 if (roworiented) { 815 for (ii=0; ii<bs; ii++,value+=stepval) { 816 for (jj=ii; jj<bs2; jj+=bs) { 817 bap[jj] = *value++; 818 } 819 } 820 } else { 821 for (ii=0; ii<bs; ii++,value+=stepval) { 822 for (jj=0; jj<bs; jj++) { 823 *bap++ = *value++; 824 } 825 } 826 } 827 noinsert2:; 828 low = i; 829 } 830 ailen[row] = nrow; 831 } 832 PetscFunctionReturn(0); 833 } 834 835 836 #undef __FUNCT__ 837 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 838 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 839 { 840 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 841 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 842 int m = A->m,*ip,N,*ailen = a->ilen; 843 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 844 MatScalar *aa = a->a,*ap; 845 846 PetscFunctionBegin; 847 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 848 849 if (m) rmax = ailen[0]; 850 for (i=1; i<mbs; i++) { 851 /* move each row back by the amount of empty slots (fshift) before it*/ 852 fshift += imax[i-1] - ailen[i-1]; 853 rmax = PetscMax(rmax,ailen[i]); 854 if (fshift) { 855 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 856 N = ailen[i]; 857 for (j=0; j<N; j++) { 858 ip[j-fshift] = ip[j]; 859 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 860 } 861 } 862 ai[i] = ai[i-1] + ailen[i-1]; 863 } 864 if (mbs) { 865 fshift += imax[mbs-1] - ailen[mbs-1]; 866 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 867 } 868 /* reset ilen and imax for each row */ 869 for (i=0; i<mbs; i++) { 870 ailen[i] = imax[i] = ai[i+1] - ai[i]; 871 } 872 a->nz = ai[mbs]; 873 874 /* diagonals may have moved, so kill the diagonal pointers */ 875 if (fshift && a->diag) { 876 ierr = PetscFree(a->diag);CHKERRQ(ierr); 877 PetscLogObjectMemory(A,-(m+1)*sizeof(int)); 878 a->diag = 0; 879 } 880 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 881 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 882 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 883 a->reallocs = 0; 884 A->info.nz_unneeded = (PetscReal)fshift*bs2; 885 886 PetscFunctionReturn(0); 887 } 888 889 890 891 /* 892 This function returns an array of flags which indicate the locations of contiguous 893 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 894 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 895 Assume: sizes should be long enough to hold all the values. 896 */ 897 #undef __FUNCT__ 898 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 899 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 900 { 901 int i,j,k,row; 902 PetscTruth flg; 903 904 PetscFunctionBegin; 905 for (i=0,j=0; i<n; j++) { 906 row = idx[i]; 907 if (row%bs!=0) { /* Not the begining of a block */ 908 sizes[j] = 1; 909 i++; 910 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 911 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 912 i++; 913 } else { /* Begining of the block, so check if the complete block exists */ 914 flg = PETSC_TRUE; 915 for (k=1; k<bs; k++) { 916 if (row+k != idx[i+k]) { /* break in the block */ 917 flg = PETSC_FALSE; 918 break; 919 } 920 } 921 if (flg == PETSC_TRUE) { /* No break in the bs */ 922 sizes[j] = bs; 923 i+= bs; 924 } else { 925 sizes[j] = 1; 926 i++; 927 } 928 } 929 } 930 *bs_max = j; 931 PetscFunctionReturn(0); 932 } 933 934 #undef __FUNCT__ 935 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 936 int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag) 937 { 938 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 939 int ierr,i,j,k,count,is_n,*is_idx,*rows; 940 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 941 PetscScalar zero = 0.0; 942 MatScalar *aa; 943 944 PetscFunctionBegin; 945 /* Make a copy of the IS and sort it */ 946 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 947 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 948 949 /* allocate memory for rows,sizes */ 950 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 951 sizes = rows + is_n; 952 953 /* copy IS values to rows, and sort them */ 954 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 955 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 956 if (baij->keepzeroedrows) { 957 for (i=0; i<is_n; i++) { sizes[i] = 1; } 958 bs_max = is_n; 959 } else { 960 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 961 } 962 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 963 964 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 965 row = rows[j]; 966 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 967 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 968 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 969 if (sizes[i] == bs && !baij->keepzeroedrows) { 970 if (diag) { 971 if (baij->ilen[row/bs] > 0) { 972 baij->ilen[row/bs] = 1; 973 baij->j[baij->i[row/bs]] = row/bs; 974 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 975 } 976 /* Now insert all the diagonal values for this bs */ 977 for (k=0; k<bs; k++) { 978 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 979 } 980 } else { /* (!diag) */ 981 baij->ilen[row/bs] = 0; 982 } /* end (!diag) */ 983 } else { /* (sizes[i] != bs) */ 984 #if defined (PETSC_USE_DEBUG) 985 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 986 #endif 987 for (k=0; k<count; k++) { 988 aa[0] = zero; 989 aa += bs; 990 } 991 if (diag) { 992 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 993 } 994 } 995 } 996 997 ierr = PetscFree(rows);CHKERRQ(ierr); 998 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 999 PetscFunctionReturn(0); 1000 } 1001 1002 #undef __FUNCT__ 1003 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1004 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 1005 { 1006 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1007 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1008 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1009 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1010 int ridx,cidx,bs2=a->bs2,ierr; 1011 PetscTruth roworiented=a->roworiented; 1012 MatScalar *ap,value,*aa=a->a,*bap; 1013 1014 PetscFunctionBegin; 1015 for (k=0; k<m; k++) { /* loop over added rows */ 1016 row = im[k]; brow = row/bs; 1017 if (row < 0) continue; 1018 #if defined(PETSC_USE_BOPT_g) 1019 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 1020 #endif 1021 rp = aj + ai[brow]; 1022 ap = aa + bs2*ai[brow]; 1023 rmax = imax[brow]; 1024 nrow = ailen[brow]; 1025 low = 0; 1026 for (l=0; l<n; l++) { /* loop over added columns */ 1027 if (in[l] < 0) continue; 1028 #if defined(PETSC_USE_BOPT_g) 1029 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n); 1030 #endif 1031 col = in[l]; bcol = col/bs; 1032 ridx = row % bs; cidx = col % bs; 1033 if (roworiented) { 1034 value = v[l + k*n]; 1035 } else { 1036 value = v[k + l*m]; 1037 } 1038 if (!sorted) low = 0; high = nrow; 1039 while (high-low > 7) { 1040 t = (low+high)/2; 1041 if (rp[t] > bcol) high = t; 1042 else low = t; 1043 } 1044 for (i=low; i<high; i++) { 1045 if (rp[i] > bcol) break; 1046 if (rp[i] == bcol) { 1047 bap = ap + bs2*i + bs*cidx + ridx; 1048 if (is == ADD_VALUES) *bap += value; 1049 else *bap = value; 1050 goto noinsert1; 1051 } 1052 } 1053 if (nonew == 1) goto noinsert1; 1054 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1055 if (nrow >= rmax) { 1056 /* there is no extra room in row, therefore enlarge */ 1057 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1058 MatScalar *new_a; 1059 1060 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1061 1062 /* Malloc new storage space */ 1063 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1064 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1065 new_j = (int*)(new_a + bs2*new_nz); 1066 new_i = new_j + new_nz; 1067 1068 /* copy over old data into new slots */ 1069 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1070 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1071 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1072 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1073 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1074 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1075 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1076 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1077 /* free up old matrix storage */ 1078 ierr = PetscFree(a->a);CHKERRQ(ierr); 1079 if (!a->singlemalloc) { 1080 ierr = PetscFree(a->i);CHKERRQ(ierr); 1081 ierr = PetscFree(a->j);CHKERRQ(ierr); 1082 } 1083 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1084 a->singlemalloc = PETSC_TRUE; 1085 1086 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1087 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1088 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1089 a->maxnz += bs2*CHUNKSIZE; 1090 a->reallocs++; 1091 a->nz++; 1092 } 1093 N = nrow++ - 1; 1094 /* shift up all the later entries in this row */ 1095 for (ii=N; ii>=i; ii--) { 1096 rp[ii+1] = rp[ii]; 1097 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1098 } 1099 if (N>=i) { 1100 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1101 } 1102 rp[i] = bcol; 1103 ap[bs2*i + bs*cidx + ridx] = value; 1104 noinsert1:; 1105 low = i; 1106 } 1107 ailen[brow] = nrow; 1108 } 1109 PetscFunctionReturn(0); 1110 } 1111 1112 1113 #undef __FUNCT__ 1114 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1115 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1116 { 1117 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1118 Mat outA; 1119 int ierr; 1120 PetscTruth row_identity,col_identity; 1121 1122 PetscFunctionBegin; 1123 if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1124 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1125 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1126 if (!row_identity || !col_identity) { 1127 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1128 } 1129 1130 outA = inA; 1131 inA->factor = FACTOR_LU; 1132 1133 if (!a->diag) { 1134 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1135 } 1136 1137 a->row = row; 1138 a->col = col; 1139 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1140 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1141 1142 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1143 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1144 PetscLogObjectParent(inA,a->icol); 1145 1146 /* 1147 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1148 for ILU(0) factorization with natural ordering 1149 */ 1150 if (a->bs < 8) { 1151 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1152 } else { 1153 if (!a->solve_work) { 1154 ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1155 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1156 } 1157 } 1158 1159 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1160 1161 PetscFunctionReturn(0); 1162 } 1163 #undef __FUNCT__ 1164 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1165 int MatPrintHelp_SeqBAIJ(Mat A) 1166 { 1167 static PetscTruth called = PETSC_FALSE; 1168 MPI_Comm comm = A->comm; 1169 int ierr; 1170 1171 PetscFunctionBegin; 1172 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1173 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1174 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1175 PetscFunctionReturn(0); 1176 } 1177 1178 EXTERN_C_BEGIN 1179 #undef __FUNCT__ 1180 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1181 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1182 { 1183 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1184 int i,nz,nbs; 1185 1186 PetscFunctionBegin; 1187 nz = baij->maxnz/baij->bs2; 1188 nbs = baij->nbs; 1189 for (i=0; i<nz; i++) { 1190 baij->j[i] = indices[i]; 1191 } 1192 baij->nz = nz; 1193 for (i=0; i<nbs; i++) { 1194 baij->ilen[i] = baij->imax[i]; 1195 } 1196 1197 PetscFunctionReturn(0); 1198 } 1199 EXTERN_C_END 1200 1201 #undef __FUNCT__ 1202 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1203 /*@ 1204 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1205 in the matrix. 1206 1207 Input Parameters: 1208 + mat - the SeqBAIJ matrix 1209 - indices - the column indices 1210 1211 Level: advanced 1212 1213 Notes: 1214 This can be called if you have precomputed the nonzero structure of the 1215 matrix and want to provide it to the matrix object to improve the performance 1216 of the MatSetValues() operation. 1217 1218 You MUST have set the correct numbers of nonzeros per row in the call to 1219 MatCreateSeqBAIJ(). 1220 1221 MUST be called before any calls to MatSetValues(); 1222 1223 @*/ 1224 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1225 { 1226 int ierr,(*f)(Mat,int *); 1227 1228 PetscFunctionBegin; 1229 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1230 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1231 if (f) { 1232 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1233 } else { 1234 SETERRQ(1,"Wrong type of matrix to set column indices"); 1235 } 1236 PetscFunctionReturn(0); 1237 } 1238 1239 #undef __FUNCT__ 1240 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1241 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1242 { 1243 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1244 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1245 PetscReal atmp; 1246 PetscScalar *x,zero = 0.0; 1247 MatScalar *aa; 1248 int ncols,brow,krow,kcol; 1249 1250 PetscFunctionBegin; 1251 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1252 bs = a->bs; 1253 aa = a->a; 1254 ai = a->i; 1255 aj = a->j; 1256 mbs = a->mbs; 1257 1258 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1259 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1260 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1261 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1262 for (i=0; i<mbs; i++) { 1263 ncols = ai[1] - ai[0]; ai++; 1264 brow = bs*i; 1265 for (j=0; j<ncols; j++){ 1266 /* bcol = bs*(*aj); */ 1267 for (kcol=0; kcol<bs; kcol++){ 1268 for (krow=0; krow<bs; krow++){ 1269 atmp = PetscAbsScalar(*aa); aa++; 1270 row = brow + krow; /* row index */ 1271 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1272 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1273 } 1274 } 1275 aj++; 1276 } 1277 } 1278 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1279 PetscFunctionReturn(0); 1280 } 1281 1282 #undef __FUNCT__ 1283 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1284 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1285 { 1286 int ierr; 1287 1288 PetscFunctionBegin; 1289 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1290 PetscFunctionReturn(0); 1291 } 1292 1293 #undef __FUNCT__ 1294 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1295 int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array) 1296 { 1297 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1298 PetscFunctionBegin; 1299 *array = a->a; 1300 PetscFunctionReturn(0); 1301 } 1302 1303 #undef __FUNCT__ 1304 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1305 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array) 1306 { 1307 PetscFunctionBegin; 1308 PetscFunctionReturn(0); 1309 } 1310 1311 /* -------------------------------------------------------------------*/ 1312 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1313 MatGetRow_SeqBAIJ, 1314 MatRestoreRow_SeqBAIJ, 1315 MatMult_SeqBAIJ_N, 1316 MatMultAdd_SeqBAIJ_N, 1317 MatMultTranspose_SeqBAIJ, 1318 MatMultTransposeAdd_SeqBAIJ, 1319 MatSolve_SeqBAIJ_N, 1320 0, 1321 0, 1322 0, 1323 MatLUFactor_SeqBAIJ, 1324 0, 1325 0, 1326 MatTranspose_SeqBAIJ, 1327 MatGetInfo_SeqBAIJ, 1328 MatEqual_SeqBAIJ, 1329 MatGetDiagonal_SeqBAIJ, 1330 MatDiagonalScale_SeqBAIJ, 1331 MatNorm_SeqBAIJ, 1332 0, 1333 MatAssemblyEnd_SeqBAIJ, 1334 0, 1335 MatSetOption_SeqBAIJ, 1336 MatZeroEntries_SeqBAIJ, 1337 MatZeroRows_SeqBAIJ, 1338 MatLUFactorSymbolic_SeqBAIJ, 1339 MatLUFactorNumeric_SeqBAIJ_N, 1340 0, 1341 0, 1342 MatSetUpPreallocation_SeqBAIJ, 1343 MatILUFactorSymbolic_SeqBAIJ, 1344 0, 1345 MatGetArray_SeqBAIJ, 1346 MatRestoreArray_SeqBAIJ, 1347 MatDuplicate_SeqBAIJ, 1348 0, 1349 0, 1350 MatILUFactor_SeqBAIJ, 1351 0, 1352 0, 1353 MatGetSubMatrices_SeqBAIJ, 1354 MatIncreaseOverlap_SeqBAIJ, 1355 MatGetValues_SeqBAIJ, 1356 0, 1357 MatPrintHelp_SeqBAIJ, 1358 MatScale_SeqBAIJ, 1359 0, 1360 0, 1361 0, 1362 MatGetBlockSize_SeqBAIJ, 1363 MatGetRowIJ_SeqBAIJ, 1364 MatRestoreRowIJ_SeqBAIJ, 1365 0, 1366 0, 1367 0, 1368 0, 1369 0, 1370 0, 1371 MatSetValuesBlocked_SeqBAIJ, 1372 MatGetSubMatrix_SeqBAIJ, 1373 MatDestroy_SeqBAIJ, 1374 MatView_SeqBAIJ, 1375 MatGetPetscMaps_Petsc, 1376 0, 1377 0, 1378 0, 1379 0, 1380 0, 1381 0, 1382 MatGetRowMax_SeqBAIJ, 1383 MatConvert_Basic}; 1384 1385 EXTERN_C_BEGIN 1386 #undef __FUNCT__ 1387 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 1388 int MatStoreValues_SeqBAIJ(Mat mat) 1389 { 1390 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1391 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1392 int ierr; 1393 1394 PetscFunctionBegin; 1395 if (aij->nonew != 1) { 1396 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1397 } 1398 1399 /* allocate space for values if not already there */ 1400 if (!aij->saved_values) { 1401 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1402 } 1403 1404 /* copy values over */ 1405 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1406 PetscFunctionReturn(0); 1407 } 1408 EXTERN_C_END 1409 1410 EXTERN_C_BEGIN 1411 #undef __FUNCT__ 1412 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 1413 int MatRetrieveValues_SeqBAIJ(Mat mat) 1414 { 1415 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1416 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1417 1418 PetscFunctionBegin; 1419 if (aij->nonew != 1) { 1420 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1421 } 1422 if (!aij->saved_values) { 1423 SETERRQ(1,"Must call MatStoreValues(A);first"); 1424 } 1425 1426 /* copy values over */ 1427 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1428 PetscFunctionReturn(0); 1429 } 1430 EXTERN_C_END 1431 1432 EXTERN_C_BEGIN 1433 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1434 EXTERN_C_END 1435 1436 EXTERN_C_BEGIN 1437 #undef __FUNCT__ 1438 #define __FUNCT__ "MatCreate_SeqBAIJ" 1439 int MatCreate_SeqBAIJ(Mat B) 1440 { 1441 int ierr,size; 1442 Mat_SeqBAIJ *b; 1443 1444 PetscFunctionBegin; 1445 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1446 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1447 1448 B->m = B->M = PetscMax(B->m,B->M); 1449 B->n = B->N = PetscMax(B->n,B->N); 1450 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1451 B->data = (void*)b; 1452 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1453 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1454 B->factor = 0; 1455 B->lupivotthreshold = 1.0; 1456 B->mapping = 0; 1457 b->row = 0; 1458 b->col = 0; 1459 b->icol = 0; 1460 b->reallocs = 0; 1461 b->saved_values = 0; 1462 #if defined(PETSC_USE_MAT_SINGLE) 1463 b->setvalueslen = 0; 1464 b->setvaluescopy = PETSC_NULL; 1465 #endif 1466 b->single_precision_solves = PETSC_FALSE; 1467 1468 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 1469 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1470 1471 b->sorted = PETSC_FALSE; 1472 b->roworiented = PETSC_TRUE; 1473 b->nonew = 0; 1474 b->diag = 0; 1475 b->solve_work = 0; 1476 b->mult_work = 0; 1477 b->spptr = 0; 1478 B->info.nz_unneeded = (PetscReal)b->maxnz; 1479 b->keepzeroedrows = PETSC_FALSE; 1480 1481 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1482 "MatStoreValues_SeqBAIJ", 1483 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1484 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1485 "MatRetrieveValues_SeqBAIJ", 1486 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1487 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1488 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1489 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1490 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C", 1491 "MatConvert_SeqBAIJ_SeqAIJ", 1492 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 1493 PetscFunctionReturn(0); 1494 } 1495 EXTERN_C_END 1496 1497 #undef __FUNCT__ 1498 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 1499 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1500 { 1501 Mat C; 1502 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1503 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1504 1505 PetscFunctionBegin; 1506 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1507 1508 *B = 0; 1509 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1510 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1511 c = (Mat_SeqBAIJ*)C->data; 1512 1513 c->bs = a->bs; 1514 c->bs2 = a->bs2; 1515 c->mbs = a->mbs; 1516 c->nbs = a->nbs; 1517 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1518 1519 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1520 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1521 for (i=0; i<mbs; i++) { 1522 c->imax[i] = a->imax[i]; 1523 c->ilen[i] = a->ilen[i]; 1524 } 1525 1526 /* allocate the matrix space */ 1527 c->singlemalloc = PETSC_TRUE; 1528 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1529 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1530 c->j = (int*)(c->a + nz*bs2); 1531 c->i = c->j + nz; 1532 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1533 if (mbs > 0) { 1534 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1535 if (cpvalues == MAT_COPY_VALUES) { 1536 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1537 } else { 1538 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1539 } 1540 } 1541 1542 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1543 c->sorted = a->sorted; 1544 c->roworiented = a->roworiented; 1545 c->nonew = a->nonew; 1546 1547 if (a->diag) { 1548 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1549 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1550 for (i=0; i<mbs; i++) { 1551 c->diag[i] = a->diag[i]; 1552 } 1553 } else c->diag = 0; 1554 c->nz = a->nz; 1555 c->maxnz = a->maxnz; 1556 c->solve_work = 0; 1557 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1558 c->mult_work = 0; 1559 C->preallocated = PETSC_TRUE; 1560 C->assembled = PETSC_TRUE; 1561 *B = C; 1562 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1563 PetscFunctionReturn(0); 1564 } 1565 1566 EXTERN_C_BEGIN 1567 #undef __FUNCT__ 1568 #define __FUNCT__ "MatLoad_SeqBAIJ" 1569 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 1570 { 1571 Mat_SeqBAIJ *a; 1572 Mat B; 1573 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1574 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1575 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1576 int *masked,nmask,tmp,bs2,ishift; 1577 PetscScalar *aa; 1578 MPI_Comm comm = ((PetscObject)viewer)->comm; 1579 1580 PetscFunctionBegin; 1581 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1582 bs2 = bs*bs; 1583 1584 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1585 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1586 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1587 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1588 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1589 M = header[1]; N = header[2]; nz = header[3]; 1590 1591 if (header[3] < 0) { 1592 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1593 } 1594 1595 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1596 1597 /* 1598 This code adds extra rows to make sure the number of rows is 1599 divisible by the blocksize 1600 */ 1601 mbs = M/bs; 1602 extra_rows = bs - M + bs*(mbs); 1603 if (extra_rows == bs) extra_rows = 0; 1604 else mbs++; 1605 if (extra_rows) { 1606 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1607 } 1608 1609 /* read in row lengths */ 1610 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1611 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1612 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1613 1614 /* read in column indices */ 1615 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1616 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1617 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1618 1619 /* loop over row lengths determining block row lengths */ 1620 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1621 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1622 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1623 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1624 masked = mask + mbs; 1625 rowcount = 0; nzcount = 0; 1626 for (i=0; i<mbs; i++) { 1627 nmask = 0; 1628 for (j=0; j<bs; j++) { 1629 kmax = rowlengths[rowcount]; 1630 for (k=0; k<kmax; k++) { 1631 tmp = jj[nzcount++]/bs; 1632 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1633 } 1634 rowcount++; 1635 } 1636 browlengths[i] += nmask; 1637 /* zero out the mask elements we set */ 1638 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1639 } 1640 1641 /* create our matrix */ 1642 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1643 B = *A; 1644 a = (Mat_SeqBAIJ*)B->data; 1645 1646 /* set matrix "i" values */ 1647 a->i[0] = 0; 1648 for (i=1; i<= mbs; i++) { 1649 a->i[i] = a->i[i-1] + browlengths[i-1]; 1650 a->ilen[i-1] = browlengths[i-1]; 1651 } 1652 a->nz = 0; 1653 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1654 1655 /* read in nonzero values */ 1656 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1657 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1658 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1659 1660 /* set "a" and "j" values into matrix */ 1661 nzcount = 0; jcount = 0; 1662 for (i=0; i<mbs; i++) { 1663 nzcountb = nzcount; 1664 nmask = 0; 1665 for (j=0; j<bs; j++) { 1666 kmax = rowlengths[i*bs+j]; 1667 for (k=0; k<kmax; k++) { 1668 tmp = jj[nzcount++]/bs; 1669 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1670 } 1671 } 1672 /* sort the masked values */ 1673 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1674 1675 /* set "j" values into matrix */ 1676 maskcount = 1; 1677 for (j=0; j<nmask; j++) { 1678 a->j[jcount++] = masked[j]; 1679 mask[masked[j]] = maskcount++; 1680 } 1681 /* set "a" values into matrix */ 1682 ishift = bs2*a->i[i]; 1683 for (j=0; j<bs; j++) { 1684 kmax = rowlengths[i*bs+j]; 1685 for (k=0; k<kmax; k++) { 1686 tmp = jj[nzcountb]/bs ; 1687 block = mask[tmp] - 1; 1688 point = jj[nzcountb] - bs*tmp; 1689 idx = ishift + bs2*block + j + bs*point; 1690 a->a[idx] = (MatScalar)aa[nzcountb++]; 1691 } 1692 } 1693 /* zero out the mask elements we set */ 1694 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1695 } 1696 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1697 1698 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1699 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1700 ierr = PetscFree(aa);CHKERRQ(ierr); 1701 ierr = PetscFree(jj);CHKERRQ(ierr); 1702 ierr = PetscFree(mask);CHKERRQ(ierr); 1703 1704 B->assembled = PETSC_TRUE; 1705 1706 ierr = MatView_Private(B);CHKERRQ(ierr); 1707 PetscFunctionReturn(0); 1708 } 1709 EXTERN_C_END 1710 1711 #undef __FUNCT__ 1712 #define __FUNCT__ "MatCreateSeqBAIJ" 1713 /*@C 1714 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1715 compressed row) format. For good matrix assembly performance the 1716 user should preallocate the matrix storage by setting the parameter nz 1717 (or the array nnz). By setting these parameters accurately, performance 1718 during matrix assembly can be increased by more than a factor of 50. 1719 1720 Collective on MPI_Comm 1721 1722 Input Parameters: 1723 + comm - MPI communicator, set to PETSC_COMM_SELF 1724 . bs - size of block 1725 . m - number of rows 1726 . n - number of columns 1727 . nz - number of nonzero blocks per block row (same for all rows) 1728 - nnz - array containing the number of nonzero blocks in the various block rows 1729 (possibly different for each block row) or PETSC_NULL 1730 1731 Output Parameter: 1732 . A - the matrix 1733 1734 Options Database Keys: 1735 . -mat_no_unroll - uses code that does not unroll the loops in the 1736 block calculations (much slower) 1737 . -mat_block_size - size of the blocks to use 1738 1739 Level: intermediate 1740 1741 Notes: 1742 A nonzero block is any block that as 1 or more nonzeros in it 1743 1744 The block AIJ format is fully compatible with standard Fortran 77 1745 storage. That is, the stored row and column indices can begin at 1746 either one (as in Fortran) or zero. See the users' manual for details. 1747 1748 Specify the preallocated storage with either nz or nnz (not both). 1749 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1750 allocation. For additional details, see the users manual chapter on 1751 matrices. 1752 1753 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1754 @*/ 1755 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1756 { 1757 int ierr; 1758 1759 PetscFunctionBegin; 1760 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1761 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 1762 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1763 PetscFunctionReturn(0); 1764 } 1765 1766 #undef __FUNCT__ 1767 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 1768 /*@C 1769 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 1770 per row in the matrix. For good matrix assembly performance the 1771 user should preallocate the matrix storage by setting the parameter nz 1772 (or the array nnz). By setting these parameters accurately, performance 1773 during matrix assembly can be increased by more than a factor of 50. 1774 1775 Collective on MPI_Comm 1776 1777 Input Parameters: 1778 + A - the matrix 1779 . bs - size of block 1780 . nz - number of block nonzeros per block row (same for all rows) 1781 - nnz - array containing the number of block nonzeros in the various block rows 1782 (possibly different for each block row) or PETSC_NULL 1783 1784 Options Database Keys: 1785 . -mat_no_unroll - uses code that does not unroll the loops in the 1786 block calculations (much slower) 1787 . -mat_block_size - size of the blocks to use 1788 1789 Level: intermediate 1790 1791 Notes: 1792 The block AIJ format is fully compatible with standard Fortran 77 1793 storage. That is, the stored row and column indices can begin at 1794 either one (as in Fortran) or zero. See the users' manual for details. 1795 1796 Specify the preallocated storage with either nz or nnz (not both). 1797 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1798 allocation. For additional details, see the users manual chapter on 1799 matrices. 1800 1801 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1802 @*/ 1803 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1804 { 1805 Mat_SeqBAIJ *b; 1806 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1807 PetscTruth flg; 1808 1809 PetscFunctionBegin; 1810 ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr); 1811 if (!flg) PetscFunctionReturn(0); 1812 1813 B->preallocated = PETSC_TRUE; 1814 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1815 if (nnz && newbs != bs) { 1816 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1817 } 1818 bs = newbs; 1819 1820 mbs = B->m/bs; 1821 nbs = B->n/bs; 1822 bs2 = bs*bs; 1823 1824 if (mbs*bs!=B->m || nbs*bs!=B->n) { 1825 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1826 } 1827 1828 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1829 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1830 if (nnz) { 1831 for (i=0; i<mbs; i++) { 1832 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1833 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1834 } 1835 } 1836 1837 b = (Mat_SeqBAIJ*)B->data; 1838 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1839 B->ops->solve = MatSolve_SeqBAIJ_Update; 1840 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 1841 if (!flg) { 1842 switch (bs) { 1843 case 1: 1844 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1845 B->ops->mult = MatMult_SeqBAIJ_1; 1846 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1847 break; 1848 case 2: 1849 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1850 B->ops->mult = MatMult_SeqBAIJ_2; 1851 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1852 break; 1853 case 3: 1854 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1855 B->ops->mult = MatMult_SeqBAIJ_3; 1856 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1857 break; 1858 case 4: 1859 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1860 B->ops->mult = MatMult_SeqBAIJ_4; 1861 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1862 break; 1863 case 5: 1864 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1865 B->ops->mult = MatMult_SeqBAIJ_5; 1866 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1867 break; 1868 case 6: 1869 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 1870 B->ops->mult = MatMult_SeqBAIJ_6; 1871 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 1872 break; 1873 case 7: 1874 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 1875 B->ops->mult = MatMult_SeqBAIJ_7; 1876 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 1877 break; 1878 default: 1879 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 1880 B->ops->mult = MatMult_SeqBAIJ_N; 1881 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 1882 break; 1883 } 1884 } 1885 b->bs = bs; 1886 b->mbs = mbs; 1887 b->nbs = nbs; 1888 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1889 if (!nnz) { 1890 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1891 else if (nz <= 0) nz = 1; 1892 for (i=0; i<mbs; i++) b->imax[i] = nz; 1893 nz = nz*mbs; 1894 } else { 1895 nz = 0; 1896 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1897 } 1898 1899 /* allocate the matrix space */ 1900 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1901 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1902 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1903 b->j = (int*)(b->a + nz*bs2); 1904 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1905 b->i = b->j + nz; 1906 b->singlemalloc = PETSC_TRUE; 1907 1908 b->i[0] = 0; 1909 for (i=1; i<mbs+1; i++) { 1910 b->i[i] = b->i[i-1] + b->imax[i-1]; 1911 } 1912 1913 /* b->ilen will count nonzeros in each block row so far. */ 1914 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1915 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1916 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1917 1918 b->bs = bs; 1919 b->bs2 = bs2; 1920 b->mbs = mbs; 1921 b->nz = 0; 1922 b->maxnz = nz*bs2; 1923 B->info.nz_unneeded = (PetscReal)b->maxnz; 1924 PetscFunctionReturn(0); 1925 } 1926 1927