1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "petscsys.h" /*I "petscmat.h" I*/ 8 #include "src/mat/impls/baij/seq/baij.h" 9 #include "src/vec/vecimpl.h" 10 #include "src/inline/spops.h" 11 12 /* UGLY, ugly, ugly 13 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 14 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 15 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 16 converts the entries into single precision and then calls ..._MatScalar() to put them 17 into the single precision data structures. 18 */ 19 #if defined(PETSC_USE_MAT_SINGLE) 20 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 21 #else 22 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 23 #endif 24 25 #define CHUNKSIZE 10 26 27 /* 28 Checks for missing diagonals 29 */ 30 #undef __FUNCT__ 31 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 32 int MatMissingDiagonal_SeqBAIJ(Mat A) 33 { 34 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 35 int *diag,*jj = a->j,i,ierr; 36 37 PetscFunctionBegin; 38 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 39 diag = a->diag; 40 for (i=0; i<a->mbs; i++) { 41 if (jj[diag[i]] != i) { 42 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 43 } 44 } 45 PetscFunctionReturn(0); 46 } 47 48 #undef __FUNCT__ 49 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 50 int MatMarkDiagonal_SeqBAIJ(Mat A) 51 { 52 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 53 int i,j,*diag,m = a->mbs,ierr; 54 55 PetscFunctionBegin; 56 if (a->diag) PetscFunctionReturn(0); 57 58 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 59 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 60 for (i=0; i<m; i++) { 61 diag[i] = a->i[i+1]; 62 for (j=a->i[i]; j<a->i[i+1]; j++) { 63 if (a->j[j] == i) { 64 diag[i] = j; 65 break; 66 } 67 } 68 } 69 a->diag = diag; 70 PetscFunctionReturn(0); 71 } 72 73 74 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 75 76 #undef __FUNCT__ 77 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 78 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 79 { 80 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 81 int ierr,n = a->mbs,i; 82 83 PetscFunctionBegin; 84 *nn = n; 85 if (!ia) PetscFunctionReturn(0); 86 if (symmetric) { 87 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 88 } else if (oshift == 1) { 89 /* temporarily add 1 to i and j indices */ 90 int nz = a->i[n]; 91 for (i=0; i<nz; i++) a->j[i]++; 92 for (i=0; i<n+1; i++) a->i[i]++; 93 *ia = a->i; *ja = a->j; 94 } else { 95 *ia = a->i; *ja = a->j; 96 } 97 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 103 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 104 { 105 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 106 int i,n = a->mbs,ierr; 107 108 PetscFunctionBegin; 109 if (!ia) PetscFunctionReturn(0); 110 if (symmetric) { 111 ierr = PetscFree(*ia);CHKERRQ(ierr); 112 ierr = PetscFree(*ja);CHKERRQ(ierr); 113 } else if (oshift == 1) { 114 int nz = a->i[n]-1; 115 for (i=0; i<nz; i++) a->j[i]--; 116 for (i=0; i<n+1; i++) a->i[i]--; 117 } 118 PetscFunctionReturn(0); 119 } 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 123 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 124 { 125 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 126 127 PetscFunctionBegin; 128 *bs = baij->bs; 129 PetscFunctionReturn(0); 130 } 131 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatDestroy_SeqBAIJ" 135 int MatDestroy_SeqBAIJ(Mat A) 136 { 137 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 138 int ierr; 139 140 PetscFunctionBegin; 141 #if defined(PETSC_USE_LOG) 142 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 143 #endif 144 ierr = PetscFree(a->a);CHKERRQ(ierr); 145 if (!a->singlemalloc) { 146 ierr = PetscFree(a->i);CHKERRQ(ierr); 147 ierr = PetscFree(a->j);CHKERRQ(ierr); 148 } 149 if (a->row) { 150 ierr = ISDestroy(a->row);CHKERRQ(ierr); 151 } 152 if (a->col) { 153 ierr = ISDestroy(a->col);CHKERRQ(ierr); 154 } 155 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 156 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 157 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 158 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 159 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 160 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 161 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 162 #if defined(PETSC_USE_MAT_SINGLE) 163 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 164 #endif 165 ierr = PetscFree(a);CHKERRQ(ierr); 166 PetscFunctionReturn(0); 167 } 168 169 #undef __FUNCT__ 170 #define __FUNCT__ "MatSetOption_SeqBAIJ" 171 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 172 { 173 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 174 175 PetscFunctionBegin; 176 switch (op) { 177 case MAT_ROW_ORIENTED: 178 a->roworiented = PETSC_TRUE; 179 break; 180 case MAT_COLUMN_ORIENTED: 181 a->roworiented = PETSC_FALSE; 182 break; 183 case MAT_COLUMNS_SORTED: 184 a->sorted = PETSC_TRUE; 185 break; 186 case MAT_COLUMNS_UNSORTED: 187 a->sorted = PETSC_FALSE; 188 break; 189 case MAT_KEEP_ZEROED_ROWS: 190 a->keepzeroedrows = PETSC_TRUE; 191 break; 192 case MAT_NO_NEW_NONZERO_LOCATIONS: 193 a->nonew = 1; 194 break; 195 case MAT_NEW_NONZERO_LOCATION_ERR: 196 a->nonew = -1; 197 break; 198 case MAT_NEW_NONZERO_ALLOCATION_ERR: 199 a->nonew = -2; 200 break; 201 case MAT_YES_NEW_NONZERO_LOCATIONS: 202 a->nonew = 0; 203 break; 204 case MAT_ROWS_SORTED: 205 case MAT_ROWS_UNSORTED: 206 case MAT_YES_NEW_DIAGONALS: 207 case MAT_IGNORE_OFF_PROC_ENTRIES: 208 case MAT_USE_HASH_TABLE: 209 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 210 break; 211 case MAT_NO_NEW_DIAGONALS: 212 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 213 break; 214 case MAT_USE_SINGLE_PRECISION_SOLVES: 215 if (a->bs==4) { 216 PetscTruth sse_enabled_local,sse_enabled_global; 217 int ierr; 218 219 sse_enabled_local = PETSC_FALSE; 220 sse_enabled_global = PETSC_FALSE; 221 222 ierr = PetscSSEIsEnabled(A->comm,&sse_enabled_local,&sse_enabled_global);CHKERRQ(ierr); 223 #if defined(PETSC_HAVE_SSE) 224 if (sse_enabled_local) { 225 a->single_precision_solves = PETSC_TRUE; 226 A->ops->solve = MatSolve_SeqBAIJ_Update; 227 A->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 228 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES set\n"); 229 break; 230 } else { 231 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 232 } 233 #else 234 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 235 #endif 236 } 237 break; 238 default: 239 SETERRQ(PETSC_ERR_SUP,"unknown option"); 240 } 241 PetscFunctionReturn(0); 242 } 243 244 #undef __FUNCT__ 245 #define __FUNCT__ "MatGetOwnershipRange_SeqBAIJ" 246 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 247 { 248 PetscFunctionBegin; 249 if (m) *m = 0; 250 if (n) *n = A->m; 251 PetscFunctionReturn(0); 252 } 253 254 #undef __FUNCT__ 255 #define __FUNCT__ "MatGetRow_SeqBAIJ" 256 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 257 { 258 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 259 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 260 MatScalar *aa,*aa_i; 261 PetscScalar *v_i; 262 263 PetscFunctionBegin; 264 bs = a->bs; 265 ai = a->i; 266 aj = a->j; 267 aa = a->a; 268 bs2 = a->bs2; 269 270 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 271 272 bn = row/bs; /* Block number */ 273 bp = row % bs; /* Block Position */ 274 M = ai[bn+1] - ai[bn]; 275 *nz = bs*M; 276 277 if (v) { 278 *v = 0; 279 if (*nz) { 280 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 281 for (i=0; i<M; i++) { /* for each block in the block row */ 282 v_i = *v + i*bs; 283 aa_i = aa + bs2*(ai[bn] + i); 284 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 285 } 286 } 287 } 288 289 if (idx) { 290 *idx = 0; 291 if (*nz) { 292 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 293 for (i=0; i<M; i++) { /* for each block in the block row */ 294 idx_i = *idx + i*bs; 295 itmp = bs*aj[ai[bn] + i]; 296 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 297 } 298 } 299 } 300 PetscFunctionReturn(0); 301 } 302 303 #undef __FUNCT__ 304 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 305 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 306 { 307 int ierr; 308 309 PetscFunctionBegin; 310 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 311 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 312 PetscFunctionReturn(0); 313 } 314 315 #undef __FUNCT__ 316 #define __FUNCT__ "MatTranspose_SeqBAIJ" 317 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 318 { 319 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 320 Mat C; 321 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 322 int *rows,*cols,bs2=a->bs2; 323 PetscScalar *array; 324 325 PetscFunctionBegin; 326 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 327 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 328 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 329 330 #if defined(PETSC_USE_MAT_SINGLE) 331 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 332 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 333 #else 334 array = a->a; 335 #endif 336 337 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 338 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 339 ierr = PetscFree(col);CHKERRQ(ierr); 340 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 341 cols = rows + bs; 342 for (i=0; i<mbs; i++) { 343 cols[0] = i*bs; 344 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 345 len = ai[i+1] - ai[i]; 346 for (j=0; j<len; j++) { 347 rows[0] = (*aj++)*bs; 348 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 349 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 350 array += bs2; 351 } 352 } 353 ierr = PetscFree(rows);CHKERRQ(ierr); 354 #if defined(PETSC_USE_MAT_SINGLE) 355 ierr = PetscFree(array); 356 #endif 357 358 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 359 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 360 361 if (B) { 362 *B = C; 363 } else { 364 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 365 } 366 PetscFunctionReturn(0); 367 } 368 369 #undef __FUNCT__ 370 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 371 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 372 { 373 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 374 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 375 PetscScalar *aa; 376 FILE *file; 377 378 PetscFunctionBegin; 379 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 380 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 381 col_lens[0] = MAT_COOKIE; 382 383 col_lens[1] = A->m; 384 col_lens[2] = A->n; 385 col_lens[3] = a->nz*bs2; 386 387 /* store lengths of each row and write (including header) to file */ 388 count = 0; 389 for (i=0; i<a->mbs; i++) { 390 for (j=0; j<bs; j++) { 391 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 392 } 393 } 394 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 395 ierr = PetscFree(col_lens);CHKERRQ(ierr); 396 397 /* store column indices (zero start index) */ 398 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 399 count = 0; 400 for (i=0; i<a->mbs; i++) { 401 for (j=0; j<bs; j++) { 402 for (k=a->i[i]; k<a->i[i+1]; k++) { 403 for (l=0; l<bs; l++) { 404 jj[count++] = bs*a->j[k] + l; 405 } 406 } 407 } 408 } 409 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 410 ierr = PetscFree(jj);CHKERRQ(ierr); 411 412 /* store nonzero values */ 413 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 414 count = 0; 415 for (i=0; i<a->mbs; i++) { 416 for (j=0; j<bs; j++) { 417 for (k=a->i[i]; k<a->i[i+1]; k++) { 418 for (l=0; l<bs; l++) { 419 aa[count++] = a->a[bs2*k + l*bs + j]; 420 } 421 } 422 } 423 } 424 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 425 ierr = PetscFree(aa);CHKERRQ(ierr); 426 427 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 428 if (file) { 429 fprintf(file,"-matload_block_size %d\n",a->bs); 430 } 431 PetscFunctionReturn(0); 432 } 433 434 #undef __FUNCT__ 435 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 436 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 437 { 438 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 439 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 440 PetscViewerFormat format; 441 442 PetscFunctionBegin; 443 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 444 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) { 445 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 446 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 447 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 448 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 449 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 450 for (i=0; i<a->mbs; i++) { 451 for (j=0; j<bs; j++) { 452 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 453 for (k=a->i[i]; k<a->i[i+1]; k++) { 454 for (l=0; l<bs; l++) { 455 #if defined(PETSC_USE_COMPLEX) 456 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 457 ierr = PetscViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l, 458 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 459 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 460 ierr = PetscViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l, 461 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 462 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 463 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 464 } 465 #else 466 if (a->a[bs2*k + l*bs + j] != 0.0) { 467 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 468 } 469 #endif 470 } 471 } 472 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 473 } 474 } 475 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 476 } else { 477 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 478 for (i=0; i<a->mbs; i++) { 479 for (j=0; j<bs; j++) { 480 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 481 for (k=a->i[i]; k<a->i[i+1]; k++) { 482 for (l=0; l<bs; l++) { 483 #if defined(PETSC_USE_COMPLEX) 484 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 485 ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 486 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 487 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 488 ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 489 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 490 } else { 491 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 492 } 493 #else 494 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 495 #endif 496 } 497 } 498 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 499 } 500 } 501 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 502 } 503 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 504 PetscFunctionReturn(0); 505 } 506 507 #undef __FUNCT__ 508 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 509 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 510 { 511 Mat A = (Mat) Aa; 512 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 513 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 514 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 515 MatScalar *aa; 516 PetscViewer viewer; 517 518 PetscFunctionBegin; 519 520 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 521 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 522 523 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 524 525 /* loop over matrix elements drawing boxes */ 526 color = PETSC_DRAW_BLUE; 527 for (i=0,row=0; i<mbs; i++,row+=bs) { 528 for (j=a->i[i]; j<a->i[i+1]; j++) { 529 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 530 x_l = a->j[j]*bs; x_r = x_l + 1.0; 531 aa = a->a + j*bs2; 532 for (k=0; k<bs; k++) { 533 for (l=0; l<bs; l++) { 534 if (PetscRealPart(*aa++) >= 0.) continue; 535 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 536 } 537 } 538 } 539 } 540 color = PETSC_DRAW_CYAN; 541 for (i=0,row=0; i<mbs; i++,row+=bs) { 542 for (j=a->i[i]; j<a->i[i+1]; j++) { 543 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 544 x_l = a->j[j]*bs; x_r = x_l + 1.0; 545 aa = a->a + j*bs2; 546 for (k=0; k<bs; k++) { 547 for (l=0; l<bs; l++) { 548 if (PetscRealPart(*aa++) != 0.) continue; 549 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 550 } 551 } 552 } 553 } 554 555 color = PETSC_DRAW_RED; 556 for (i=0,row=0; i<mbs; i++,row+=bs) { 557 for (j=a->i[i]; j<a->i[i+1]; j++) { 558 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 559 x_l = a->j[j]*bs; x_r = x_l + 1.0; 560 aa = a->a + j*bs2; 561 for (k=0; k<bs; k++) { 562 for (l=0; l<bs; l++) { 563 if (PetscRealPart(*aa++) <= 0.) continue; 564 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 565 } 566 } 567 } 568 } 569 PetscFunctionReturn(0); 570 } 571 572 #undef __FUNCT__ 573 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 574 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 575 { 576 int ierr; 577 PetscReal xl,yl,xr,yr,w,h; 578 PetscDraw draw; 579 PetscTruth isnull; 580 581 PetscFunctionBegin; 582 583 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 584 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 585 586 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 587 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 588 xr += w; yr += h; xl = -w; yl = -h; 589 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 590 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 591 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 592 PetscFunctionReturn(0); 593 } 594 595 #undef __FUNCT__ 596 #define __FUNCT__ "MatView_SeqBAIJ" 597 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 598 { 599 int ierr; 600 PetscTruth issocket,isascii,isbinary,isdraw; 601 602 PetscFunctionBegin; 603 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 604 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 605 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 606 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 607 if (issocket) { 608 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 609 } else if (isascii){ 610 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 611 } else if (isbinary) { 612 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 613 } else if (isdraw) { 614 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 615 } else { 616 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 617 } 618 PetscFunctionReturn(0); 619 } 620 621 622 #undef __FUNCT__ 623 #define __FUNCT__ "MatGetValues_SeqBAIJ" 624 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 625 { 626 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 627 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 628 int *ai = a->i,*ailen = a->ilen; 629 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 630 MatScalar *ap,*aa = a->a,zero = 0.0; 631 632 PetscFunctionBegin; 633 for (k=0; k<m; k++) { /* loop over rows */ 634 row = im[k]; brow = row/bs; 635 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 636 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 637 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 638 nrow = ailen[brow]; 639 for (l=0; l<n; l++) { /* loop over columns */ 640 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 641 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 642 col = in[l] ; 643 bcol = col/bs; 644 cidx = col%bs; 645 ridx = row%bs; 646 high = nrow; 647 low = 0; /* assume unsorted */ 648 while (high-low > 5) { 649 t = (low+high)/2; 650 if (rp[t] > bcol) high = t; 651 else low = t; 652 } 653 for (i=low; i<high; i++) { 654 if (rp[i] > bcol) break; 655 if (rp[i] == bcol) { 656 *v++ = ap[bs2*i+bs*cidx+ridx]; 657 goto finished; 658 } 659 } 660 *v++ = zero; 661 finished:; 662 } 663 } 664 PetscFunctionReturn(0); 665 } 666 667 #if defined(PETSC_USE_MAT_SINGLE) 668 #undef __FUNCT__ 669 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 670 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 671 { 672 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 673 int ierr,i,N = m*n*b->bs2; 674 MatScalar *vsingle; 675 676 PetscFunctionBegin; 677 if (N > b->setvalueslen) { 678 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 679 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 680 b->setvalueslen = N; 681 } 682 vsingle = b->setvaluescopy; 683 for (i=0; i<N; i++) { 684 vsingle[i] = v[i]; 685 } 686 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 687 PetscFunctionReturn(0); 688 } 689 #endif 690 691 692 #undef __FUNCT__ 693 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 694 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is) 695 { 696 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 697 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 698 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 699 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 700 PetscTruth roworiented=a->roworiented; 701 MatScalar *value = v,*ap,*aa = a->a,*bap; 702 703 PetscFunctionBegin; 704 if (roworiented) { 705 stepval = (n-1)*bs; 706 } else { 707 stepval = (m-1)*bs; 708 } 709 for (k=0; k<m; k++) { /* loop over added rows */ 710 row = im[k]; 711 if (row < 0) continue; 712 #if defined(PETSC_USE_BOPT_g) 713 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 714 #endif 715 rp = aj + ai[row]; 716 ap = aa + bs2*ai[row]; 717 rmax = imax[row]; 718 nrow = ailen[row]; 719 low = 0; 720 for (l=0; l<n; l++) { /* loop over added columns */ 721 if (in[l] < 0) continue; 722 #if defined(PETSC_USE_BOPT_g) 723 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 724 #endif 725 col = in[l]; 726 if (roworiented) { 727 value = v + k*(stepval+bs)*bs + l*bs; 728 } else { 729 value = v + l*(stepval+bs)*bs + k*bs; 730 } 731 if (!sorted) low = 0; high = nrow; 732 while (high-low > 7) { 733 t = (low+high)/2; 734 if (rp[t] > col) high = t; 735 else low = t; 736 } 737 for (i=low; i<high; i++) { 738 if (rp[i] > col) break; 739 if (rp[i] == col) { 740 bap = ap + bs2*i; 741 if (roworiented) { 742 if (is == ADD_VALUES) { 743 for (ii=0; ii<bs; ii++,value+=stepval) { 744 for (jj=ii; jj<bs2; jj+=bs) { 745 bap[jj] += *value++; 746 } 747 } 748 } else { 749 for (ii=0; ii<bs; ii++,value+=stepval) { 750 for (jj=ii; jj<bs2; jj+=bs) { 751 bap[jj] = *value++; 752 } 753 } 754 } 755 } else { 756 if (is == ADD_VALUES) { 757 for (ii=0; ii<bs; ii++,value+=stepval) { 758 for (jj=0; jj<bs; jj++) { 759 *bap++ += *value++; 760 } 761 } 762 } else { 763 for (ii=0; ii<bs; ii++,value+=stepval) { 764 for (jj=0; jj<bs; jj++) { 765 *bap++ = *value++; 766 } 767 } 768 } 769 } 770 goto noinsert2; 771 } 772 } 773 if (nonew == 1) goto noinsert2; 774 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 775 if (nrow >= rmax) { 776 /* there is no extra room in row, therefore enlarge */ 777 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 778 MatScalar *new_a; 779 780 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 781 782 /* malloc new storage space */ 783 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 784 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 785 new_j = (int*)(new_a + bs2*new_nz); 786 new_i = new_j + new_nz; 787 788 /* copy over old data into new slots */ 789 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 790 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 791 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 792 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 793 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 794 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 795 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 796 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 797 /* free up old matrix storage */ 798 ierr = PetscFree(a->a);CHKERRQ(ierr); 799 if (!a->singlemalloc) { 800 ierr = PetscFree(a->i);CHKERRQ(ierr); 801 ierr = PetscFree(a->j);CHKERRQ(ierr); 802 } 803 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 804 a->singlemalloc = PETSC_TRUE; 805 806 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 807 rmax = imax[row] = imax[row] + CHUNKSIZE; 808 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 809 a->maxnz += bs2*CHUNKSIZE; 810 a->reallocs++; 811 a->nz++; 812 } 813 N = nrow++ - 1; 814 /* shift up all the later entries in this row */ 815 for (ii=N; ii>=i; ii--) { 816 rp[ii+1] = rp[ii]; 817 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 818 } 819 if (N >= i) { 820 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 821 } 822 rp[i] = col; 823 bap = ap + bs2*i; 824 if (roworiented) { 825 for (ii=0; ii<bs; ii++,value+=stepval) { 826 for (jj=ii; jj<bs2; jj+=bs) { 827 bap[jj] = *value++; 828 } 829 } 830 } else { 831 for (ii=0; ii<bs; ii++,value+=stepval) { 832 for (jj=0; jj<bs; jj++) { 833 *bap++ = *value++; 834 } 835 } 836 } 837 noinsert2:; 838 low = i; 839 } 840 ailen[row] = nrow; 841 } 842 PetscFunctionReturn(0); 843 } 844 845 846 #undef __FUNCT__ 847 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 848 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 849 { 850 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 851 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 852 int m = A->m,*ip,N,*ailen = a->ilen; 853 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 854 MatScalar *aa = a->a,*ap; 855 856 PetscFunctionBegin; 857 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 858 859 if (m) rmax = ailen[0]; 860 for (i=1; i<mbs; i++) { 861 /* move each row back by the amount of empty slots (fshift) before it*/ 862 fshift += imax[i-1] - ailen[i-1]; 863 rmax = PetscMax(rmax,ailen[i]); 864 if (fshift) { 865 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 866 N = ailen[i]; 867 for (j=0; j<N; j++) { 868 ip[j-fshift] = ip[j]; 869 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 870 } 871 } 872 ai[i] = ai[i-1] + ailen[i-1]; 873 } 874 if (mbs) { 875 fshift += imax[mbs-1] - ailen[mbs-1]; 876 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 877 } 878 /* reset ilen and imax for each row */ 879 for (i=0; i<mbs; i++) { 880 ailen[i] = imax[i] = ai[i+1] - ai[i]; 881 } 882 a->nz = ai[mbs]; 883 884 /* diagonals may have moved, so kill the diagonal pointers */ 885 if (fshift && a->diag) { 886 ierr = PetscFree(a->diag);CHKERRQ(ierr); 887 PetscLogObjectMemory(A,-(m+1)*sizeof(int)); 888 a->diag = 0; 889 } 890 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 891 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 892 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 893 a->reallocs = 0; 894 A->info.nz_unneeded = (PetscReal)fshift*bs2; 895 896 PetscFunctionReturn(0); 897 } 898 899 900 901 /* 902 This function returns an array of flags which indicate the locations of contiguous 903 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 904 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 905 Assume: sizes should be long enough to hold all the values. 906 */ 907 #undef __FUNCT__ 908 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 909 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 910 { 911 int i,j,k,row; 912 PetscTruth flg; 913 914 PetscFunctionBegin; 915 for (i=0,j=0; i<n; j++) { 916 row = idx[i]; 917 if (row%bs!=0) { /* Not the begining of a block */ 918 sizes[j] = 1; 919 i++; 920 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 921 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 922 i++; 923 } else { /* Begining of the block, so check if the complete block exists */ 924 flg = PETSC_TRUE; 925 for (k=1; k<bs; k++) { 926 if (row+k != idx[i+k]) { /* break in the block */ 927 flg = PETSC_FALSE; 928 break; 929 } 930 } 931 if (flg == PETSC_TRUE) { /* No break in the bs */ 932 sizes[j] = bs; 933 i+= bs; 934 } else { 935 sizes[j] = 1; 936 i++; 937 } 938 } 939 } 940 *bs_max = j; 941 PetscFunctionReturn(0); 942 } 943 944 #undef __FUNCT__ 945 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 946 int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag) 947 { 948 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 949 int ierr,i,j,k,count,is_n,*is_idx,*rows; 950 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 951 PetscScalar zero = 0.0; 952 MatScalar *aa; 953 954 PetscFunctionBegin; 955 /* Make a copy of the IS and sort it */ 956 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 957 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 958 959 /* allocate memory for rows,sizes */ 960 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 961 sizes = rows + is_n; 962 963 /* copy IS values to rows, and sort them */ 964 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 965 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 966 if (baij->keepzeroedrows) { 967 for (i=0; i<is_n; i++) { sizes[i] = 1; } 968 bs_max = is_n; 969 } else { 970 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 971 } 972 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 973 974 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 975 row = rows[j]; 976 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 977 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 978 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 979 if (sizes[i] == bs && !baij->keepzeroedrows) { 980 if (diag) { 981 if (baij->ilen[row/bs] > 0) { 982 baij->ilen[row/bs] = 1; 983 baij->j[baij->i[row/bs]] = row/bs; 984 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 985 } 986 /* Now insert all the diagonal values for this bs */ 987 for (k=0; k<bs; k++) { 988 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 989 } 990 } else { /* (!diag) */ 991 baij->ilen[row/bs] = 0; 992 } /* end (!diag) */ 993 } else { /* (sizes[i] != bs) */ 994 #if defined (PETSC_USE_DEBUG) 995 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 996 #endif 997 for (k=0; k<count; k++) { 998 aa[0] = zero; 999 aa += bs; 1000 } 1001 if (diag) { 1002 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1003 } 1004 } 1005 } 1006 1007 ierr = PetscFree(rows);CHKERRQ(ierr); 1008 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1009 PetscFunctionReturn(0); 1010 } 1011 1012 #undef __FUNCT__ 1013 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1014 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 1015 { 1016 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1017 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1018 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1019 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1020 int ridx,cidx,bs2=a->bs2,ierr; 1021 PetscTruth roworiented=a->roworiented; 1022 MatScalar *ap,value,*aa=a->a,*bap; 1023 1024 PetscFunctionBegin; 1025 for (k=0; k<m; k++) { /* loop over added rows */ 1026 row = im[k]; brow = row/bs; 1027 if (row < 0) continue; 1028 #if defined(PETSC_USE_BOPT_g) 1029 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 1030 #endif 1031 rp = aj + ai[brow]; 1032 ap = aa + bs2*ai[brow]; 1033 rmax = imax[brow]; 1034 nrow = ailen[brow]; 1035 low = 0; 1036 for (l=0; l<n; l++) { /* loop over added columns */ 1037 if (in[l] < 0) continue; 1038 #if defined(PETSC_USE_BOPT_g) 1039 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n); 1040 #endif 1041 col = in[l]; bcol = col/bs; 1042 ridx = row % bs; cidx = col % bs; 1043 if (roworiented) { 1044 value = v[l + k*n]; 1045 } else { 1046 value = v[k + l*m]; 1047 } 1048 if (!sorted) low = 0; high = nrow; 1049 while (high-low > 7) { 1050 t = (low+high)/2; 1051 if (rp[t] > bcol) high = t; 1052 else low = t; 1053 } 1054 for (i=low; i<high; i++) { 1055 if (rp[i] > bcol) break; 1056 if (rp[i] == bcol) { 1057 bap = ap + bs2*i + bs*cidx + ridx; 1058 if (is == ADD_VALUES) *bap += value; 1059 else *bap = value; 1060 goto noinsert1; 1061 } 1062 } 1063 if (nonew == 1) goto noinsert1; 1064 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1065 if (nrow >= rmax) { 1066 /* there is no extra room in row, therefore enlarge */ 1067 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1068 MatScalar *new_a; 1069 1070 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1071 1072 /* Malloc new storage space */ 1073 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1074 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1075 new_j = (int*)(new_a + bs2*new_nz); 1076 new_i = new_j + new_nz; 1077 1078 /* copy over old data into new slots */ 1079 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1080 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1081 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1082 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1083 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1084 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1085 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1086 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1087 /* free up old matrix storage */ 1088 ierr = PetscFree(a->a);CHKERRQ(ierr); 1089 if (!a->singlemalloc) { 1090 ierr = PetscFree(a->i);CHKERRQ(ierr); 1091 ierr = PetscFree(a->j);CHKERRQ(ierr); 1092 } 1093 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1094 a->singlemalloc = PETSC_TRUE; 1095 1096 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1097 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1098 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1099 a->maxnz += bs2*CHUNKSIZE; 1100 a->reallocs++; 1101 a->nz++; 1102 } 1103 N = nrow++ - 1; 1104 /* shift up all the later entries in this row */ 1105 for (ii=N; ii>=i; ii--) { 1106 rp[ii+1] = rp[ii]; 1107 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1108 } 1109 if (N>=i) { 1110 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1111 } 1112 rp[i] = bcol; 1113 ap[bs2*i + bs*cidx + ridx] = value; 1114 noinsert1:; 1115 low = i; 1116 } 1117 ailen[brow] = nrow; 1118 } 1119 PetscFunctionReturn(0); 1120 } 1121 1122 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1125 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1126 { 1127 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1128 Mat outA; 1129 int ierr; 1130 PetscTruth row_identity,col_identity; 1131 1132 PetscFunctionBegin; 1133 if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1134 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1135 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1136 if (!row_identity || !col_identity) { 1137 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1138 } 1139 1140 outA = inA; 1141 inA->factor = FACTOR_LU; 1142 1143 if (!a->diag) { 1144 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1145 } 1146 1147 a->row = row; 1148 a->col = col; 1149 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1150 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1151 1152 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1153 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1154 PetscLogObjectParent(inA,a->icol); 1155 1156 /* 1157 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1158 for ILU(0) factorization with natural ordering 1159 */ 1160 if (a->bs < 8) { 1161 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1162 } else { 1163 if (!a->solve_work) { 1164 ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1165 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1166 } 1167 } 1168 1169 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1170 1171 PetscFunctionReturn(0); 1172 } 1173 #undef __FUNCT__ 1174 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1175 int MatPrintHelp_SeqBAIJ(Mat A) 1176 { 1177 static PetscTruth called = PETSC_FALSE; 1178 MPI_Comm comm = A->comm; 1179 int ierr; 1180 1181 PetscFunctionBegin; 1182 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1183 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1184 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1185 PetscFunctionReturn(0); 1186 } 1187 1188 EXTERN_C_BEGIN 1189 #undef __FUNCT__ 1190 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1191 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1192 { 1193 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1194 int i,nz,n; 1195 1196 PetscFunctionBegin; 1197 nz = baij->maxnz; 1198 n = mat->n; 1199 for (i=0; i<nz; i++) { 1200 baij->j[i] = indices[i]; 1201 } 1202 baij->nz = nz; 1203 for (i=0; i<n; i++) { 1204 baij->ilen[i] = baij->imax[i]; 1205 } 1206 1207 PetscFunctionReturn(0); 1208 } 1209 EXTERN_C_END 1210 1211 #undef __FUNCT__ 1212 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1213 /*@ 1214 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1215 in the matrix. 1216 1217 Input Parameters: 1218 + mat - the SeqBAIJ matrix 1219 - indices - the column indices 1220 1221 Level: advanced 1222 1223 Notes: 1224 This can be called if you have precomputed the nonzero structure of the 1225 matrix and want to provide it to the matrix object to improve the performance 1226 of the MatSetValues() operation. 1227 1228 You MUST have set the correct numbers of nonzeros per row in the call to 1229 MatCreateSeqBAIJ(). 1230 1231 MUST be called before any calls to MatSetValues(); 1232 1233 @*/ 1234 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1235 { 1236 int ierr,(*f)(Mat,int *); 1237 1238 PetscFunctionBegin; 1239 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1240 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)())&f);CHKERRQ(ierr); 1241 if (f) { 1242 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1243 } else { 1244 SETERRQ(1,"Wrong type of matrix to set column indices"); 1245 } 1246 PetscFunctionReturn(0); 1247 } 1248 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1251 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1252 { 1253 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1254 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1255 PetscReal atmp; 1256 PetscScalar *x,zero = 0.0; 1257 MatScalar *aa; 1258 int ncols,brow,krow,kcol; 1259 1260 PetscFunctionBegin; 1261 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1262 bs = a->bs; 1263 aa = a->a; 1264 ai = a->i; 1265 aj = a->j; 1266 mbs = a->mbs; 1267 1268 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1269 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1270 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1271 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1272 for (i=0; i<mbs; i++) { 1273 ncols = ai[1] - ai[0]; ai++; 1274 brow = bs*i; 1275 for (j=0; j<ncols; j++){ 1276 /* bcol = bs*(*aj); */ 1277 for (kcol=0; kcol<bs; kcol++){ 1278 for (krow=0; krow<bs; krow++){ 1279 atmp = PetscAbsScalar(*aa); aa++; 1280 row = brow + krow; /* row index */ 1281 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1282 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1283 } 1284 } 1285 aj++; 1286 } 1287 } 1288 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1289 PetscFunctionReturn(0); 1290 } 1291 1292 #undef __FUNCT__ 1293 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1294 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1295 { 1296 int ierr; 1297 1298 PetscFunctionBegin; 1299 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1300 PetscFunctionReturn(0); 1301 } 1302 1303 #undef __FUNCT__ 1304 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1305 int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array) 1306 { 1307 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1308 PetscFunctionBegin; 1309 *array = a->a; 1310 PetscFunctionReturn(0); 1311 } 1312 1313 #undef __FUNCT__ 1314 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1315 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array) 1316 { 1317 PetscFunctionBegin; 1318 PetscFunctionReturn(0); 1319 } 1320 1321 /* -------------------------------------------------------------------*/ 1322 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1323 MatGetRow_SeqBAIJ, 1324 MatRestoreRow_SeqBAIJ, 1325 MatMult_SeqBAIJ_N, 1326 MatMultAdd_SeqBAIJ_N, 1327 MatMultTranspose_SeqBAIJ, 1328 MatMultTransposeAdd_SeqBAIJ, 1329 MatSolve_SeqBAIJ_N, 1330 0, 1331 0, 1332 0, 1333 MatLUFactor_SeqBAIJ, 1334 0, 1335 0, 1336 MatTranspose_SeqBAIJ, 1337 MatGetInfo_SeqBAIJ, 1338 MatEqual_SeqBAIJ, 1339 MatGetDiagonal_SeqBAIJ, 1340 MatDiagonalScale_SeqBAIJ, 1341 MatNorm_SeqBAIJ, 1342 0, 1343 MatAssemblyEnd_SeqBAIJ, 1344 0, 1345 MatSetOption_SeqBAIJ, 1346 MatZeroEntries_SeqBAIJ, 1347 MatZeroRows_SeqBAIJ, 1348 MatLUFactorSymbolic_SeqBAIJ, 1349 MatLUFactorNumeric_SeqBAIJ_N, 1350 0, 1351 0, 1352 MatSetUpPreallocation_SeqBAIJ, 1353 0, 1354 MatGetOwnershipRange_SeqBAIJ, 1355 MatILUFactorSymbolic_SeqBAIJ, 1356 0, 1357 MatGetArray_SeqBAIJ, 1358 MatRestoreArray_SeqBAIJ, 1359 MatDuplicate_SeqBAIJ, 1360 0, 1361 0, 1362 MatILUFactor_SeqBAIJ, 1363 0, 1364 0, 1365 MatGetSubMatrices_SeqBAIJ, 1366 MatIncreaseOverlap_SeqBAIJ, 1367 MatGetValues_SeqBAIJ, 1368 0, 1369 MatPrintHelp_SeqBAIJ, 1370 MatScale_SeqBAIJ, 1371 0, 1372 0, 1373 0, 1374 MatGetBlockSize_SeqBAIJ, 1375 MatGetRowIJ_SeqBAIJ, 1376 MatRestoreRowIJ_SeqBAIJ, 1377 0, 1378 0, 1379 0, 1380 0, 1381 0, 1382 0, 1383 MatSetValuesBlocked_SeqBAIJ, 1384 MatGetSubMatrix_SeqBAIJ, 1385 MatDestroy_SeqBAIJ, 1386 MatView_SeqBAIJ, 1387 MatGetPetscMaps_Petsc, 1388 0, 1389 0, 1390 0, 1391 0, 1392 0, 1393 0, 1394 MatGetRowMax_SeqBAIJ, 1395 MatConvert_Basic}; 1396 1397 EXTERN_C_BEGIN 1398 #undef __FUNCT__ 1399 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 1400 int MatStoreValues_SeqBAIJ(Mat mat) 1401 { 1402 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1403 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1404 int ierr; 1405 1406 PetscFunctionBegin; 1407 if (aij->nonew != 1) { 1408 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1409 } 1410 1411 /* allocate space for values if not already there */ 1412 if (!aij->saved_values) { 1413 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1414 } 1415 1416 /* copy values over */ 1417 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1418 PetscFunctionReturn(0); 1419 } 1420 EXTERN_C_END 1421 1422 EXTERN_C_BEGIN 1423 #undef __FUNCT__ 1424 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 1425 int MatRetrieveValues_SeqBAIJ(Mat mat) 1426 { 1427 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1428 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1429 1430 PetscFunctionBegin; 1431 if (aij->nonew != 1) { 1432 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1433 } 1434 if (!aij->saved_values) { 1435 SETERRQ(1,"Must call MatStoreValues(A);first"); 1436 } 1437 1438 /* copy values over */ 1439 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1440 PetscFunctionReturn(0); 1441 } 1442 EXTERN_C_END 1443 1444 EXTERN_C_BEGIN 1445 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1446 EXTERN_C_END 1447 1448 EXTERN_C_BEGIN 1449 #undef __FUNCT__ 1450 #define __FUNCT__ "MatCreate_SeqBAIJ" 1451 int MatCreate_SeqBAIJ(Mat B) 1452 { 1453 int ierr,size; 1454 Mat_SeqBAIJ *b; 1455 1456 PetscFunctionBegin; 1457 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1458 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1459 1460 B->m = B->M = PetscMax(B->m,B->M); 1461 B->n = B->N = PetscMax(B->n,B->N); 1462 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1463 B->data = (void*)b; 1464 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1465 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1466 B->factor = 0; 1467 B->lupivotthreshold = 1.0; 1468 B->mapping = 0; 1469 b->row = 0; 1470 b->col = 0; 1471 b->icol = 0; 1472 b->reallocs = 0; 1473 b->saved_values = 0; 1474 #if defined(PETSC_USE_MAT_SINGLE) 1475 b->setvalueslen = 0; 1476 b->setvaluescopy = PETSC_NULL; 1477 #endif 1478 b->single_precision_solves = PETSC_FALSE; 1479 1480 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 1481 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1482 1483 b->sorted = PETSC_FALSE; 1484 b->roworiented = PETSC_TRUE; 1485 b->nonew = 0; 1486 b->diag = 0; 1487 b->solve_work = 0; 1488 b->mult_work = 0; 1489 b->spptr = 0; 1490 B->info.nz_unneeded = (PetscReal)b->maxnz; 1491 b->keepzeroedrows = PETSC_FALSE; 1492 1493 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1494 "MatStoreValues_SeqBAIJ", 1495 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1496 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1497 "MatRetrieveValues_SeqBAIJ", 1498 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1499 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1500 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1501 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1502 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C", 1503 "MatConvert_SeqBAIJ_SeqAIJ", 1504 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 1505 PetscFunctionReturn(0); 1506 } 1507 EXTERN_C_END 1508 1509 #undef __FUNCT__ 1510 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 1511 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1512 { 1513 Mat C; 1514 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1515 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1516 1517 PetscFunctionBegin; 1518 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1519 1520 *B = 0; 1521 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1522 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1523 c = (Mat_SeqBAIJ*)C->data; 1524 1525 c->bs = a->bs; 1526 c->bs2 = a->bs2; 1527 c->mbs = a->mbs; 1528 c->nbs = a->nbs; 1529 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1530 1531 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1532 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1533 for (i=0; i<mbs; i++) { 1534 c->imax[i] = a->imax[i]; 1535 c->ilen[i] = a->ilen[i]; 1536 } 1537 1538 /* allocate the matrix space */ 1539 c->singlemalloc = PETSC_TRUE; 1540 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1541 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1542 c->j = (int*)(c->a + nz*bs2); 1543 c->i = c->j + nz; 1544 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1545 if (mbs > 0) { 1546 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1547 if (cpvalues == MAT_COPY_VALUES) { 1548 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1549 } else { 1550 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1551 } 1552 } 1553 1554 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1555 c->sorted = a->sorted; 1556 c->roworiented = a->roworiented; 1557 c->nonew = a->nonew; 1558 1559 if (a->diag) { 1560 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1561 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1562 for (i=0; i<mbs; i++) { 1563 c->diag[i] = a->diag[i]; 1564 } 1565 } else c->diag = 0; 1566 c->nz = a->nz; 1567 c->maxnz = a->maxnz; 1568 c->solve_work = 0; 1569 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1570 c->mult_work = 0; 1571 C->preallocated = PETSC_TRUE; 1572 C->assembled = PETSC_TRUE; 1573 *B = C; 1574 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1575 PetscFunctionReturn(0); 1576 } 1577 1578 EXTERN_C_BEGIN 1579 #undef __FUNCT__ 1580 #define __FUNCT__ "MatLoad_SeqBAIJ" 1581 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 1582 { 1583 Mat_SeqBAIJ *a; 1584 Mat B; 1585 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1586 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1587 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1588 int *masked,nmask,tmp,bs2,ishift; 1589 PetscScalar *aa; 1590 MPI_Comm comm = ((PetscObject)viewer)->comm; 1591 1592 PetscFunctionBegin; 1593 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1594 bs2 = bs*bs; 1595 1596 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1597 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1598 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1599 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1600 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1601 M = header[1]; N = header[2]; nz = header[3]; 1602 1603 if (header[3] < 0) { 1604 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1605 } 1606 1607 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1608 1609 /* 1610 This code adds extra rows to make sure the number of rows is 1611 divisible by the blocksize 1612 */ 1613 mbs = M/bs; 1614 extra_rows = bs - M + bs*(mbs); 1615 if (extra_rows == bs) extra_rows = 0; 1616 else mbs++; 1617 if (extra_rows) { 1618 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1619 } 1620 1621 /* read in row lengths */ 1622 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1623 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1624 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1625 1626 /* read in column indices */ 1627 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1628 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1629 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1630 1631 /* loop over row lengths determining block row lengths */ 1632 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1633 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1634 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1635 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1636 masked = mask + mbs; 1637 rowcount = 0; nzcount = 0; 1638 for (i=0; i<mbs; i++) { 1639 nmask = 0; 1640 for (j=0; j<bs; j++) { 1641 kmax = rowlengths[rowcount]; 1642 for (k=0; k<kmax; k++) { 1643 tmp = jj[nzcount++]/bs; 1644 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1645 } 1646 rowcount++; 1647 } 1648 browlengths[i] += nmask; 1649 /* zero out the mask elements we set */ 1650 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1651 } 1652 1653 /* create our matrix */ 1654 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1655 B = *A; 1656 a = (Mat_SeqBAIJ*)B->data; 1657 1658 /* set matrix "i" values */ 1659 a->i[0] = 0; 1660 for (i=1; i<= mbs; i++) { 1661 a->i[i] = a->i[i-1] + browlengths[i-1]; 1662 a->ilen[i-1] = browlengths[i-1]; 1663 } 1664 a->nz = 0; 1665 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1666 1667 /* read in nonzero values */ 1668 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1669 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1670 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1671 1672 /* set "a" and "j" values into matrix */ 1673 nzcount = 0; jcount = 0; 1674 for (i=0; i<mbs; i++) { 1675 nzcountb = nzcount; 1676 nmask = 0; 1677 for (j=0; j<bs; j++) { 1678 kmax = rowlengths[i*bs+j]; 1679 for (k=0; k<kmax; k++) { 1680 tmp = jj[nzcount++]/bs; 1681 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1682 } 1683 } 1684 /* sort the masked values */ 1685 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1686 1687 /* set "j" values into matrix */ 1688 maskcount = 1; 1689 for (j=0; j<nmask; j++) { 1690 a->j[jcount++] = masked[j]; 1691 mask[masked[j]] = maskcount++; 1692 } 1693 /* set "a" values into matrix */ 1694 ishift = bs2*a->i[i]; 1695 for (j=0; j<bs; j++) { 1696 kmax = rowlengths[i*bs+j]; 1697 for (k=0; k<kmax; k++) { 1698 tmp = jj[nzcountb]/bs ; 1699 block = mask[tmp] - 1; 1700 point = jj[nzcountb] - bs*tmp; 1701 idx = ishift + bs2*block + j + bs*point; 1702 a->a[idx] = (MatScalar)aa[nzcountb++]; 1703 } 1704 } 1705 /* zero out the mask elements we set */ 1706 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1707 } 1708 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1709 1710 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1711 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1712 ierr = PetscFree(aa);CHKERRQ(ierr); 1713 ierr = PetscFree(jj);CHKERRQ(ierr); 1714 ierr = PetscFree(mask);CHKERRQ(ierr); 1715 1716 B->assembled = PETSC_TRUE; 1717 1718 ierr = MatView_Private(B);CHKERRQ(ierr); 1719 PetscFunctionReturn(0); 1720 } 1721 EXTERN_C_END 1722 1723 #undef __FUNCT__ 1724 #define __FUNCT__ "MatCreateSeqBAIJ" 1725 /*@C 1726 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1727 compressed row) format. For good matrix assembly performance the 1728 user should preallocate the matrix storage by setting the parameter nz 1729 (or the array nnz). By setting these parameters accurately, performance 1730 during matrix assembly can be increased by more than a factor of 50. 1731 1732 Collective on MPI_Comm 1733 1734 Input Parameters: 1735 + comm - MPI communicator, set to PETSC_COMM_SELF 1736 . bs - size of block 1737 . m - number of rows 1738 . n - number of columns 1739 . nz - number of nonzero blocks per block row (same for all rows) 1740 - nnz - array containing the number of nonzero blocks in the various block rows 1741 (possibly different for each block row) or PETSC_NULL 1742 1743 Output Parameter: 1744 . A - the matrix 1745 1746 Options Database Keys: 1747 . -mat_no_unroll - uses code that does not unroll the loops in the 1748 block calculations (much slower) 1749 . -mat_block_size - size of the blocks to use 1750 1751 Level: intermediate 1752 1753 Notes: 1754 A nonzero block is any block that as 1 or more nonzeros in it 1755 1756 The block AIJ format is fully compatible with standard Fortran 77 1757 storage. That is, the stored row and column indices can begin at 1758 either one (as in Fortran) or zero. See the users' manual for details. 1759 1760 Specify the preallocated storage with either nz or nnz (not both). 1761 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1762 allocation. For additional details, see the users manual chapter on 1763 matrices. 1764 1765 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1766 @*/ 1767 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1768 { 1769 int ierr; 1770 1771 PetscFunctionBegin; 1772 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1773 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 1774 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1775 PetscFunctionReturn(0); 1776 } 1777 1778 #undef __FUNCT__ 1779 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 1780 /*@C 1781 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 1782 per row in the matrix. For good matrix assembly performance the 1783 user should preallocate the matrix storage by setting the parameter nz 1784 (or the array nnz). By setting these parameters accurately, performance 1785 during matrix assembly can be increased by more than a factor of 50. 1786 1787 Collective on MPI_Comm 1788 1789 Input Parameters: 1790 + A - the matrix 1791 . bs - size of block 1792 . nz - number of block nonzeros per block row (same for all rows) 1793 - nnz - array containing the number of block nonzeros in the various block rows 1794 (possibly different for each block row) or PETSC_NULL 1795 1796 Options Database Keys: 1797 . -mat_no_unroll - uses code that does not unroll the loops in the 1798 block calculations (much slower) 1799 . -mat_block_size - size of the blocks to use 1800 1801 Level: intermediate 1802 1803 Notes: 1804 The block AIJ format is fully compatible with standard Fortran 77 1805 storage. That is, the stored row and column indices can begin at 1806 either one (as in Fortran) or zero. See the users' manual for details. 1807 1808 Specify the preallocated storage with either nz or nnz (not both). 1809 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1810 allocation. For additional details, see the users manual chapter on 1811 matrices. 1812 1813 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1814 @*/ 1815 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1816 { 1817 Mat_SeqBAIJ *b; 1818 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1819 PetscTruth flg; 1820 1821 PetscFunctionBegin; 1822 ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr); 1823 if (!flg) PetscFunctionReturn(0); 1824 1825 B->preallocated = PETSC_TRUE; 1826 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1827 if (nnz && newbs != bs) { 1828 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1829 } 1830 bs = newbs; 1831 1832 mbs = B->m/bs; 1833 nbs = B->n/bs; 1834 bs2 = bs*bs; 1835 1836 if (mbs*bs!=B->m || nbs*bs!=B->n) { 1837 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1838 } 1839 1840 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1841 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1842 if (nnz) { 1843 for (i=0; i<mbs; i++) { 1844 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1845 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1846 } 1847 } 1848 1849 b = (Mat_SeqBAIJ*)B->data; 1850 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1851 B->ops->solve = MatSolve_SeqBAIJ_Update; 1852 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 1853 if (!flg) { 1854 switch (bs) { 1855 case 1: 1856 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1857 B->ops->mult = MatMult_SeqBAIJ_1; 1858 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1859 break; 1860 case 2: 1861 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1862 B->ops->mult = MatMult_SeqBAIJ_2; 1863 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1864 break; 1865 case 3: 1866 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1867 B->ops->mult = MatMult_SeqBAIJ_3; 1868 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1869 break; 1870 case 4: 1871 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1872 B->ops->mult = MatMult_SeqBAIJ_4; 1873 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1874 break; 1875 case 5: 1876 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1877 B->ops->mult = MatMult_SeqBAIJ_5; 1878 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1879 break; 1880 case 6: 1881 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 1882 B->ops->mult = MatMult_SeqBAIJ_6; 1883 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 1884 break; 1885 case 7: 1886 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 1887 B->ops->mult = MatMult_SeqBAIJ_7; 1888 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 1889 break; 1890 default: 1891 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 1892 B->ops->mult = MatMult_SeqBAIJ_N; 1893 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 1894 break; 1895 } 1896 } 1897 b->bs = bs; 1898 b->mbs = mbs; 1899 b->nbs = nbs; 1900 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1901 if (!nnz) { 1902 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1903 else if (nz <= 0) nz = 1; 1904 for (i=0; i<mbs; i++) b->imax[i] = nz; 1905 nz = nz*mbs; 1906 } else { 1907 nz = 0; 1908 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1909 } 1910 1911 /* allocate the matrix space */ 1912 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1913 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1914 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1915 b->j = (int*)(b->a + nz*bs2); 1916 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1917 b->i = b->j + nz; 1918 b->singlemalloc = PETSC_TRUE; 1919 1920 b->i[0] = 0; 1921 for (i=1; i<mbs+1; i++) { 1922 b->i[i] = b->i[i-1] + b->imax[i-1]; 1923 } 1924 1925 /* b->ilen will count nonzeros in each block row so far. */ 1926 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1927 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1928 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1929 1930 b->bs = bs; 1931 b->bs2 = bs2; 1932 b->mbs = mbs; 1933 b->nz = 0; 1934 b->maxnz = nz*bs2; 1935 B->info.nz_unneeded = (PetscReal)b->maxnz; 1936 PetscFunctionReturn(0); 1937 } 1938 1939