1 2 #ifndef lint 3 static char vcid[] = "$Id: baij.c,v 1.44 1996/05/02 15:42:40 balay Exp balay $"; 4 #endif 5 6 /* 7 Defines the basic matrix operations for the BAIJ (compressed row) 8 matrix storage format. 9 */ 10 #include "baij.h" 11 #include "src/vec/vecimpl.h" 12 #include "src/inline/spops.h" 13 #include "petsc.h" 14 15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 16 17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm) 18 { 19 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 20 int ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift; 21 22 /* 23 this is tacky: In the future when we have written special factorization 24 and solve routines for the identity permutation we should use a 25 stride index set instead of the general one. 26 */ 27 if (type == ORDER_NATURAL) { 28 idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx); 29 for ( i=0; i<n; i++ ) idx[i] = i; 30 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr); 31 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr); 32 PetscFree(idx); 33 ISSetPermutation(*rperm); 34 ISSetPermutation(*cperm); 35 ISSetIdentity(*rperm); 36 ISSetIdentity(*cperm); 37 return 0; 38 } 39 40 MatReorderingRegisterAll(); 41 ishift = 0; 42 oshift = -MatReorderingIndexShift[(int)type]; 43 if (MatReorderingRequiresSymmetric[(int)type]) { 44 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,ishift,oshift,&ia,&ja);CHKERRQ(ierr); 45 ierr = MatGetReordering_IJ(n,ia,ja,type,rperm,cperm); CHKERRQ(ierr); 46 PetscFree(ia); PetscFree(ja); 47 } else { 48 if (ishift == oshift) { 49 ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 50 } 51 else if (ishift == -1) { 52 /* temporarily subtract 1 from i and j indices */ 53 int nz = a->i[n] - 1; 54 for ( i=0; i<nz; i++ ) a->j[i]--; 55 for ( i=0; i<n+1; i++ ) a->i[i]--; 56 ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 57 for ( i=0; i<nz; i++ ) a->j[i]++; 58 for ( i=0; i<n+1; i++ ) a->i[i]++; 59 } else { 60 /* temporarily add 1 to i and j indices */ 61 int nz = a->i[n] - 1; 62 for ( i=0; i<nz; i++ ) a->j[i]++; 63 for ( i=0; i<n+1; i++ ) a->i[i]++; 64 ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 65 for ( i=0; i<nz; i++ ) a->j[i]--; 66 for ( i=0; i<n+1; i++ ) a->i[i]--; 67 } 68 } 69 return 0; 70 } 71 72 /* 73 Adds diagonal pointers to sparse matrix structure. 74 */ 75 76 int MatMarkDiag_SeqBAIJ(Mat A) 77 { 78 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 79 int i,j, *diag, m = a->mbs; 80 81 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 82 PLogObjectMemory(A,(m+1)*sizeof(int)); 83 for ( i=0; i<m; i++ ) { 84 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 85 if (a->j[j] == i) { 86 diag[i] = j; 87 break; 88 } 89 } 90 } 91 a->diag = diag; 92 return 0; 93 } 94 95 #include "draw.h" 96 #include "pinclude/pviewer.h" 97 #include "sys.h" 98 99 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 100 { 101 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 102 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 103 Scalar *aa; 104 105 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 106 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 107 col_lens[0] = MAT_COOKIE; 108 col_lens[1] = a->m; 109 col_lens[2] = a->n; 110 col_lens[3] = a->nz*bs2; 111 112 /* store lengths of each row and write (including header) to file */ 113 count = 0; 114 for ( i=0; i<a->mbs; i++ ) { 115 for ( j=0; j<bs; j++ ) { 116 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 117 } 118 } 119 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 120 PetscFree(col_lens); 121 122 /* store column indices (zero start index) */ 123 jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj); 124 count = 0; 125 for ( i=0; i<a->mbs; i++ ) { 126 for ( j=0; j<bs; j++ ) { 127 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 128 for ( l=0; l<bs; l++ ) { 129 jj[count++] = bs*a->j[k] + l; 130 } 131 } 132 } 133 } 134 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 135 PetscFree(jj); 136 137 /* store nonzero values */ 138 aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa); 139 count = 0; 140 for ( i=0; i<a->mbs; i++ ) { 141 for ( j=0; j<bs; j++ ) { 142 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 143 for ( l=0; l<bs; l++ ) { 144 aa[count++] = a->a[bs2*k + l*bs + j]; 145 } 146 } 147 } 148 } 149 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 150 PetscFree(aa); 151 return 0; 152 } 153 154 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 155 { 156 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 157 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 158 FILE *fd; 159 char *outputname; 160 161 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 162 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 163 ierr = ViewerGetFormat(viewer,&format); 164 if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) { 165 fprintf(fd," block size is %d\n",bs); 166 } 167 else if (format == ASCII_FORMAT_MATLAB) { 168 SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 169 } 170 else if (format == ASCII_FORMAT_COMMON) { 171 for ( i=0; i<a->mbs; i++ ) { 172 for ( j=0; j<bs; j++ ) { 173 fprintf(fd,"row %d:",i*bs+j); 174 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 175 for ( l=0; l<bs; l++ ) { 176 #if defined(PETSC_COMPLEX) 177 if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 178 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 179 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 180 else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 181 fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 182 #else 183 if (a->a[bs2*k + l*bs + j] != 0.0) 184 fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 185 #endif 186 } 187 } 188 fprintf(fd,"\n"); 189 } 190 } 191 } 192 else { 193 for ( i=0; i<a->mbs; i++ ) { 194 for ( j=0; j<bs; j++ ) { 195 fprintf(fd,"row %d:",i*bs+j); 196 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 197 for ( l=0; l<bs; l++ ) { 198 #if defined(PETSC_COMPLEX) 199 if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 200 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 201 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 202 } 203 else { 204 fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 205 } 206 #else 207 fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 208 #endif 209 } 210 } 211 fprintf(fd,"\n"); 212 } 213 } 214 } 215 fflush(fd); 216 return 0; 217 } 218 219 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 220 { 221 Mat A = (Mat) obj; 222 ViewerType vtype; 223 int ierr; 224 225 if (!viewer) { 226 viewer = STDOUT_VIEWER_SELF; 227 } 228 229 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 230 if (vtype == MATLAB_VIEWER) { 231 SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 232 } 233 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 234 return MatView_SeqBAIJ_ASCII(A,viewer); 235 } 236 else if (vtype == BINARY_FILE_VIEWER) { 237 return MatView_SeqBAIJ_Binary(A,viewer); 238 } 239 else if (vtype == DRAW_VIEWER) { 240 SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported"); 241 } 242 return 0; 243 } 244 245 #define CHUNKSIZE 10 246 247 /* This version has row oriented v */ 248 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 249 { 250 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 251 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 252 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 253 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 254 int ridx,cidx,bs2=a->bs2; 255 Scalar *ap,value,*aa=a->a,*bap; 256 257 for ( k=0; k<m; k++ ) { /* loop over added rows */ 258 row = im[k]; brow = row/bs; 259 if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row"); 260 if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large"); 261 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 262 rmax = imax[brow]; nrow = ailen[brow]; 263 low = 0; 264 for ( l=0; l<n; l++ ) { /* loop over added columns */ 265 if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column"); 266 if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large"); 267 col = in[l]; bcol = col/bs; 268 ridx = row % bs; cidx = col % bs; 269 if (roworiented) { 270 value = *v++; 271 } 272 else { 273 value = v[k + l*m]; 274 } 275 if (!sorted) low = 0; high = nrow; 276 while (high-low > 5) { 277 t = (low+high)/2; 278 if (rp[t] > bcol) high = t; 279 else low = t; 280 } 281 for ( i=low; i<high; i++ ) { 282 if (rp[i] > bcol) break; 283 if (rp[i] == bcol) { 284 bap = ap + bs2*i + bs*cidx + ridx; 285 if (is == ADD_VALUES) *bap += value; 286 else *bap = value; 287 goto noinsert; 288 } 289 } 290 if (nonew) goto noinsert; 291 if (nrow >= rmax) { 292 /* there is no extra room in row, therefore enlarge */ 293 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 294 Scalar *new_a; 295 296 /* malloc new storage space */ 297 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 298 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 299 new_j = (int *) (new_a + bs2*new_nz); 300 new_i = new_j + new_nz; 301 302 /* copy over old data into new slots */ 303 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 304 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 305 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 306 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 307 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 308 len*sizeof(int)); 309 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 310 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 311 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 312 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 313 /* free up old matrix storage */ 314 PetscFree(a->a); 315 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 316 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 317 a->singlemalloc = 1; 318 319 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 320 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 321 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 322 a->maxnz += CHUNKSIZE; 323 a->reallocs++; 324 a->nz++; 325 } 326 N = nrow++ - 1; 327 /* shift up all the later entries in this row */ 328 for ( ii=N; ii>=i; ii-- ) { 329 rp[ii+1] = rp[ii]; 330 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 331 } 332 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 333 rp[i] = bcol; 334 ap[bs2*i + bs*cidx + ridx] = value; 335 noinsert:; 336 low = i; 337 } 338 ailen[brow] = nrow; 339 } 340 return 0; 341 } 342 343 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 344 { 345 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 346 *m = a->m; *n = a->n; 347 return 0; 348 } 349 350 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 351 { 352 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 353 *m = 0; *n = a->m; 354 return 0; 355 } 356 357 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 358 { 359 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 360 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 361 Scalar *aa,*v_i,*aa_i; 362 363 bs = a->bs; 364 ai = a->i; 365 aj = a->j; 366 aa = a->a; 367 bs2 = a->bs2; 368 369 if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range"); 370 371 bn = row/bs; /* Block number */ 372 bp = row % bs; /* Block Position */ 373 M = ai[bn+1] - ai[bn]; 374 *nz = bs*M; 375 376 if (v) { 377 *v = 0; 378 if (*nz) { 379 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 380 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 381 v_i = *v + i*bs; 382 aa_i = aa + bs2*(ai[bn] + i); 383 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 384 } 385 } 386 } 387 388 if (idx) { 389 *idx = 0; 390 if (*nz) { 391 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 392 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 393 idx_i = *idx + i*bs; 394 itmp = bs*aj[ai[bn] + i]; 395 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 396 } 397 } 398 } 399 return 0; 400 } 401 402 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 403 { 404 if (idx) {if (*idx) PetscFree(*idx);} 405 if (v) {if (*v) PetscFree(*v);} 406 return 0; 407 } 408 409 static int MatTranspose_SeqBAIJ(Mat A,Mat *B) 410 { 411 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 412 Mat C; 413 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 414 int *rows,*cols,bs2=a->bs2; 415 Scalar *array=a->a; 416 417 if (B==PETSC_NULL && mbs!=nbs) 418 SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place"); 419 col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 420 PetscMemzero(col,(1+nbs)*sizeof(int)); 421 422 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 423 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 424 PetscFree(col); 425 rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 426 cols = rows + bs; 427 for ( i=0; i<mbs; i++ ) { 428 cols[0] = i*bs; 429 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 430 len = ai[i+1] - ai[i]; 431 for ( j=0; j<len; j++ ) { 432 rows[0] = (*aj++)*bs; 433 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 434 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 435 array += bs2; 436 } 437 } 438 PetscFree(rows); 439 440 ierr = MatAssemblyBegin(C,FINAL_ASSEMBLY); CHKERRQ(ierr); 441 ierr = MatAssemblyEnd(C,FINAL_ASSEMBLY); CHKERRQ(ierr); 442 443 if (B != PETSC_NULL) { 444 *B = C; 445 } else { 446 /* This isn't really an in-place transpose */ 447 PetscFree(a->a); 448 if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 449 if (a->diag) PetscFree(a->diag); 450 if (a->ilen) PetscFree(a->ilen); 451 if (a->imax) PetscFree(a->imax); 452 if (a->solve_work) PetscFree(a->solve_work); 453 PetscFree(a); 454 PetscMemcpy(A,C,sizeof(struct _Mat)); 455 PetscHeaderDestroy(C); 456 } 457 return 0; 458 } 459 460 461 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 462 { 463 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 464 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 465 int m = a->m,*ip, N, *ailen = a->ilen; 466 int mbs = a->mbs, bs2 = a->bs2; 467 Scalar *aa = a->a, *ap; 468 469 if (mode == FLUSH_ASSEMBLY) return 0; 470 471 for ( i=1; i<mbs; i++ ) { 472 /* move each row back by the amount of empty slots (fshift) before it*/ 473 fshift += imax[i-1] - ailen[i-1]; 474 if (fshift) { 475 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 476 N = ailen[i]; 477 for ( j=0; j<N; j++ ) { 478 ip[j-fshift] = ip[j]; 479 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 480 } 481 } 482 ai[i] = ai[i-1] + ailen[i-1]; 483 } 484 if (mbs) { 485 fshift += imax[mbs-1] - ailen[mbs-1]; 486 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 487 } 488 /* reset ilen and imax for each row */ 489 for ( i=0; i<mbs; i++ ) { 490 ailen[i] = imax[i] = ai[i+1] - ai[i]; 491 } 492 a->nz = ai[mbs]; 493 494 /* diagonals may have moved, so kill the diagonal pointers */ 495 if (fshift && a->diag) { 496 PetscFree(a->diag); 497 PLogObjectMemory(A,-(m+1)*sizeof(int)); 498 a->diag = 0; 499 } 500 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Unneed storage space(blocks) %d used %d, rows %d, block size %d\n", fshift*bs2,a->nz*bs2,m,a->bs); 501 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Number of mallocs during MatSetValues %d\n", 502 a->reallocs); 503 return 0; 504 } 505 506 static int MatZeroEntries_SeqBAIJ(Mat A) 507 { 508 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 509 PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 510 return 0; 511 } 512 513 int MatDestroy_SeqBAIJ(PetscObject obj) 514 { 515 Mat A = (Mat) obj; 516 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 517 518 #if defined(PETSC_LOG) 519 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 520 #endif 521 PetscFree(a->a); 522 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 523 if (a->diag) PetscFree(a->diag); 524 if (a->ilen) PetscFree(a->ilen); 525 if (a->imax) PetscFree(a->imax); 526 if (a->solve_work) PetscFree(a->solve_work); 527 if (a->mult_work) PetscFree(a->mult_work); 528 PetscFree(a); 529 PLogObjectDestroy(A); 530 PetscHeaderDestroy(A); 531 return 0; 532 } 533 534 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 535 { 536 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 537 if (op == ROW_ORIENTED) a->roworiented = 1; 538 else if (op == COLUMN_ORIENTED) a->roworiented = 0; 539 else if (op == COLUMNS_SORTED) a->sorted = 1; 540 else if (op == NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 541 else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 542 else if (op == ROWS_SORTED || 543 op == SYMMETRIC_MATRIX || 544 op == STRUCTURALLY_SYMMETRIC_MATRIX || 545 op == YES_NEW_DIAGONALS) 546 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 547 else if (op == NO_NEW_DIAGONALS) 548 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");} 549 else 550 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 551 return 0; 552 } 553 554 555 /* -------------------------------------------------------*/ 556 /* Should check that shapes of vectors and matrices match */ 557 /* -------------------------------------------------------*/ 558 #include "pinclude/plapack.h" 559 560 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec zz) 561 { 562 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 563 Scalar *xg,*zg; 564 register Scalar *x,*z,*v,sum,*xb,sum1,sum2,sum3,sum4,sum5; 565 register Scalar x1,x2,x3,x4,x5; 566 int mbs=a->mbs,i,*idx,*ii; 567 int bs=a->bs,j,n,bs2=a->bs2,ierr; 568 569 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 570 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 571 572 idx = a->j; 573 v = a->a; 574 ii = a->i; 575 576 switch (bs) { 577 case 1: 578 for ( i=0; i<mbs; i++ ) { 579 n = ii[1] - ii[0]; ii++; 580 sum = 0.0; 581 while (n--) sum += *v++ * x[*idx++]; 582 z[i] = sum; 583 } 584 break; 585 case 2: 586 for ( i=0; i<mbs; i++ ) { 587 n = ii[1] - ii[0]; ii++; 588 sum1 = 0.0; sum2 = 0.0; 589 for ( j=0; j<n; j++ ) { 590 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 591 sum1 += v[0]*x1 + v[2]*x2; 592 sum2 += v[1]*x1 + v[3]*x2; 593 v += 4; 594 } 595 z[0] = sum1; z[1] = sum2; 596 z += 2; 597 } 598 break; 599 case 3: 600 for ( i=0; i<mbs; i++ ) { 601 n = ii[1] - ii[0]; ii++; 602 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 603 for ( j=0; j<n; j++ ) { 604 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 605 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 606 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 607 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 608 v += 9; 609 } 610 z[0] = sum1; z[1] = sum2; z[2] = sum3; 611 z += 3; 612 } 613 break; 614 case 4: 615 for ( i=0; i<mbs; i++ ) { 616 n = ii[1] - ii[0]; ii++; 617 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 618 for ( j=0; j<n; j++ ) { 619 xb = x + 4*(*idx++); 620 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 621 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 622 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 623 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 624 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 625 v += 16; 626 } 627 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 628 z += 4; 629 } 630 break; 631 case 5: 632 for ( i=0; i<mbs; i++ ) { 633 n = ii[1] - ii[0]; ii++; 634 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 635 for ( j=0; j<n; j++ ) { 636 xb = x + 5*(*idx++); 637 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 638 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 639 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 640 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 641 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 642 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 643 v += 25; 644 } 645 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 646 z += 5; 647 } 648 break; 649 /* block sizes larger then 5 by 5 are handled by BLAS */ 650 default: { 651 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt, _DZero = 0.0; 652 if (!a->mult_work) { 653 k = PetscMax(a->m,a->n); 654 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 655 CHKPTRQ(a->mult_work); 656 } 657 work = a->mult_work; 658 for ( i=0; i<mbs; i++ ) { 659 n = ii[1] - ii[0]; ii++; 660 ncols = n*bs; 661 workt = work; 662 for ( j=0; j<n; j++ ) { 663 xb = x + bs*(*idx++); 664 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 665 workt += bs; 666 } 667 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 668 v += n*bs2; 669 z += bs; 670 } 671 } 672 } 673 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 674 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 675 PLogFlops(2*a->nz*bs2 - a->m); 676 return 0; 677 } 678 679 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 680 { 681 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 682 Scalar *xg,*zg,*zb; 683 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 684 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 685 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 686 687 688 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 689 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 690 PetscMemzero(z,N*sizeof(Scalar)); 691 692 idx = a->j; 693 v = a->a; 694 ii = a->i; 695 696 switch (bs) { 697 case 1: 698 for ( i=0; i<mbs; i++ ) { 699 n = ii[1] - ii[0]; ii++; 700 xb = x + i; x1 = xb[0]; 701 ib = idx + ai[i]; 702 for ( j=0; j<n; j++ ) { 703 rval = ib[j]; 704 z[rval] += *v++ * x1; 705 } 706 } 707 break; 708 case 2: 709 for ( i=0; i<mbs; i++ ) { 710 n = ii[1] - ii[0]; ii++; 711 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 712 ib = idx + ai[i]; 713 for ( j=0; j<n; j++ ) { 714 rval = ib[j]*2; 715 z[rval++] += v[0]*x1 + v[1]*x2; 716 z[rval++] += v[2]*x1 + v[3]*x2; 717 v += 4; 718 } 719 } 720 break; 721 case 3: 722 for ( i=0; i<mbs; i++ ) { 723 n = ii[1] - ii[0]; ii++; 724 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 725 ib = idx + ai[i]; 726 for ( j=0; j<n; j++ ) { 727 rval = ib[j]*3; 728 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 729 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 730 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 731 v += 9; 732 } 733 } 734 break; 735 case 4: 736 for ( i=0; i<mbs; i++ ) { 737 n = ii[1] - ii[0]; ii++; 738 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 739 ib = idx + ai[i]; 740 for ( j=0; j<n; j++ ) { 741 rval = ib[j]*4; 742 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 743 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 744 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 745 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 746 v += 16; 747 } 748 } 749 break; 750 case 5: 751 for ( i=0; i<mbs; i++ ) { 752 n = ii[1] - ii[0]; ii++; 753 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 754 x4 = xb[3]; x5 = xb[4]; 755 ib = idx + ai[i]; 756 for ( j=0; j<n; j++ ) { 757 rval = ib[j]*5; 758 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 759 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 760 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 761 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 762 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 763 v += 25; 764 } 765 } 766 break; 767 /* block sizes larger then 5 by 5 are handled by BLAS */ 768 default: { 769 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 770 if (!a->mult_work) { 771 k = PetscMax(a->m,a->n); 772 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 773 CHKPTRQ(a->mult_work); 774 } 775 work = a->mult_work; 776 for ( i=0; i<mbs; i++ ) { 777 n = ii[1] - ii[0]; ii++; 778 ncols = n*bs; 779 PetscMemzero(work,ncols*sizeof(Scalar)); 780 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 781 v += n*bs2; 782 x += bs; 783 workt = work; 784 for ( j=0; j<n; j++ ) { 785 zb = z + bs*(*idx++); 786 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 787 workt += bs; 788 } 789 } 790 } 791 } 792 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 793 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 794 return 0; 795 } 796 797 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem) 798 { 799 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 800 if (nz) *nz = a->bs2*a->nz; 801 if (nza) *nza = a->maxnz; 802 if (mem) *mem = (int)A->mem; 803 return 0; 804 } 805 806 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 807 { 808 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 809 810 if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type"); 811 812 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 813 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 814 (a->nz != b->nz)) { 815 *flg = PETSC_FALSE; return 0; 816 } 817 818 /* if the a->i are the same */ 819 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 820 *flg = PETSC_FALSE; return 0; 821 } 822 823 /* if a->j are the same */ 824 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 825 *flg = PETSC_FALSE; return 0; 826 } 827 828 /* if a->a are the same */ 829 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 830 *flg = PETSC_FALSE; return 0; 831 } 832 *flg = PETSC_TRUE; 833 return 0; 834 835 } 836 837 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 838 { 839 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 840 int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 841 Scalar *x, zero = 0.0,*aa,*aa_j; 842 843 bs = a->bs; 844 aa = a->a; 845 ai = a->i; 846 aj = a->j; 847 ambs = a->mbs; 848 bs2 = a->bs2; 849 850 VecSet(&zero,v); 851 VecGetArray(v,&x); VecGetLocalSize(v,&n); 852 if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector"); 853 for ( i=0; i<ambs; i++ ) { 854 for ( j=ai[i]; j<ai[i+1]; j++ ) { 855 if (aj[j] == i) { 856 row = i*bs; 857 aa_j = aa+j*bs2; 858 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 859 break; 860 } 861 } 862 } 863 return 0; 864 } 865 866 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 867 { 868 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 869 Scalar *l,*r,x,*v,*aa,*li,*ri; 870 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 871 872 ai = a->i; 873 aj = a->j; 874 aa = a->a; 875 m = a->m; 876 n = a->n; 877 bs = a->bs; 878 mbs = a->mbs; 879 bs2 = a->bs2; 880 if (ll) { 881 VecGetArray(ll,&l); VecGetSize(ll,&lm); 882 if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length"); 883 for ( i=0; i<mbs; i++ ) { /* for each block row */ 884 M = ai[i+1] - ai[i]; 885 li = l + i*bs; 886 v = aa + bs2*ai[i]; 887 for ( j=0; j<M; j++ ) { /* for each block */ 888 for ( k=0; k<bs2; k++ ) { 889 (*v++) *= li[k%bs]; 890 } 891 } 892 } 893 } 894 895 if (rr) { 896 VecGetArray(rr,&r); VecGetSize(rr,&rn); 897 if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length"); 898 for ( i=0; i<mbs; i++ ) { /* for each block row */ 899 M = ai[i+1] - ai[i]; 900 v = aa + bs2*ai[i]; 901 for ( j=0; j<M; j++ ) { /* for each block */ 902 ri = r + bs*aj[ai[i]+j]; 903 for ( k=0; k<bs; k++ ) { 904 x = ri[k]; 905 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 906 } 907 } 908 } 909 } 910 return 0; 911 } 912 913 914 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 915 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 916 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 917 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 918 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 919 920 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 921 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 922 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 923 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 924 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 925 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 926 927 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 928 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 929 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 930 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 931 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 932 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 933 934 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 935 { 936 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 937 Scalar *v = a->a; 938 double sum = 0.0; 939 int i,nz=a->nz,bs2=a->bs2; 940 941 if (type == NORM_FROBENIUS) { 942 for (i=0; i< bs2*nz; i++ ) { 943 #if defined(PETSC_COMPLEX) 944 sum += real(conj(*v)*(*v)); v++; 945 #else 946 sum += (*v)*(*v); v++; 947 #endif 948 } 949 *norm = sqrt(sum); 950 } 951 else { 952 SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 953 } 954 return 0; 955 } 956 957 /* 958 note: This can only work for identity for row and col. It would 959 be good to check this and otherwise generate an error. 960 */ 961 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 962 { 963 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 964 Mat outA; 965 int ierr; 966 967 if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 968 969 outA = inA; 970 inA->factor = FACTOR_LU; 971 a->row = row; 972 a->col = col; 973 974 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 975 976 if (!a->diag) { 977 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 978 } 979 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 980 return 0; 981 } 982 983 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 984 { 985 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 986 int one = 1, totalnz = a->bs2*a->nz; 987 BLscal_( &totalnz, alpha, a->a, &one ); 988 PLogFlops(totalnz); 989 return 0; 990 } 991 992 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 993 { 994 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 995 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 996 int *ai = a->i, *ailen = a->ilen; 997 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 998 Scalar *ap, *aa = a->a, zero = 0.0; 999 1000 for ( k=0; k<m; k++ ) { /* loop over rows */ 1001 row = im[k]; brow = row/bs; 1002 if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row"); 1003 if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large"); 1004 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1005 nrow = ailen[brow]; 1006 for ( l=0; l<n; l++ ) { /* loop over columns */ 1007 if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column"); 1008 if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large"); 1009 col = in[l] ; 1010 bcol = col/bs; 1011 cidx = col%bs; 1012 ridx = row%bs; 1013 high = nrow; 1014 low = 0; /* assume unsorted */ 1015 while (high-low > 5) { 1016 t = (low+high)/2; 1017 if (rp[t] > bcol) high = t; 1018 else low = t; 1019 } 1020 for ( i=low; i<high; i++ ) { 1021 if (rp[i] > bcol) break; 1022 if (rp[i] == bcol) { 1023 *v++ = ap[bs2*i+bs*cidx+ridx]; 1024 goto finished; 1025 } 1026 } 1027 *v++ = zero; 1028 finished:; 1029 } 1030 } 1031 return 0; 1032 } 1033 1034 /* -------------------------------------------------------------------*/ 1035 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 1036 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 1037 MatMult_SeqBAIJ,0, 1038 MatMultTrans_SeqBAIJ,0, 1039 MatSolve_SeqBAIJ_N,0, 1040 0,0, 1041 MatLUFactor_SeqBAIJ,0, 1042 0, 1043 MatTranspose_SeqBAIJ, 1044 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 1045 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 1046 0,MatAssemblyEnd_SeqBAIJ, 1047 0, 1048 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0, 1049 MatGetReordering_SeqBAIJ, 1050 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 1051 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 1052 MatILUFactorSymbolic_SeqBAIJ,0, 1053 0,0,/* MatConvert_SeqBAIJ */ 0, 1054 MatGetSubMatrix_SeqBAIJ,0, 1055 MatConvertSameType_SeqBAIJ,0,0, 1056 MatILUFactor_SeqBAIJ,0,0, 1057 MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ, 1058 MatGetValues_SeqBAIJ,0, 1059 0,MatScale_SeqBAIJ, 1060 0}; 1061 1062 /*@C 1063 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1064 compressed row) format. For good matrix assembly performance the 1065 user should preallocate the matrix storage by setting the parameter nz 1066 (or nzz). By setting these parameters accurately, performance can be 1067 increased by more than a factor of 50. 1068 1069 Input Parameters: 1070 . comm - MPI communicator, set to MPI_COMM_SELF 1071 . bs - size of block 1072 . m - number of rows 1073 . n - number of columns 1074 . nz - number of block nonzeros per block row (same for all rows) 1075 . nzz - number of block nonzeros per block row or PETSC_NULL 1076 (possibly different for each row) 1077 1078 Output Parameter: 1079 . A - the matrix 1080 1081 Notes: 1082 The block AIJ format is fully compatible with standard Fortran 77 1083 storage. That is, the stored row and column indices can begin at 1084 either one (as in Fortran) or zero. See the users' manual for details. 1085 1086 Specify the preallocated storage with either nz or nnz (not both). 1087 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1088 allocation. For additional details, see the users manual chapter on 1089 matrices and the file $(PETSC_DIR)/Performance. 1090 1091 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1092 @*/ 1093 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 1094 { 1095 Mat B; 1096 Mat_SeqBAIJ *b; 1097 int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs; 1098 1099 if (mbs*bs!=m || nbs*bs!=n) 1100 SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize"); 1101 1102 *A = 0; 1103 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 1104 PLogObjectCreate(B); 1105 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 1106 PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 1107 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1108 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 1109 if (!flg) { 1110 switch (bs) { 1111 case 1: 1112 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1113 B->ops.solve = MatSolve_SeqBAIJ_1; 1114 break; 1115 case 2: 1116 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1117 B->ops.solve = MatSolve_SeqBAIJ_2; 1118 break; 1119 case 3: 1120 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1121 B->ops.solve = MatSolve_SeqBAIJ_3; 1122 break; 1123 case 4: 1124 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1125 B->ops.solve = MatSolve_SeqBAIJ_4; 1126 break; 1127 case 5: 1128 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1129 B->ops.solve = MatSolve_SeqBAIJ_5; 1130 break; 1131 } 1132 } 1133 B->destroy = MatDestroy_SeqBAIJ; 1134 B->view = MatView_SeqBAIJ; 1135 B->factor = 0; 1136 B->lupivotthreshold = 1.0; 1137 b->row = 0; 1138 b->col = 0; 1139 b->reallocs = 0; 1140 1141 b->m = m; B->m = m; B->M = m; 1142 b->n = n; B->n = n; B->N = n; 1143 b->mbs = mbs; 1144 b->nbs = nbs; 1145 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 1146 if (nnz == PETSC_NULL) { 1147 if (nz == PETSC_DEFAULT) nz = 5; 1148 else if (nz <= 0) nz = 1; 1149 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1150 nz = nz*mbs; 1151 } 1152 else { 1153 nz = 0; 1154 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 1155 } 1156 1157 /* allocate the matrix space */ 1158 len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 1159 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 1160 PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 1161 b->j = (int *) (b->a + nz*bs2); 1162 PetscMemzero(b->j,nz*sizeof(int)); 1163 b->i = b->j + nz; 1164 b->singlemalloc = 1; 1165 1166 b->i[0] = 0; 1167 for (i=1; i<mbs+1; i++) { 1168 b->i[i] = b->i[i-1] + b->imax[i-1]; 1169 } 1170 1171 /* b->ilen will count nonzeros in each block row so far. */ 1172 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1173 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1174 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 1175 1176 b->bs = bs; 1177 b->bs2 = bs2; 1178 b->mbs = mbs; 1179 b->nz = 0; 1180 b->maxnz = nz; 1181 b->sorted = 0; 1182 b->roworiented = 1; 1183 b->nonew = 0; 1184 b->diag = 0; 1185 b->solve_work = 0; 1186 b->mult_work = 0; 1187 b->spptr = 0; 1188 *A = B; 1189 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 1190 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 1191 return 0; 1192 } 1193 1194 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 1195 { 1196 Mat C; 1197 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 1198 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1199 1200 if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 1201 1202 *B = 0; 1203 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 1204 PLogObjectCreate(C); 1205 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 1206 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 1207 C->destroy = MatDestroy_SeqBAIJ; 1208 C->view = MatView_SeqBAIJ; 1209 C->factor = A->factor; 1210 c->row = 0; 1211 c->col = 0; 1212 C->assembled = PETSC_TRUE; 1213 1214 c->m = C->m = a->m; 1215 c->n = C->n = a->n; 1216 C->M = a->m; 1217 C->N = a->n; 1218 1219 c->bs = a->bs; 1220 c->bs2 = a->bs2; 1221 c->mbs = a->mbs; 1222 c->nbs = a->nbs; 1223 1224 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 1225 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 1226 for ( i=0; i<mbs; i++ ) { 1227 c->imax[i] = a->imax[i]; 1228 c->ilen[i] = a->ilen[i]; 1229 } 1230 1231 /* allocate the matrix space */ 1232 c->singlemalloc = 1; 1233 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 1234 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 1235 c->j = (int *) (c->a + nz*bs2); 1236 c->i = c->j + nz; 1237 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 1238 if (mbs > 0) { 1239 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 1240 if (cpvalues == COPY_VALUES) { 1241 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 1242 } 1243 } 1244 1245 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1246 c->sorted = a->sorted; 1247 c->roworiented = a->roworiented; 1248 c->nonew = a->nonew; 1249 1250 if (a->diag) { 1251 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 1252 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1253 for ( i=0; i<mbs; i++ ) { 1254 c->diag[i] = a->diag[i]; 1255 } 1256 } 1257 else c->diag = 0; 1258 c->nz = a->nz; 1259 c->maxnz = a->maxnz; 1260 c->solve_work = 0; 1261 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1262 c->mult_work = 0; 1263 *B = C; 1264 return 0; 1265 } 1266 1267 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 1268 { 1269 Mat_SeqBAIJ *a; 1270 Mat B; 1271 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 1272 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1273 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1274 int *masked, nmask,tmp,bs2,ishift; 1275 Scalar *aa; 1276 MPI_Comm comm = ((PetscObject) viewer)->comm; 1277 1278 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 1279 bs2 = bs*bs; 1280 1281 MPI_Comm_size(comm,&size); 1282 if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 1283 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1284 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 1285 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 1286 M = header[1]; N = header[2]; nz = header[3]; 1287 1288 if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 1289 1290 /* 1291 This code adds extra rows to make sure the number of rows is 1292 divisible by the blocksize 1293 */ 1294 mbs = M/bs; 1295 extra_rows = bs - M + bs*(mbs); 1296 if (extra_rows == bs) extra_rows = 0; 1297 else mbs++; 1298 if (extra_rows) { 1299 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize"); 1300 } 1301 1302 /* read in row lengths */ 1303 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 1304 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 1305 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 1306 1307 /* read in column indices */ 1308 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 1309 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 1310 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 1311 1312 /* loop over row lengths determining block row lengths */ 1313 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1314 PetscMemzero(browlengths,mbs*sizeof(int)); 1315 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 1316 PetscMemzero(mask,mbs*sizeof(int)); 1317 masked = mask + mbs; 1318 rowcount = 0; nzcount = 0; 1319 for ( i=0; i<mbs; i++ ) { 1320 nmask = 0; 1321 for ( j=0; j<bs; j++ ) { 1322 kmax = rowlengths[rowcount]; 1323 for ( k=0; k<kmax; k++ ) { 1324 tmp = jj[nzcount++]/bs; 1325 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1326 } 1327 rowcount++; 1328 } 1329 browlengths[i] += nmask; 1330 /* zero out the mask elements we set */ 1331 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1332 } 1333 1334 /* create our matrix */ 1335 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 1336 CHKERRQ(ierr); 1337 B = *A; 1338 a = (Mat_SeqBAIJ *) B->data; 1339 1340 /* set matrix "i" values */ 1341 a->i[0] = 0; 1342 for ( i=1; i<= mbs; i++ ) { 1343 a->i[i] = a->i[i-1] + browlengths[i-1]; 1344 a->ilen[i-1] = browlengths[i-1]; 1345 } 1346 a->nz = 0; 1347 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 1348 1349 /* read in nonzero values */ 1350 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 1351 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 1352 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 1353 1354 /* set "a" and "j" values into matrix */ 1355 nzcount = 0; jcount = 0; 1356 for ( i=0; i<mbs; i++ ) { 1357 nzcountb = nzcount; 1358 nmask = 0; 1359 for ( j=0; j<bs; j++ ) { 1360 kmax = rowlengths[i*bs+j]; 1361 for ( k=0; k<kmax; k++ ) { 1362 tmp = jj[nzcount++]/bs; 1363 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1364 } 1365 rowcount++; 1366 } 1367 /* sort the masked values */ 1368 PetscSortInt(nmask,masked); 1369 1370 /* set "j" values into matrix */ 1371 maskcount = 1; 1372 for ( j=0; j<nmask; j++ ) { 1373 a->j[jcount++] = masked[j]; 1374 mask[masked[j]] = maskcount++; 1375 } 1376 /* set "a" values into matrix */ 1377 ishift = bs2*a->i[i]; 1378 for ( j=0; j<bs; j++ ) { 1379 kmax = rowlengths[i*bs+j]; 1380 for ( k=0; k<kmax; k++ ) { 1381 tmp = jj[nzcountb]/bs ; 1382 block = mask[tmp] - 1; 1383 point = jj[nzcountb] - bs*tmp; 1384 idx = ishift + bs2*block + j + bs*point; 1385 a->a[idx] = aa[nzcountb++]; 1386 } 1387 } 1388 /* zero out the mask elements we set */ 1389 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1390 } 1391 if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 1392 1393 PetscFree(rowlengths); 1394 PetscFree(browlengths); 1395 PetscFree(aa); 1396 PetscFree(jj); 1397 PetscFree(mask); 1398 1399 B->assembled = PETSC_TRUE; 1400 1401 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 1402 if (flg) { 1403 Viewer tviewer; 1404 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1405 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr); 1406 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1407 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1408 } 1409 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 1410 if (flg) { 1411 Viewer tviewer; 1412 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1413 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr); 1414 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1415 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1416 } 1417 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 1418 if (flg) { 1419 Viewer tviewer; 1420 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1421 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1422 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1423 } 1424 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 1425 if (flg) { 1426 Viewer tviewer; 1427 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1428 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr); 1429 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1430 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1431 } 1432 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 1433 if (flg) { 1434 Viewer tviewer; 1435 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 1436 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 1437 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 1438 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1439 } 1440 return 0; 1441 } 1442 1443 1444 1445