1 2 #ifndef lint 3 static char vcid[] = "$Id: baij.c,v 1.38 1996/04/15 16:21:01 balay Exp balay $"; 4 #endif 5 6 /* 7 Defines the basic matrix operations for the BAIJ (compressed row) 8 matrix storage format. 9 */ 10 #include "baij.h" 11 #include "vec/vecimpl.h" 12 #include "inline/spops.h" 13 #include "petsc.h" 14 15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 16 17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm) 18 { 19 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 20 int ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift; 21 22 /* 23 this is tacky: In the future when we have written special factorization 24 and solve routines for the identity permutation we should use a 25 stride index set instead of the general one. 26 */ 27 if (type == ORDER_NATURAL) { 28 idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx); 29 for ( i=0; i<n; i++ ) idx[i] = i; 30 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr); 31 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr); 32 PetscFree(idx); 33 ISSetPermutation(*rperm); 34 ISSetPermutation(*cperm); 35 ISSetIdentity(*rperm); 36 ISSetIdentity(*cperm); 37 return 0; 38 } 39 40 MatReorderingRegisterAll(); 41 ishift = 0; 42 oshift = -MatReorderingIndexShift[(int)type]; 43 if (MatReorderingRequiresSymmetric[(int)type]) { 44 ierr = MatToSymmetricIJ_SeqAIJ(a->n,a->i,a->j,ishift,oshift,&ia,&ja); 45 CHKERRQ(ierr); 46 ierr = MatGetReordering_IJ(a->n,ia,ja,type,rperm,cperm); CHKERRQ(ierr); 47 PetscFree(ia); PetscFree(ja); 48 } else { 49 if (ishift == oshift) { 50 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 51 } 52 else if (ishift == -1) { 53 /* temporarily subtract 1 from i and j indices */ 54 int nz = a->i[a->n] - 1; 55 for ( i=0; i<nz; i++ ) a->j[i]--; 56 for ( i=0; i<a->n+1; i++ ) a->i[i]--; 57 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 58 for ( i=0; i<nz; i++ ) a->j[i]++; 59 for ( i=0; i<a->n+1; i++ ) a->i[i]++; 60 } else { 61 /* temporarily add 1 to i and j indices */ 62 int nz = a->i[a->n] - 1; 63 for ( i=0; i<nz; i++ ) a->j[i]++; 64 for ( i=0; i<a->n+1; i++ ) a->i[i]++; 65 ierr = MatGetReordering_IJ(a->n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 66 for ( i=0; i<nz; i++ ) a->j[i]--; 67 for ( i=0; i<a->n+1; i++ ) a->i[i]--; 68 } 69 } 70 return 0; 71 } 72 73 /* 74 Adds diagonal pointers to sparse matrix structure. 75 */ 76 77 int MatMarkDiag_SeqBAIJ(Mat A) 78 { 79 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 80 int i,j, *diag, m = a->mbs; 81 82 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 83 PLogObjectMemory(A,(m+1)*sizeof(int)); 84 for ( i=0; i<m; i++ ) { 85 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 86 if (a->j[j] == i) { 87 diag[i] = j; 88 break; 89 } 90 } 91 } 92 a->diag = diag; 93 return 0; 94 } 95 96 #include "draw.h" 97 #include "pinclude/pviewer.h" 98 #include "sys.h" 99 100 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 101 { 102 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 103 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 104 Scalar *aa; 105 106 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 107 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 108 col_lens[0] = MAT_COOKIE; 109 col_lens[1] = a->m; 110 col_lens[2] = a->n; 111 col_lens[3] = a->nz*bs2; 112 113 /* store lengths of each row and write (including header) to file */ 114 count = 0; 115 for ( i=0; i<a->mbs; i++ ) { 116 for ( j=0; j<bs; j++ ) { 117 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 118 } 119 } 120 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 121 PetscFree(col_lens); 122 123 /* store column indices (zero start index) */ 124 jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj); 125 count = 0; 126 for ( i=0; i<a->mbs; i++ ) { 127 for ( j=0; j<bs; j++ ) { 128 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 129 for ( l=0; l<bs; l++ ) { 130 jj[count++] = bs*a->j[k] + l; 131 } 132 } 133 } 134 } 135 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 136 PetscFree(jj); 137 138 /* store nonzero values */ 139 aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa); 140 count = 0; 141 for ( i=0; i<a->mbs; i++ ) { 142 for ( j=0; j<bs; j++ ) { 143 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 144 for ( l=0; l<bs; l++ ) { 145 aa[count++] = a->a[bs2*k + l*bs + j]; 146 } 147 } 148 } 149 } 150 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 151 PetscFree(aa); 152 return 0; 153 } 154 155 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 156 { 157 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 158 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 159 FILE *fd; 160 char *outputname; 161 162 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 163 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 164 ierr = ViewerGetFormat(viewer,&format); 165 if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) { 166 fprintf(fd," block size is %d\n",bs); 167 } 168 else if (format == ASCII_FORMAT_MATLAB) { 169 SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 170 } 171 else if (format == ASCII_FORMAT_COMMON) { 172 for ( i=0; i<a->mbs; i++ ) { 173 for ( j=0; j<bs; j++ ) { 174 fprintf(fd,"row %d:",i*bs+j); 175 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 176 for ( l=0; l<bs; l++ ) { 177 #if defined(PETSC_COMPLEX) 178 if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 179 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 180 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 181 else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 182 fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 183 #else 184 if (a->a[bs2*k + l*bs + j] != 0.0) 185 fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 186 #endif 187 } 188 } 189 fprintf(fd,"\n"); 190 } 191 } 192 } 193 else { 194 for ( i=0; i<a->mbs; i++ ) { 195 for ( j=0; j<bs; j++ ) { 196 fprintf(fd,"row %d:",i*bs+j); 197 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 198 for ( l=0; l<bs; l++ ) { 199 #if defined(PETSC_COMPLEX) 200 if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 201 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 202 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 203 } 204 else { 205 fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 206 } 207 #else 208 fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 209 #endif 210 } 211 } 212 fprintf(fd,"\n"); 213 } 214 } 215 } 216 fflush(fd); 217 return 0; 218 } 219 220 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 221 { 222 Mat A = (Mat) obj; 223 ViewerType vtype; 224 int ierr; 225 226 if (!viewer) { 227 viewer = STDOUT_VIEWER_SELF; 228 } 229 230 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 231 if (vtype == MATLAB_VIEWER) { 232 SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 233 } 234 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 235 return MatView_SeqBAIJ_ASCII(A,viewer); 236 } 237 else if (vtype == BINARY_FILE_VIEWER) { 238 return MatView_SeqBAIJ_Binary(A,viewer); 239 } 240 else if (vtype == DRAW_VIEWER) { 241 SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported"); 242 } 243 return 0; 244 } 245 246 #define CHUNKSIZE 10 247 248 /* This version has row oriented v */ 249 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 250 { 251 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 252 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 253 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 254 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 255 int ridx,cidx,bs2=a->bs2; 256 Scalar *ap,value,*aa=a->a,*bap; 257 258 for ( k=0; k<m; k++ ) { /* loop over added rows */ 259 row = im[k]; brow = row/bs; 260 if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row"); 261 if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large"); 262 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 263 rmax = imax[brow]; nrow = ailen[brow]; 264 low = 0; 265 for ( l=0; l<n; l++ ) { /* loop over added columns */ 266 if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column"); 267 if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large"); 268 col = in[l]; bcol = col/bs; 269 ridx = row % bs; cidx = col % bs; 270 if (roworiented) { 271 value = *v++; 272 } 273 else { 274 value = v[k + l*m]; 275 } 276 if (!sorted) low = 0; high = nrow; 277 while (high-low > 5) { 278 t = (low+high)/2; 279 if (rp[t] > bcol) high = t; 280 else low = t; 281 } 282 for ( i=low; i<high; i++ ) { 283 if (rp[i] > bcol) break; 284 if (rp[i] == bcol) { 285 bap = ap + bs2*i + bs*cidx + ridx; 286 if (is == ADD_VALUES) *bap += value; 287 else *bap = value; 288 goto noinsert; 289 } 290 } 291 if (nonew) goto noinsert; 292 if (nrow >= rmax) { 293 /* there is no extra room in row, therefore enlarge */ 294 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 295 Scalar *new_a; 296 297 /* malloc new storage space */ 298 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 299 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 300 new_j = (int *) (new_a + bs2*new_nz); 301 new_i = new_j + new_nz; 302 303 /* copy over old data into new slots */ 304 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 305 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 306 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 307 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 308 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 309 len*sizeof(int)); 310 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 311 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 312 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 313 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 314 /* free up old matrix storage */ 315 PetscFree(a->a); 316 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 317 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 318 a->singlemalloc = 1; 319 320 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 321 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 322 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 323 a->maxnz += CHUNKSIZE; 324 a->reallocs++; 325 a->nz++; 326 } 327 N = nrow++ - 1; 328 /* shift up all the later entries in this row */ 329 for ( ii=N; ii>=i; ii-- ) { 330 rp[ii+1] = rp[ii]; 331 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 332 } 333 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 334 rp[i] = bcol; 335 ap[bs2*i + bs*cidx + ridx] = value; 336 noinsert:; 337 low = i; 338 } 339 ailen[brow] = nrow; 340 } 341 return 0; 342 } 343 344 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 345 { 346 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 347 *m = a->m; *n = a->n; 348 return 0; 349 } 350 351 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 352 { 353 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 354 *m = 0; *n = a->m; 355 return 0; 356 } 357 358 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 359 { 360 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 361 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 362 Scalar *aa,*v_i,*aa_i; 363 364 bs = a->bs; 365 ai = a->i; 366 aj = a->j; 367 aa = a->a; 368 bs2 = a->bs2; 369 370 if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range"); 371 372 bn = row/bs; /* Block number */ 373 bp = row % bs; /* Block Position */ 374 M = ai[bn+1] - ai[bn]; 375 *nz = bs*M; 376 377 if (v) { 378 *v = 0; 379 if (*nz) { 380 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 381 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 382 v_i = *v + i*bs; 383 aa_i = aa + bs2*(ai[bn] + i); 384 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 385 } 386 } 387 } 388 389 if (idx) { 390 *idx = 0; 391 if (*nz) { 392 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 393 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 394 idx_i = *idx + i*bs; 395 itmp = bs*aj[ai[bn] + i]; 396 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 397 } 398 } 399 } 400 return 0; 401 } 402 403 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 404 { 405 if (idx) {if (*idx) PetscFree(*idx);} 406 if (v) {if (*v) PetscFree(*v);} 407 return 0; 408 } 409 410 static int MatTranspose_SeqBAIJ(Mat A,Mat *B) 411 { 412 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 413 Mat C; 414 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 415 int *rows,*cols,bs2=a->bs2; 416 Scalar *array=a->a; 417 418 if (B==PETSC_NULL && mbs!=nbs) 419 SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place"); 420 col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 421 PetscMemzero(col,(1+nbs)*sizeof(int)); 422 423 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 424 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 425 PetscFree(col); 426 rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 427 cols = rows + bs; 428 for ( i=0; i<mbs; i++ ) { 429 cols[0] = i*bs; 430 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 431 len = ai[i+1] - ai[i]; 432 for ( j=0; j<len; j++ ) { 433 rows[0] = (*aj++)*bs; 434 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 435 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 436 array += bs2; 437 } 438 } 439 PetscFree(rows); 440 441 ierr = MatAssemblyBegin(C,FINAL_ASSEMBLY); CHKERRQ(ierr); 442 ierr = MatAssemblyEnd(C,FINAL_ASSEMBLY); CHKERRQ(ierr); 443 444 if (B != PETSC_NULL) { 445 *B = C; 446 } else { 447 /* This isn't really an in-place transpose */ 448 PetscFree(a->a); 449 if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 450 if (a->diag) PetscFree(a->diag); 451 if (a->ilen) PetscFree(a->ilen); 452 if (a->imax) PetscFree(a->imax); 453 if (a->solve_work) PetscFree(a->solve_work); 454 PetscFree(a); 455 PetscMemcpy(A,C,sizeof(struct _Mat)); 456 PetscHeaderDestroy(C); 457 } 458 return 0; 459 } 460 461 462 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 463 { 464 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 465 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 466 int m = a->m,*ip, N, *ailen = a->ilen; 467 int mbs = a->mbs, bs2 = a->bs2; 468 Scalar *aa = a->a, *ap; 469 470 if (mode == FLUSH_ASSEMBLY) return 0; 471 472 for ( i=1; i<mbs; i++ ) { 473 /* move each row back by the amount of empty slots (fshift) before it*/ 474 fshift += imax[i-1] - ailen[i-1]; 475 if (fshift) { 476 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 477 N = ailen[i]; 478 for ( j=0; j<N; j++ ) { 479 ip[j-fshift] = ip[j]; 480 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 481 } 482 } 483 ai[i] = ai[i-1] + ailen[i-1]; 484 } 485 if (mbs) { 486 fshift += imax[mbs-1] - ailen[mbs-1]; 487 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 488 } 489 /* reset ilen and imax for each row */ 490 for ( i=0; i<mbs; i++ ) { 491 ailen[i] = imax[i] = ai[i+1] - ai[i]; 492 } 493 a->nz = ai[mbs]; 494 495 /* diagonals may have moved, so kill the diagonal pointers */ 496 if (fshift && a->diag) { 497 PetscFree(a->diag); 498 PLogObjectMemory(A,-(m+1)*sizeof(int)); 499 a->diag = 0; 500 } 501 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Unneed storage space(blocks) %d used %d, rows %d, block size %d\n", fshift*bs2,a->nz*bs2,m,a->bs); 502 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Number of mallocs during MatSetValues %d\n", 503 a->reallocs); 504 return 0; 505 } 506 507 static int MatZeroEntries_SeqBAIJ(Mat A) 508 { 509 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 510 PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 511 return 0; 512 } 513 514 int MatDestroy_SeqBAIJ(PetscObject obj) 515 { 516 Mat A = (Mat) obj; 517 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 518 519 #if defined(PETSC_LOG) 520 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 521 #endif 522 PetscFree(a->a); 523 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 524 if (a->diag) PetscFree(a->diag); 525 if (a->ilen) PetscFree(a->ilen); 526 if (a->imax) PetscFree(a->imax); 527 if (a->solve_work) PetscFree(a->solve_work); 528 if (a->mult_work) PetscFree(a->mult_work); 529 PetscFree(a); 530 PLogObjectDestroy(A); 531 PetscHeaderDestroy(A); 532 return 0; 533 } 534 535 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 536 { 537 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 538 if (op == ROW_ORIENTED) a->roworiented = 1; 539 else if (op == COLUMN_ORIENTED) a->roworiented = 0; 540 else if (op == COLUMNS_SORTED) a->sorted = 1; 541 else if (op == NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 542 else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 543 else if (op == ROWS_SORTED || 544 op == SYMMETRIC_MATRIX || 545 op == STRUCTURALLY_SYMMETRIC_MATRIX || 546 op == YES_NEW_DIAGONALS) 547 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 548 else if (op == NO_NEW_DIAGONALS) 549 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");} 550 else 551 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 552 return 0; 553 } 554 555 556 /* -------------------------------------------------------*/ 557 /* Should check that shapes of vectors and matrices match */ 558 /* -------------------------------------------------------*/ 559 #include "pinclude/plapack.h" 560 561 static int MatMultAdd_SeqBAIJ_Private(Mat A,Vec xx,Vec yy,Vec zz) 562 { 563 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 564 Scalar *xg,*y,*zg; 565 register Scalar *x,*z,*v,sum,*xb,sum1,sum2,sum3,sum4,sum5; 566 register Scalar x1,x2,x3,x4,x5; 567 int mbs=a->mbs,m=a->m,i,*idx,*ii; 568 int bs=a->bs,j,n,bs2=a->bs2,ierr; 569 570 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 571 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 572 573 if (yy==PETSC_NULL) PetscMemzero(z,m*sizeof(Scalar)); /* MatMult() */ 574 else if (zz!=yy){ 575 ierr = VecGetArray(yy,&y); CHKERRQ(ierr); 576 PetscMemcpy(z,y,m*sizeof(Scalar)); 577 ierr = VecRestoreArray(yy,&y); CHKERRQ(ierr); 578 } 579 580 idx = a->j; 581 v = a->a; 582 ii = a->i; 583 584 switch (bs) { 585 case 1: 586 for ( i=0; i<mbs; i++ ) { 587 n = ii[1] - ii[0]; ii++; 588 sum = 0.0; 589 while (n--) sum += *v++ * x[*idx++]; 590 z[i] += sum; 591 } 592 break; 593 case 2: 594 for ( i=0; i<mbs; i++ ) { 595 n = ii[1] - ii[0]; ii++; 596 sum1 = 0.0; sum2 = 0.0; 597 for ( j=0; j<n; j++ ) { 598 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 599 sum1 += v[0]*x1 + v[2]*x2; 600 sum2 += v[1]*x1 + v[3]*x2; 601 v += 4; 602 } 603 z[0] += sum1; z[1] += sum2; 604 z += 2; 605 } 606 break; 607 case 3: 608 for ( i=0; i<mbs; i++ ) { 609 n = ii[1] - ii[0]; ii++; 610 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 611 for ( j=0; j<n; j++ ) { 612 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 613 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 614 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 615 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 616 v += 9; 617 } 618 z[0] += sum1; z[1] += sum2; z[2] += sum3; 619 z += 3; 620 } 621 break; 622 case 4: 623 for ( i=0; i<mbs; i++ ) { 624 n = ii[1] - ii[0]; ii++; 625 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 626 for ( j=0; j<n; j++ ) { 627 xb = x + 4*(*idx++); 628 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 629 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 630 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 631 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 632 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 633 v += 16; 634 } 635 z[0] += sum1; z[1] += sum2; z[2] += sum3; z[3] += sum4; 636 z += 4; 637 } 638 break; 639 case 5: 640 for ( i=0; i<mbs; i++ ) { 641 n = ii[1] - ii[0]; ii++; 642 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 643 for ( j=0; j<n; j++ ) { 644 xb = x + 5*(*idx++); 645 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 646 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 647 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 648 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 649 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 650 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 651 v += 25; 652 } 653 z[0] += sum1; z[1] += sum2; z[2] += sum3; z[3] += sum4; z[4] += sum5; 654 z += 5; 655 } 656 break; 657 /* block sizes larger then 5 by 5 are handled by BLAS */ 658 default: { 659 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 660 if (!a->mult_work) { 661 k = PetscMax(a->m,a->n); 662 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 663 CHKPTRQ(a->mult_work); 664 } 665 work = a->mult_work; 666 for ( i=0; i<mbs; i++ ) { 667 n = ii[1] - ii[0]; ii++; 668 ncols = n*bs; 669 workt = work; 670 for ( j=0; j<n; j++ ) { 671 xb = x + bs*(*idx++); 672 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 673 workt += bs; 674 } 675 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 676 v += n*bs2; 677 z += bs; 678 } 679 } 680 } 681 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 682 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 683 return 0; 684 } 685 686 static int MatMult_SeqBAIJ(Mat A,Vec xx, Vec yy) 687 { 688 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 689 int ierr; 690 691 ierr = MatMultAdd_SeqBAIJ_Private(A,xx,PETSC_NULL,yy); CHKERRQ(ierr); 692 PLogFlops(2*(a->bs2)*(a->nz)-a->m); 693 return 0; 694 } 695 696 static int MatMultAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 697 { 698 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 699 int ierr; 700 701 ierr = MatMultAdd_SeqBAIJ_Private(A,xx,yy,zz); CHKERRQ(ierr); 702 PLogFlops(2*(a->bs2)*(a->nz)); 703 return 0; 704 } 705 706 static int MatMultTransAdd_SeqBAIJ_Private(Mat A,Vec xx,Vec yy,Vec zz) 707 { 708 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 709 Scalar *xg,*y,*zg,*zb; 710 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 711 int mbs=a->mbs,N=a->n,i,*idx,*ii,*ai=a->i,rval; 712 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 713 714 715 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 716 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 717 718 if (yy==PETSC_NULL) PetscMemzero(z,N*sizeof(Scalar)); /* MatMultTrans() */ 719 else if (zz!=yy){ 720 ierr = VecGetArray(yy,&y); CHKERRQ(ierr); 721 PetscMemcpy(z,y,N*sizeof(Scalar)); 722 ierr = VecRestoreArray(yy,&y); CHKERRQ(ierr); 723 } 724 725 idx = a->j; 726 v = a->a; 727 ii = a->i; 728 729 switch (bs) { 730 case 1: 731 for ( i=0; i<mbs; i++ ) { 732 n = ii[1] - ii[0]; ii++; 733 xb = x + i; x1 = xb[0]; 734 ib = idx + ai[i]; 735 for ( j=0; j<n; j++ ) { 736 z[ib[j]] += *v++ * x1; 737 } 738 } 739 break; 740 case 2: 741 for ( i=0; i<mbs; i++ ) { 742 n = ii[1] - ii[0]; ii++; 743 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 744 ib = idx + ai[i]; 745 for ( j=0; j<n; j++ ) { 746 rval = ib[j]*2; 747 z[rval++] += v[0]*x1 + v[1]*x2; 748 z[rval++] += v[2]*x1 + v[3]*x2; 749 v += 4; 750 } 751 } 752 break; 753 case 3: 754 for ( i=0; i<mbs; i++ ) { 755 n = ii[1] - ii[0]; ii++; 756 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 757 ib = idx + ai[i]; 758 for ( j=0; j<n; j++ ) { 759 rval = ib[j]*3; 760 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 761 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 762 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 763 v += 9; 764 } 765 } 766 break; 767 case 4: 768 for ( i=0; i<mbs; i++ ) { 769 n = ii[1] - ii[0]; ii++; 770 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 771 ib = idx + ai[i]; 772 for ( j=0; j<n; j++ ) { 773 rval = ib[j]*4; 774 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 775 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 776 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 777 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 778 v += 16; 779 } 780 } 781 break; 782 case 5: 783 for ( i=0; i<mbs; i++ ) { 784 n = ii[1] - ii[0]; ii++; 785 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 786 x4 = xb[3]; x5 = xb[4]; 787 ib = idx + ai[i]; 788 for ( j=0; j<n; j++ ) { 789 rval = ib[j]*5; 790 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 791 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 792 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 793 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 794 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 795 v += 25; 796 } 797 } 798 break; 799 /* block sizes larger then 5 by 5 are handled by BLAS */ 800 default: { 801 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 802 if (!a->mult_work) { 803 k = PetscMax(a->m,a->n); 804 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 805 CHKPTRQ(a->mult_work); 806 } 807 work = a->mult_work; 808 for ( i=0; i<mbs; i++ ) { 809 n = ii[1] - ii[0]; ii++; 810 ncols = n*bs; 811 PetscMemzero(work,ncols*sizeof(Scalar)); 812 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 813 v += n*bs2; 814 x += bs; 815 workt = work; 816 for ( j=0; j<n; j++ ) { 817 zb = z + bs*(*idx++); 818 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 819 workt += bs; 820 } 821 } 822 } 823 } 824 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 825 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 826 return 0; 827 } 828 829 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx, Vec yy) 830 { 831 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 832 int ierr; 833 834 ierr = MatMultTransAdd_SeqBAIJ_Private(A,xx,PETSC_NULL,yy); CHKERRQ(ierr); 835 PLogFlops(2*(a->bs2)*(a->nz)-a->n); 836 return 0; 837 } 838 839 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 840 { 841 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 842 int ierr; 843 844 ierr = MatMultTransAdd_SeqBAIJ_Private(A,xx,yy,zz); CHKERRQ(ierr); 845 PLogFlops(2*(a->bs2)*(a->nz)); 846 return 0; 847 } 848 849 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem) 850 { 851 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 852 if (nz) *nz = a->bs2*a->nz; 853 if (nza) *nza = a->maxnz; 854 if (mem) *mem = (int)A->mem; 855 return 0; 856 } 857 858 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 859 { 860 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 861 862 if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type"); 863 864 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 865 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 866 (a->nz != b->nz)) { 867 *flg = PETSC_FALSE; return 0; 868 } 869 870 /* if the a->i are the same */ 871 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 872 *flg = PETSC_FALSE; return 0; 873 } 874 875 /* if a->j are the same */ 876 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 877 *flg = PETSC_FALSE; return 0; 878 } 879 880 /* if a->a are the same */ 881 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 882 *flg = PETSC_FALSE; return 0; 883 } 884 *flg = PETSC_TRUE; 885 return 0; 886 887 } 888 889 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 890 { 891 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 892 int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 893 Scalar *x, zero = 0.0,*aa,*aa_j; 894 895 bs = a->bs; 896 aa = a->a; 897 ai = a->i; 898 aj = a->j; 899 ambs = a->mbs; 900 bs2 = a->bs2; 901 902 VecSet(&zero,v); 903 VecGetArray(v,&x); VecGetLocalSize(v,&n); 904 if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector"); 905 for ( i=0; i<ambs; i++ ) { 906 for ( j=ai[i]; j<ai[i+1]; j++ ) { 907 if (aj[j] == i) { 908 row = i*bs; 909 aa_j = aa+j*bs2; 910 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 911 break; 912 } 913 } 914 } 915 return 0; 916 } 917 918 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 919 { 920 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 921 Scalar *l,*r,x,*v,*aa,*li,*ri; 922 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 923 924 ai = a->i; 925 aj = a->j; 926 aa = a->a; 927 m = a->m; 928 n = a->n; 929 bs = a->bs; 930 mbs = a->mbs; 931 bs2 = a->bs2; 932 if (ll) { 933 VecGetArray(ll,&l); VecGetSize(ll,&lm); 934 if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length"); 935 for ( i=0; i<mbs; i++ ) { /* for each block row */ 936 M = ai[i+1] - ai[i]; 937 li = l + i*bs; 938 v = aa + bs2*ai[i]; 939 for ( j=0; j<M; j++ ) { /* for each block */ 940 for ( k=0; k<bs2; k++ ) { 941 (*v++) *= li[k%bs]; 942 } 943 } 944 } 945 } 946 947 if (rr) { 948 VecGetArray(rr,&r); VecGetSize(rr,&rn); 949 if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length"); 950 for ( i=0; i<mbs; i++ ) { /* for each block row */ 951 M = ai[i+1] - ai[i]; 952 v = aa + bs2*ai[i]; 953 for ( j=0; j<M; j++ ) { /* for each block */ 954 ri = r + bs*aj[ai[i]+j]; 955 for ( k=0; k<bs; k++ ) { 956 x = ri[k]; 957 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 958 } 959 } 960 } 961 } 962 return 0; 963 } 964 965 966 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 967 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 968 extern int MatSolve_SeqBAIJ(Mat,Vec,Vec); 969 extern int MatSolveAdd_SeqBAIJ(Mat,Vec,Vec,Vec); 970 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 971 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 972 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 973 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 974 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 975 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 976 977 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 978 { 979 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 980 Scalar *v = a->a; 981 double sum = 0.0; 982 int i,nz=a->nz,bs2=a->bs2; 983 984 if (type == NORM_FROBENIUS) { 985 for (i=0; i< bs2*nz; i++ ) { 986 #if defined(PETSC_COMPLEX) 987 sum += real(conj(*v)*(*v)); v++; 988 #else 989 sum += (*v)*(*v); v++; 990 #endif 991 } 992 *norm = sqrt(sum); 993 } 994 else { 995 SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 996 } 997 return 0; 998 } 999 1000 /* 1001 note: This can only work for identity for row and col. It would 1002 be good to check this and otherwise generate an error. 1003 */ 1004 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 1005 { 1006 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1007 Mat outA; 1008 int ierr; 1009 1010 if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 1011 1012 outA = inA; 1013 inA->factor = FACTOR_LU; 1014 a->row = row; 1015 a->col = col; 1016 1017 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1018 1019 if (!a->diag) { 1020 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 1021 } 1022 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 1023 return 0; 1024 } 1025 1026 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 1027 { 1028 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1029 int one = 1, totalnz = a->bs2*a->nz; 1030 BLscal_( &totalnz, alpha, a->a, &one ); 1031 PLogFlops(totalnz); 1032 return 0; 1033 } 1034 1035 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 1036 { 1037 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1038 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1039 int *ai = a->i, *ailen = a->ilen; 1040 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 1041 Scalar *ap, *aa = a->a, zero = 0.0; 1042 1043 for ( k=0; k<m; k++ ) { /* loop over rows */ 1044 row = im[k]; brow = row/bs; 1045 if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row"); 1046 if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large"); 1047 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1048 nrow = ailen[brow]; 1049 for ( l=0; l<n; l++ ) { /* loop over columns */ 1050 if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column"); 1051 if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large"); 1052 col = in[l] ; 1053 bcol = col/bs; 1054 cidx = col%bs; 1055 ridx = row%bs; 1056 high = nrow; 1057 low = 0; /* assume unsorted */ 1058 while (high-low > 5) { 1059 t = (low+high)/2; 1060 if (rp[t] > bcol) high = t; 1061 else low = t; 1062 } 1063 for ( i=low; i<high; i++ ) { 1064 if (rp[i] > bcol) break; 1065 if (rp[i] == bcol) { 1066 *v++ = ap[bs2*i+bs*cidx+ridx]; 1067 goto finished; 1068 } 1069 } 1070 *v++ = zero; 1071 finished:; 1072 } 1073 } 1074 return 0; 1075 } 1076 1077 int MatPrintHelp_SeqBAIJ(Mat A) 1078 { 1079 static int called = 0; 1080 1081 if (called) return 0; else called = 1; 1082 return 0; 1083 } 1084 /* -------------------------------------------------------------------*/ 1085 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 1086 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 1087 MatMult_SeqBAIJ,MatMultAdd_SeqBAIJ, 1088 MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 1089 MatSolve_SeqBAIJ,MatSolveAdd_SeqBAIJ, 1090 0,0, 1091 MatLUFactor_SeqBAIJ,0, 1092 0, 1093 MatTranspose_SeqBAIJ, 1094 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 1095 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 1096 0,MatAssemblyEnd_SeqBAIJ, 1097 0, 1098 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0, 1099 MatGetReordering_SeqBAIJ, 1100 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 1101 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 1102 MatILUFactorSymbolic_SeqBAIJ,0, 1103 0,0,/* MatConvert_SeqBAIJ */ 0, 1104 0,0, 1105 MatConvertSameType_SeqBAIJ,0,0, 1106 MatILUFactor_SeqBAIJ,0,0, 1107 0,0, 1108 MatGetValues_SeqBAIJ,0, 1109 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 1110 0}; 1111 1112 /*@C 1113 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1114 compressed row) format. For good matrix assembly performance the 1115 user should preallocate the matrix storage by setting the parameter nz 1116 (or nzz). By setting these parameters accurately, performance can be 1117 increased by more than a factor of 50. 1118 1119 Input Parameters: 1120 . comm - MPI communicator, set to MPI_COMM_SELF 1121 . bs - size of block 1122 . m - number of rows 1123 . n - number of columns 1124 . nz - number of block nonzeros per block row (same for all rows) 1125 . nzz - number of block nonzeros per block row or PETSC_NULL 1126 (possibly different for each row) 1127 1128 Output Parameter: 1129 . A - the matrix 1130 1131 Notes: 1132 The block AIJ format is fully compatible with standard Fortran 77 1133 storage. That is, the stored row and column indices can begin at 1134 either one (as in Fortran) or zero. See the users' manual for details. 1135 1136 Specify the preallocated storage with either nz or nnz (not both). 1137 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1138 allocation. For additional details, see the users manual chapter on 1139 matrices and the file $(PETSC_DIR)/Performance. 1140 1141 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1142 @*/ 1143 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 1144 { 1145 Mat B; 1146 Mat_SeqBAIJ *b; 1147 int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs; 1148 1149 if (mbs*bs!=m || nbs*bs!=n) 1150 SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize"); 1151 1152 *A = 0; 1153 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 1154 PLogObjectCreate(B); 1155 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 1156 PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 1157 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1158 switch (bs) { 1159 case 1: 1160 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1161 break; 1162 case 2: 1163 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1164 break; 1165 case 3: 1166 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1167 break; 1168 case 4: 1169 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1170 break; 1171 case 5: 1172 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1173 break; 1174 } 1175 B->destroy = MatDestroy_SeqBAIJ; 1176 B->view = MatView_SeqBAIJ; 1177 B->factor = 0; 1178 B->lupivotthreshold = 1.0; 1179 b->row = 0; 1180 b->col = 0; 1181 b->reallocs = 0; 1182 1183 b->m = m; B->m = m; B->M = m; 1184 b->n = n; B->n = n; B->N = n; 1185 b->mbs = mbs; 1186 b->nbs = nbs; 1187 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 1188 if (nnz == PETSC_NULL) { 1189 if (nz == PETSC_DEFAULT) nz = 5; 1190 else if (nz <= 0) nz = 1; 1191 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1192 nz = nz*mbs; 1193 } 1194 else { 1195 nz = 0; 1196 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 1197 } 1198 1199 /* allocate the matrix space */ 1200 len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 1201 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 1202 PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 1203 b->j = (int *) (b->a + nz*bs2); 1204 PetscMemzero(b->j,nz*sizeof(int)); 1205 b->i = b->j + nz; 1206 b->singlemalloc = 1; 1207 1208 b->i[0] = 0; 1209 for (i=1; i<mbs+1; i++) { 1210 b->i[i] = b->i[i-1] + b->imax[i-1]; 1211 } 1212 1213 /* b->ilen will count nonzeros in each block row so far. */ 1214 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1215 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1216 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 1217 1218 b->bs = bs; 1219 b->bs2 = bs2; 1220 b->mbs = mbs; 1221 b->nz = 0; 1222 b->maxnz = nz; 1223 b->sorted = 0; 1224 b->roworiented = 1; 1225 b->nonew = 0; 1226 b->diag = 0; 1227 b->solve_work = 0; 1228 b->mult_work = 0; 1229 b->spptr = 0; 1230 *A = B; 1231 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 1232 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 1233 return 0; 1234 } 1235 1236 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 1237 { 1238 Mat C; 1239 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 1240 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1241 1242 if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 1243 1244 *B = 0; 1245 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 1246 PLogObjectCreate(C); 1247 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 1248 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 1249 C->destroy = MatDestroy_SeqBAIJ; 1250 C->view = MatView_SeqBAIJ; 1251 C->factor = A->factor; 1252 c->row = 0; 1253 c->col = 0; 1254 C->assembled = PETSC_TRUE; 1255 1256 c->m = C->m = a->m; 1257 c->n = C->n = a->n; 1258 C->M = a->m; 1259 C->N = a->n; 1260 1261 c->bs = a->bs; 1262 c->bs2 = a->bs2; 1263 c->mbs = a->mbs; 1264 c->nbs = a->nbs; 1265 1266 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 1267 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 1268 for ( i=0; i<mbs; i++ ) { 1269 c->imax[i] = a->imax[i]; 1270 c->ilen[i] = a->ilen[i]; 1271 } 1272 1273 /* allocate the matrix space */ 1274 c->singlemalloc = 1; 1275 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 1276 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 1277 c->j = (int *) (c->a + nz*bs2); 1278 c->i = c->j + nz; 1279 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 1280 if (mbs > 0) { 1281 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 1282 if (cpvalues == COPY_VALUES) { 1283 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 1284 } 1285 } 1286 1287 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1288 c->sorted = a->sorted; 1289 c->roworiented = a->roworiented; 1290 c->nonew = a->nonew; 1291 1292 if (a->diag) { 1293 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 1294 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1295 for ( i=0; i<mbs; i++ ) { 1296 c->diag[i] = a->diag[i]; 1297 } 1298 } 1299 else c->diag = 0; 1300 c->nz = a->nz; 1301 c->maxnz = a->maxnz; 1302 c->solve_work = 0; 1303 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1304 c->mult_work = 0; 1305 *B = C; 1306 return 0; 1307 } 1308 1309 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 1310 { 1311 Mat_SeqBAIJ *a; 1312 Mat B; 1313 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 1314 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1315 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1316 int *masked, nmask,tmp,bs2,ishift; 1317 Scalar *aa; 1318 MPI_Comm comm = ((PetscObject) viewer)->comm; 1319 1320 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 1321 bs2 = bs*bs; 1322 1323 MPI_Comm_size(comm,&size); 1324 if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 1325 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1326 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 1327 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 1328 M = header[1]; N = header[2]; nz = header[3]; 1329 1330 if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 1331 1332 /* 1333 This code adds extra rows to make sure the number of rows is 1334 divisible by the blocksize 1335 */ 1336 mbs = M/bs; 1337 extra_rows = bs - M + bs*(mbs); 1338 if (extra_rows == bs) extra_rows = 0; 1339 else mbs++; 1340 if (extra_rows) { 1341 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loading matrix to match blocksize"); 1342 } 1343 1344 /* read in row lengths */ 1345 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 1346 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 1347 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 1348 1349 /* read in column indices */ 1350 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 1351 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 1352 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 1353 1354 /* loop over row lengths determining block row lengths */ 1355 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1356 PetscMemzero(browlengths,mbs*sizeof(int)); 1357 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 1358 PetscMemzero(mask,mbs*sizeof(int)); 1359 masked = mask + mbs; 1360 rowcount = 0; nzcount = 0; 1361 for ( i=0; i<mbs; i++ ) { 1362 nmask = 0; 1363 for ( j=0; j<bs; j++ ) { 1364 kmax = rowlengths[rowcount]; 1365 for ( k=0; k<kmax; k++ ) { 1366 tmp = jj[nzcount++]/bs; 1367 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1368 } 1369 rowcount++; 1370 } 1371 browlengths[i] += nmask; 1372 /* zero out the mask elements we set */ 1373 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1374 } 1375 1376 /* create our matrix */ 1377 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 1378 CHKERRQ(ierr); 1379 B = *A; 1380 a = (Mat_SeqBAIJ *) B->data; 1381 1382 /* set matrix "i" values */ 1383 a->i[0] = 0; 1384 for ( i=1; i<= mbs; i++ ) { 1385 a->i[i] = a->i[i-1] + browlengths[i-1]; 1386 a->ilen[i-1] = browlengths[i-1]; 1387 } 1388 a->nz = 0; 1389 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 1390 1391 /* read in nonzero values */ 1392 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 1393 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 1394 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 1395 1396 /* set "a" and "j" values into matrix */ 1397 nzcount = 0; jcount = 0; 1398 for ( i=0; i<mbs; i++ ) { 1399 nzcountb = nzcount; 1400 nmask = 0; 1401 for ( j=0; j<bs; j++ ) { 1402 kmax = rowlengths[i*bs+j]; 1403 for ( k=0; k<kmax; k++ ) { 1404 tmp = jj[nzcount++]/bs; 1405 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1406 } 1407 rowcount++; 1408 } 1409 /* sort the masked values */ 1410 PetscSortInt(nmask,masked); 1411 1412 /* set "j" values into matrix */ 1413 maskcount = 1; 1414 for ( j=0; j<nmask; j++ ) { 1415 a->j[jcount++] = masked[j]; 1416 mask[masked[j]] = maskcount++; 1417 } 1418 /* set "a" values into matrix */ 1419 ishift = bs2*a->i[i]; 1420 for ( j=0; j<bs; j++ ) { 1421 kmax = rowlengths[i*bs+j]; 1422 for ( k=0; k<kmax; k++ ) { 1423 tmp = jj[nzcountb]/bs ; 1424 block = mask[tmp] - 1; 1425 point = jj[nzcountb] - bs*tmp; 1426 idx = ishift + bs2*block + j + bs*point; 1427 a->a[idx] = aa[nzcountb++]; 1428 } 1429 } 1430 /* zero out the mask elements we set */ 1431 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1432 } 1433 if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 1434 1435 PetscFree(rowlengths); 1436 PetscFree(browlengths); 1437 PetscFree(aa); 1438 PetscFree(jj); 1439 PetscFree(mask); 1440 1441 B->assembled = PETSC_TRUE; 1442 1443 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 1444 if (flg) { 1445 Viewer tviewer; 1446 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1447 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr); 1448 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1449 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1450 } 1451 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 1452 if (flg) { 1453 Viewer tviewer; 1454 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1455 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr); 1456 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1457 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1458 } 1459 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 1460 if (flg) { 1461 Viewer tviewer; 1462 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1463 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1464 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1465 } 1466 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 1467 if (flg) { 1468 Viewer tviewer; 1469 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1470 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr); 1471 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1472 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1473 } 1474 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 1475 if (flg) { 1476 Viewer tviewer; 1477 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 1478 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 1479 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 1480 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1481 } 1482 return 0; 1483 } 1484 1485 1486 1487