1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 15 int MatInvertBlockDiagonal_SeqBAIJ(Mat A) 16 { 17 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 18 int *diag_offset,i,bs = a->bs,mbs = a->mbs,ierr; 19 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 20 21 PetscFunctionBegin; 22 if (a->idiagvalid) PetscFunctionReturn(0); 23 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 24 diag_offset = a->diag; 25 if (!a->idiag) { 26 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 27 } 28 diag = a->idiag; 29 mdiag = a->idiag+bs*bs*mbs; 30 /* factor and invert each block */ 31 switch (a->bs){ 32 case 2: 33 for (i=0; i<mbs; i++) { 34 odiag = v + 4*diag_offset[i]; 35 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 36 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 37 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 38 diag += 4; 39 mdiag += 4; 40 } 41 break; 42 case 3: 43 for (i=0; i<mbs; i++) { 44 odiag = v + 9*diag_offset[i]; 45 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 46 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 47 diag[8] = odiag[8]; 48 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 49 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 50 mdiag[8] = odiag[8]; 51 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 52 diag += 9; 53 mdiag += 9; 54 } 55 break; 56 case 4: 57 for (i=0; i<mbs; i++) { 58 odiag = v + 16*diag_offset[i]; 59 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 60 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 61 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 62 diag += 16; 63 mdiag += 16; 64 } 65 break; 66 case 5: 67 for (i=0; i<mbs; i++) { 68 odiag = v + 25*diag_offset[i]; 69 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 70 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 71 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 72 diag += 25; 73 mdiag += 25; 74 } 75 break; 76 default: 77 SETERRQ1(1,"not supported for block size %d",a->bs); 78 } 79 a->idiagvalid = PETSC_TRUE; 80 PetscFunctionReturn(0); 81 } 82 83 #undef __FUNCT__ 84 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 85 int MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 86 { 87 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 88 PetscScalar *x,x1,x2,s1,s2; 89 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 90 int ierr,m = a->mbs,i,i2,nz,idx; 91 const int *diag,*ai = a->i,*aj = a->j,*vi; 92 93 PetscFunctionBegin; 94 its = its*lits; 95 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 96 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 97 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 98 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 99 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 100 101 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 102 103 diag = a->diag; 104 idiag = a->idiag; 105 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 106 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 107 108 if (flag & SOR_ZERO_INITIAL_GUESS) { 109 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 110 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 111 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 112 i2 = 2; 113 idiag += 4; 114 for (i=1; i<m; i++) { 115 v = aa + 4*ai[i]; 116 vi = aj + ai[i]; 117 nz = diag[i] - ai[i]; 118 s1 = b[i2]; s2 = b[i2+1]; 119 while (nz--) { 120 idx = 2*(*vi++); 121 x1 = x[idx]; x2 = x[1+idx]; 122 s1 -= v[0]*x1 + v[2]*x2; 123 s2 -= v[1]*x1 + v[3]*x2; 124 v += 4; 125 } 126 x[i2] = idiag[0]*s1 + idiag[2]*s2; 127 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 128 idiag += 4; 129 i2 += 2; 130 } 131 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 132 PetscLogFlops(4*(a->nz)); 133 } 134 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 135 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 136 i2 = 0; 137 mdiag = a->idiag+4*a->mbs; 138 for (i=0; i<m; i++) { 139 x1 = x[i2]; x2 = x[i2+1]; 140 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 141 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 142 mdiag += 4; 143 i2 += 2; 144 } 145 PetscLogFlops(6*m); 146 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 147 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 148 } 149 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 150 idiag = a->idiag+4*a->mbs - 4; 151 i2 = 2*m - 2; 152 x1 = x[i2]; x2 = x[i2+1]; 153 x[i2] = idiag[0]*x1 + idiag[2]*x2; 154 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 155 idiag -= 4; 156 i2 -= 2; 157 for (i=m-2; i>=0; i--) { 158 v = aa + 4*(diag[i]+1); 159 vi = aj + diag[i] + 1; 160 nz = ai[i+1] - diag[i] - 1; 161 s1 = x[i2]; s2 = x[i2+1]; 162 while (nz--) { 163 idx = 2*(*vi++); 164 x1 = x[idx]; x2 = x[1+idx]; 165 s1 -= v[0]*x1 + v[2]*x2; 166 s2 -= v[1]*x1 + v[3]*x2; 167 v += 4; 168 } 169 x[i2] = idiag[0]*s1 + idiag[2]*s2; 170 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 171 idiag -= 4; 172 i2 -= 2; 173 } 174 PetscLogFlops(4*(a->nz)); 175 } 176 } else { 177 SETERRQ(1,"Only supports point block SOR with zero initial guess"); 178 } 179 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 180 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 181 PetscFunctionReturn(0); 182 } 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 186 int MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 187 { 188 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 189 PetscScalar *x,x1,x2,x3,s1,s2,s3; 190 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 191 int ierr,m = a->mbs,i,i2,nz,idx; 192 const int *diag,*ai = a->i,*aj = a->j,*vi; 193 194 PetscFunctionBegin; 195 its = its*lits; 196 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 197 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 198 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 199 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 200 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 201 202 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 203 204 diag = a->diag; 205 idiag = a->idiag; 206 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 207 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 208 209 if (flag & SOR_ZERO_INITIAL_GUESS) { 210 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 211 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 212 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 213 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 214 i2 = 3; 215 idiag += 9; 216 for (i=1; i<m; i++) { 217 v = aa + 9*ai[i]; 218 vi = aj + ai[i]; 219 nz = diag[i] - ai[i]; 220 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 221 while (nz--) { 222 idx = 3*(*vi++); 223 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 224 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 225 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 226 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 227 v += 9; 228 } 229 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 230 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 231 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 232 idiag += 9; 233 i2 += 3; 234 } 235 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 236 PetscLogFlops(9*(a->nz)); 237 } 238 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 239 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 240 i2 = 0; 241 mdiag = a->idiag+9*a->mbs; 242 for (i=0; i<m; i++) { 243 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 244 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 245 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 246 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 247 mdiag += 9; 248 i2 += 3; 249 } 250 PetscLogFlops(15*m); 251 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 252 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 253 } 254 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 255 idiag = a->idiag+9*a->mbs - 9; 256 i2 = 3*m - 3; 257 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 258 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 259 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 260 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 261 idiag -= 9; 262 i2 -= 3; 263 for (i=m-2; i>=0; i--) { 264 v = aa + 9*(diag[i]+1); 265 vi = aj + diag[i] + 1; 266 nz = ai[i+1] - diag[i] - 1; 267 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 268 while (nz--) { 269 idx = 3*(*vi++); 270 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 271 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 272 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 273 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 274 v += 9; 275 } 276 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 277 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 278 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 279 idiag -= 9; 280 i2 -= 3; 281 } 282 PetscLogFlops(9*(a->nz)); 283 } 284 } else { 285 SETERRQ(1,"Only supports point block SOR with zero initial guess"); 286 } 287 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 288 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 289 PetscFunctionReturn(0); 290 } 291 292 #undef __FUNCT__ 293 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 294 int MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 295 { 296 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 297 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 298 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 299 int ierr,m = a->mbs,i,i2,nz,idx; 300 const int *diag,*ai = a->i,*aj = a->j,*vi; 301 302 PetscFunctionBegin; 303 its = its*lits; 304 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 305 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 306 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 307 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 308 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 309 310 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 311 312 diag = a->diag; 313 idiag = a->idiag; 314 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 315 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 316 317 if (flag & SOR_ZERO_INITIAL_GUESS) { 318 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 319 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 320 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 321 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 322 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 323 i2 = 4; 324 idiag += 16; 325 for (i=1; i<m; i++) { 326 v = aa + 16*ai[i]; 327 vi = aj + ai[i]; 328 nz = diag[i] - ai[i]; 329 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 330 while (nz--) { 331 idx = 4*(*vi++); 332 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 333 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 334 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 335 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 336 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 337 v += 16; 338 } 339 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 340 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 341 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 342 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 343 idiag += 16; 344 i2 += 4; 345 } 346 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 347 PetscLogFlops(16*(a->nz)); 348 } 349 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 350 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 351 i2 = 0; 352 mdiag = a->idiag+16*a->mbs; 353 for (i=0; i<m; i++) { 354 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 355 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 356 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 357 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 358 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 359 mdiag += 16; 360 i2 += 4; 361 } 362 PetscLogFlops(28*m); 363 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 364 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 365 } 366 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 367 idiag = a->idiag+16*a->mbs - 16; 368 i2 = 4*m - 4; 369 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 370 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 371 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 372 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 373 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 374 idiag -= 16; 375 i2 -= 4; 376 for (i=m-2; i>=0; i--) { 377 v = aa + 16*(diag[i]+1); 378 vi = aj + diag[i] + 1; 379 nz = ai[i+1] - diag[i] - 1; 380 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 381 while (nz--) { 382 idx = 4*(*vi++); 383 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 384 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 385 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 386 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 387 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 388 v += 16; 389 } 390 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 391 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 392 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 393 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 394 idiag -= 16; 395 i2 -= 4; 396 } 397 PetscLogFlops(16*(a->nz)); 398 } 399 } else { 400 SETERRQ(1,"Only supports point block SOR with zero initial guess"); 401 } 402 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 403 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 404 PetscFunctionReturn(0); 405 } 406 407 #undef __FUNCT__ 408 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 409 int MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 410 { 411 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 412 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 413 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 414 int ierr,m = a->mbs,i,i2,nz,idx; 415 const int *diag,*ai = a->i,*aj = a->j,*vi; 416 417 PetscFunctionBegin; 418 its = its*lits; 419 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 420 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 421 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 422 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 423 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 424 425 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 426 427 diag = a->diag; 428 idiag = a->idiag; 429 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 430 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 431 432 if (flag & SOR_ZERO_INITIAL_GUESS) { 433 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 434 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 435 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 436 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 437 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 438 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 439 i2 = 5; 440 idiag += 25; 441 for (i=1; i<m; i++) { 442 v = aa + 25*ai[i]; 443 vi = aj + ai[i]; 444 nz = diag[i] - ai[i]; 445 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 446 while (nz--) { 447 idx = 5*(*vi++); 448 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 449 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 450 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 451 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 452 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 453 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 454 v += 25; 455 } 456 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 457 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 458 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 459 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 460 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 461 idiag += 25; 462 i2 += 5; 463 } 464 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 465 PetscLogFlops(25*(a->nz)); 466 } 467 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 468 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 469 i2 = 0; 470 mdiag = a->idiag+25*a->mbs; 471 for (i=0; i<m; i++) { 472 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 473 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 474 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 475 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 476 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 477 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 478 mdiag += 25; 479 i2 += 5; 480 } 481 PetscLogFlops(45*m); 482 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 483 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 484 } 485 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 486 idiag = a->idiag+25*a->mbs - 25; 487 i2 = 5*m - 5; 488 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 489 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 490 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 491 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 492 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 493 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 494 idiag -= 25; 495 i2 -= 5; 496 for (i=m-2; i>=0; i--) { 497 v = aa + 25*(diag[i]+1); 498 vi = aj + diag[i] + 1; 499 nz = ai[i+1] - diag[i] - 1; 500 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 501 while (nz--) { 502 idx = 5*(*vi++); 503 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 504 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 505 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 506 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 507 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 508 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 509 v += 25; 510 } 511 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 512 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 513 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 514 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 515 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 516 idiag -= 25; 517 i2 -= 5; 518 } 519 PetscLogFlops(25*(a->nz)); 520 } 521 } else { 522 SETERRQ(1,"Only supports point block SOR with zero initial guess"); 523 } 524 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 525 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 526 PetscFunctionReturn(0); 527 } 528 529 /* 530 Special version for Fun3d sequential benchmark 531 */ 532 #if defined(PETSC_HAVE_FORTRAN_CAPS) 533 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 534 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 535 #define matsetvaluesblocked4_ matsetvaluesblocked4 536 #endif 537 538 EXTERN_C_BEGIN 539 #undef __FUNCT__ 540 #define __FUNCT__ "matsetvaluesblocked4_" 541 void matsetvaluesblocked4_(Mat *AA,int *mm,const int im[],int *nn,const int in[],const PetscScalar v[]) 542 { 543 Mat A = *AA; 544 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 545 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 546 int *ai=a->i,*ailen=a->ilen; 547 int *aj=a->j,stepval; 548 const PetscScalar *value = v; 549 MatScalar *ap,*aa = a->a,*bap; 550 551 PetscFunctionBegin; 552 stepval = (n-1)*4; 553 for (k=0; k<m; k++) { /* loop over added rows */ 554 row = im[k]; 555 rp = aj + ai[row]; 556 ap = aa + 16*ai[row]; 557 nrow = ailen[row]; 558 low = 0; 559 for (l=0; l<n; l++) { /* loop over added columns */ 560 col = in[l]; 561 value = v + k*(stepval+4)*4 + l*4; 562 low = 0; high = nrow; 563 while (high-low > 7) { 564 t = (low+high)/2; 565 if (rp[t] > col) high = t; 566 else low = t; 567 } 568 for (i=low; i<high; i++) { 569 if (rp[i] > col) break; 570 if (rp[i] == col) { 571 bap = ap + 16*i; 572 for (ii=0; ii<4; ii++,value+=stepval) { 573 for (jj=ii; jj<16; jj+=4) { 574 bap[jj] += *value++; 575 } 576 } 577 goto noinsert2; 578 } 579 } 580 N = nrow++ - 1; 581 /* shift up all the later entries in this row */ 582 for (ii=N; ii>=i; ii--) { 583 rp[ii+1] = rp[ii]; 584 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 585 } 586 if (N >= i) { 587 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 588 } 589 rp[i] = col; 590 bap = ap + 16*i; 591 for (ii=0; ii<4; ii++,value+=stepval) { 592 for (jj=ii; jj<16; jj+=4) { 593 bap[jj] = *value++; 594 } 595 } 596 noinsert2:; 597 low = i; 598 } 599 ailen[row] = nrow; 600 } 601 } 602 EXTERN_C_END 603 604 #if defined(PETSC_HAVE_FORTRAN_CAPS) 605 #define matsetvalues4_ MATSETVALUES4 606 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 607 #define matsetvalues4_ matsetvalues4 608 #endif 609 610 EXTERN_C_BEGIN 611 #undef __FUNCT__ 612 #define __FUNCT__ "MatSetValues4_" 613 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 614 { 615 Mat A = *AA; 616 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 617 int *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 618 int *ai=a->i,*ailen=a->ilen; 619 int *aj=a->j,brow,bcol; 620 int ridx,cidx; 621 MatScalar *ap,value,*aa=a->a,*bap; 622 623 PetscFunctionBegin; 624 for (k=0; k<m; k++) { /* loop over added rows */ 625 row = im[k]; brow = row/4; 626 rp = aj + ai[brow]; 627 ap = aa + 16*ai[brow]; 628 nrow = ailen[brow]; 629 low = 0; 630 for (l=0; l<n; l++) { /* loop over added columns */ 631 col = in[l]; bcol = col/4; 632 ridx = row % 4; cidx = col % 4; 633 value = v[l + k*n]; 634 low = 0; high = nrow; 635 while (high-low > 7) { 636 t = (low+high)/2; 637 if (rp[t] > bcol) high = t; 638 else low = t; 639 } 640 for (i=low; i<high; i++) { 641 if (rp[i] > bcol) break; 642 if (rp[i] == bcol) { 643 bap = ap + 16*i + 4*cidx + ridx; 644 *bap += value; 645 goto noinsert1; 646 } 647 } 648 N = nrow++ - 1; 649 /* shift up all the later entries in this row */ 650 for (ii=N; ii>=i; ii--) { 651 rp[ii+1] = rp[ii]; 652 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 653 } 654 if (N>=i) { 655 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 656 } 657 rp[i] = bcol; 658 ap[16*i + 4*cidx + ridx] = value; 659 noinsert1:; 660 low = i; 661 } 662 ailen[brow] = nrow; 663 } 664 } 665 EXTERN_C_END 666 667 /* UGLY, ugly, ugly 668 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 669 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 670 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 671 converts the entries into single precision and then calls ..._MatScalar() to put them 672 into the single precision data structures. 673 */ 674 #if defined(PETSC_USE_MAT_SINGLE) 675 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 676 #else 677 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 678 #endif 679 680 #define CHUNKSIZE 10 681 682 /* 683 Checks for missing diagonals 684 */ 685 #undef __FUNCT__ 686 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 687 int MatMissingDiagonal_SeqBAIJ(Mat A) 688 { 689 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 690 int *diag,*jj = a->j,i,ierr; 691 692 PetscFunctionBegin; 693 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 694 diag = a->diag; 695 for (i=0; i<a->mbs; i++) { 696 if (jj[diag[i]] != i) { 697 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 698 } 699 } 700 PetscFunctionReturn(0); 701 } 702 703 #undef __FUNCT__ 704 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 705 int MatMarkDiagonal_SeqBAIJ(Mat A) 706 { 707 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 708 int i,j,*diag,m = a->mbs,ierr; 709 710 PetscFunctionBegin; 711 if (a->diag) PetscFunctionReturn(0); 712 713 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 714 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 715 for (i=0; i<m; i++) { 716 diag[i] = a->i[i+1]; 717 for (j=a->i[i]; j<a->i[i+1]; j++) { 718 if (a->j[j] == i) { 719 diag[i] = j; 720 break; 721 } 722 } 723 } 724 a->diag = diag; 725 PetscFunctionReturn(0); 726 } 727 728 729 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 730 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 733 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 734 { 735 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 736 int ierr,n = a->mbs,i; 737 738 PetscFunctionBegin; 739 *nn = n; 740 if (!ia) PetscFunctionReturn(0); 741 if (symmetric) { 742 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 743 } else if (oshift == 1) { 744 /* temporarily add 1 to i and j indices */ 745 int nz = a->i[n]; 746 for (i=0; i<nz; i++) a->j[i]++; 747 for (i=0; i<n+1; i++) a->i[i]++; 748 *ia = a->i; *ja = a->j; 749 } else { 750 *ia = a->i; *ja = a->j; 751 } 752 753 PetscFunctionReturn(0); 754 } 755 756 #undef __FUNCT__ 757 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 758 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 759 { 760 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 761 int i,n = a->mbs,ierr; 762 763 PetscFunctionBegin; 764 if (!ia) PetscFunctionReturn(0); 765 if (symmetric) { 766 ierr = PetscFree(*ia);CHKERRQ(ierr); 767 ierr = PetscFree(*ja);CHKERRQ(ierr); 768 } else if (oshift == 1) { 769 int nz = a->i[n]-1; 770 for (i=0; i<nz; i++) a->j[i]--; 771 for (i=0; i<n+1; i++) a->i[i]--; 772 } 773 PetscFunctionReturn(0); 774 } 775 776 #undef __FUNCT__ 777 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 778 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 779 { 780 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 781 782 PetscFunctionBegin; 783 *bs = baij->bs; 784 PetscFunctionReturn(0); 785 } 786 787 788 #undef __FUNCT__ 789 #define __FUNCT__ "MatDestroy_SeqBAIJ" 790 int MatDestroy_SeqBAIJ(Mat A) 791 { 792 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 793 int ierr; 794 795 PetscFunctionBegin; 796 #if defined(PETSC_USE_LOG) 797 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 798 #endif 799 ierr = PetscFree(a->a);CHKERRQ(ierr); 800 if (!a->singlemalloc) { 801 ierr = PetscFree(a->i);CHKERRQ(ierr); 802 ierr = PetscFree(a->j);CHKERRQ(ierr); 803 } 804 if (a->row) { 805 ierr = ISDestroy(a->row);CHKERRQ(ierr); 806 } 807 if (a->col) { 808 ierr = ISDestroy(a->col);CHKERRQ(ierr); 809 } 810 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 811 if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);} 812 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 813 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 814 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 815 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 816 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 817 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 818 #if defined(PETSC_USE_MAT_SINGLE) 819 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 820 #endif 821 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 822 823 ierr = PetscFree(a);CHKERRQ(ierr); 824 PetscFunctionReturn(0); 825 } 826 827 #undef __FUNCT__ 828 #define __FUNCT__ "MatSetOption_SeqBAIJ" 829 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 830 { 831 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 832 833 PetscFunctionBegin; 834 switch (op) { 835 case MAT_ROW_ORIENTED: 836 a->roworiented = PETSC_TRUE; 837 break; 838 case MAT_COLUMN_ORIENTED: 839 a->roworiented = PETSC_FALSE; 840 break; 841 case MAT_COLUMNS_SORTED: 842 a->sorted = PETSC_TRUE; 843 break; 844 case MAT_COLUMNS_UNSORTED: 845 a->sorted = PETSC_FALSE; 846 break; 847 case MAT_KEEP_ZEROED_ROWS: 848 a->keepzeroedrows = PETSC_TRUE; 849 break; 850 case MAT_NO_NEW_NONZERO_LOCATIONS: 851 a->nonew = 1; 852 break; 853 case MAT_NEW_NONZERO_LOCATION_ERR: 854 a->nonew = -1; 855 break; 856 case MAT_NEW_NONZERO_ALLOCATION_ERR: 857 a->nonew = -2; 858 break; 859 case MAT_YES_NEW_NONZERO_LOCATIONS: 860 a->nonew = 0; 861 break; 862 case MAT_ROWS_SORTED: 863 case MAT_ROWS_UNSORTED: 864 case MAT_YES_NEW_DIAGONALS: 865 case MAT_IGNORE_OFF_PROC_ENTRIES: 866 case MAT_USE_HASH_TABLE: 867 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 868 break; 869 case MAT_NO_NEW_DIAGONALS: 870 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 871 case MAT_SYMMETRIC: 872 case MAT_STRUCTURALLY_SYMMETRIC: 873 case MAT_NOT_SYMMETRIC: 874 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 875 case MAT_HERMITIAN: 876 case MAT_NOT_HERMITIAN: 877 case MAT_SYMMETRY_ETERNAL: 878 case MAT_NOT_SYMMETRY_ETERNAL: 879 break; 880 default: 881 SETERRQ(PETSC_ERR_SUP,"unknown option"); 882 } 883 PetscFunctionReturn(0); 884 } 885 886 #undef __FUNCT__ 887 #define __FUNCT__ "MatGetRow_SeqBAIJ" 888 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 889 { 890 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 891 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 892 MatScalar *aa,*aa_i; 893 PetscScalar *v_i; 894 895 PetscFunctionBegin; 896 bs = a->bs; 897 ai = a->i; 898 aj = a->j; 899 aa = a->a; 900 bs2 = a->bs2; 901 902 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d out of range", row); 903 904 bn = row/bs; /* Block number */ 905 bp = row % bs; /* Block Position */ 906 M = ai[bn+1] - ai[bn]; 907 *nz = bs*M; 908 909 if (v) { 910 *v = 0; 911 if (*nz) { 912 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 913 for (i=0; i<M; i++) { /* for each block in the block row */ 914 v_i = *v + i*bs; 915 aa_i = aa + bs2*(ai[bn] + i); 916 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 917 } 918 } 919 } 920 921 if (idx) { 922 *idx = 0; 923 if (*nz) { 924 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 925 for (i=0; i<M; i++) { /* for each block in the block row */ 926 idx_i = *idx + i*bs; 927 itmp = bs*aj[ai[bn] + i]; 928 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 929 } 930 } 931 } 932 PetscFunctionReturn(0); 933 } 934 935 #undef __FUNCT__ 936 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 937 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 938 { 939 int ierr; 940 941 PetscFunctionBegin; 942 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 943 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 944 PetscFunctionReturn(0); 945 } 946 947 #undef __FUNCT__ 948 #define __FUNCT__ "MatTranspose_SeqBAIJ" 949 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 950 { 951 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 952 Mat C; 953 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 954 int *rows,*cols,bs2=a->bs2; 955 PetscScalar *array; 956 957 PetscFunctionBegin; 958 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 959 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 960 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 961 962 #if defined(PETSC_USE_MAT_SINGLE) 963 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 964 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 965 #else 966 array = a->a; 967 #endif 968 969 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 970 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 971 ierr = PetscFree(col);CHKERRQ(ierr); 972 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 973 cols = rows + bs; 974 for (i=0; i<mbs; i++) { 975 cols[0] = i*bs; 976 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 977 len = ai[i+1] - ai[i]; 978 for (j=0; j<len; j++) { 979 rows[0] = (*aj++)*bs; 980 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 981 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 982 array += bs2; 983 } 984 } 985 ierr = PetscFree(rows);CHKERRQ(ierr); 986 #if defined(PETSC_USE_MAT_SINGLE) 987 ierr = PetscFree(array); 988 #endif 989 990 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 991 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 992 993 if (B) { 994 *B = C; 995 } else { 996 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 997 } 998 PetscFunctionReturn(0); 999 } 1000 1001 #undef __FUNCT__ 1002 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1003 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1004 { 1005 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1006 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 1007 PetscScalar *aa; 1008 FILE *file; 1009 1010 PetscFunctionBegin; 1011 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1012 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 1013 col_lens[0] = MAT_FILE_COOKIE; 1014 1015 col_lens[1] = A->m; 1016 col_lens[2] = A->n; 1017 col_lens[3] = a->nz*bs2; 1018 1019 /* store lengths of each row and write (including header) to file */ 1020 count = 0; 1021 for (i=0; i<a->mbs; i++) { 1022 for (j=0; j<bs; j++) { 1023 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1024 } 1025 } 1026 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 1027 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1028 1029 /* store column indices (zero start index) */ 1030 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 1031 count = 0; 1032 for (i=0; i<a->mbs; i++) { 1033 for (j=0; j<bs; j++) { 1034 for (k=a->i[i]; k<a->i[i+1]; k++) { 1035 for (l=0; l<bs; l++) { 1036 jj[count++] = bs*a->j[k] + l; 1037 } 1038 } 1039 } 1040 } 1041 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 1042 ierr = PetscFree(jj);CHKERRQ(ierr); 1043 1044 /* store nonzero values */ 1045 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1046 count = 0; 1047 for (i=0; i<a->mbs; i++) { 1048 for (j=0; j<bs; j++) { 1049 for (k=a->i[i]; k<a->i[i+1]; k++) { 1050 for (l=0; l<bs; l++) { 1051 aa[count++] = a->a[bs2*k + l*bs + j]; 1052 } 1053 } 1054 } 1055 } 1056 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 1057 ierr = PetscFree(aa);CHKERRQ(ierr); 1058 1059 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1060 if (file) { 1061 fprintf(file,"-matload_block_size %d\n",a->bs); 1062 } 1063 PetscFunctionReturn(0); 1064 } 1065 1066 #undef __FUNCT__ 1067 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1068 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1069 { 1070 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1071 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 1072 PetscViewerFormat format; 1073 1074 PetscFunctionBegin; 1075 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1076 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1077 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 1078 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1079 Mat aij; 1080 ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr); 1081 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1082 ierr = MatDestroy(aij);CHKERRQ(ierr); 1083 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1084 PetscFunctionReturn(0); 1085 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1086 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1087 for (i=0; i<a->mbs; i++) { 1088 for (j=0; j<bs; j++) { 1089 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 1090 for (k=a->i[i]; k<a->i[i+1]; k++) { 1091 for (l=0; l<bs; l++) { 1092 #if defined(PETSC_USE_COMPLEX) 1093 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1094 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l, 1095 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1096 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1097 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l, 1098 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1099 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1100 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1101 } 1102 #else 1103 if (a->a[bs2*k + l*bs + j] != 0.0) { 1104 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1105 } 1106 #endif 1107 } 1108 } 1109 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1110 } 1111 } 1112 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1113 } else { 1114 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1115 for (i=0; i<a->mbs; i++) { 1116 for (j=0; j<bs; j++) { 1117 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 1118 for (k=a->i[i]; k<a->i[i+1]; k++) { 1119 for (l=0; l<bs; l++) { 1120 #if defined(PETSC_USE_COMPLEX) 1121 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1122 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 1123 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1124 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1125 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 1126 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1127 } else { 1128 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1129 } 1130 #else 1131 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1132 #endif 1133 } 1134 } 1135 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1136 } 1137 } 1138 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1139 } 1140 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1141 PetscFunctionReturn(0); 1142 } 1143 1144 #undef __FUNCT__ 1145 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1146 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1147 { 1148 Mat A = (Mat) Aa; 1149 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1150 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 1151 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1152 MatScalar *aa; 1153 PetscViewer viewer; 1154 1155 PetscFunctionBegin; 1156 1157 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1158 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1159 1160 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1161 1162 /* loop over matrix elements drawing boxes */ 1163 color = PETSC_DRAW_BLUE; 1164 for (i=0,row=0; i<mbs; i++,row+=bs) { 1165 for (j=a->i[i]; j<a->i[i+1]; j++) { 1166 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1167 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1168 aa = a->a + j*bs2; 1169 for (k=0; k<bs; k++) { 1170 for (l=0; l<bs; l++) { 1171 if (PetscRealPart(*aa++) >= 0.) continue; 1172 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1173 } 1174 } 1175 } 1176 } 1177 color = PETSC_DRAW_CYAN; 1178 for (i=0,row=0; i<mbs; i++,row+=bs) { 1179 for (j=a->i[i]; j<a->i[i+1]; j++) { 1180 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1181 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1182 aa = a->a + j*bs2; 1183 for (k=0; k<bs; k++) { 1184 for (l=0; l<bs; l++) { 1185 if (PetscRealPart(*aa++) != 0.) continue; 1186 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1187 } 1188 } 1189 } 1190 } 1191 1192 color = PETSC_DRAW_RED; 1193 for (i=0,row=0; i<mbs; i++,row+=bs) { 1194 for (j=a->i[i]; j<a->i[i+1]; j++) { 1195 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1196 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1197 aa = a->a + j*bs2; 1198 for (k=0; k<bs; k++) { 1199 for (l=0; l<bs; l++) { 1200 if (PetscRealPart(*aa++) <= 0.) continue; 1201 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1202 } 1203 } 1204 } 1205 } 1206 PetscFunctionReturn(0); 1207 } 1208 1209 #undef __FUNCT__ 1210 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1211 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1212 { 1213 int ierr; 1214 PetscReal xl,yl,xr,yr,w,h; 1215 PetscDraw draw; 1216 PetscTruth isnull; 1217 1218 PetscFunctionBegin; 1219 1220 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1221 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1222 1223 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1224 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 1225 xr += w; yr += h; xl = -w; yl = -h; 1226 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1227 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1228 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1229 PetscFunctionReturn(0); 1230 } 1231 1232 #undef __FUNCT__ 1233 #define __FUNCT__ "MatView_SeqBAIJ" 1234 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1235 { 1236 int ierr; 1237 PetscTruth isascii,isbinary,isdraw; 1238 1239 PetscFunctionBegin; 1240 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 1241 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1242 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1243 if (isascii){ 1244 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1245 } else if (isbinary) { 1246 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1247 } else if (isdraw) { 1248 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1249 } else { 1250 Mat B; 1251 ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr); 1252 ierr = MatView(B,viewer);CHKERRQ(ierr); 1253 ierr = MatDestroy(B);CHKERRQ(ierr); 1254 } 1255 PetscFunctionReturn(0); 1256 } 1257 1258 1259 #undef __FUNCT__ 1260 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1261 int MatGetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[]) 1262 { 1263 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1264 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1265 int *ai = a->i,*ailen = a->ilen; 1266 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 1267 MatScalar *ap,*aa = a->a,zero = 0.0; 1268 1269 PetscFunctionBegin; 1270 for (k=0; k<m; k++) { /* loop over rows */ 1271 row = im[k]; brow = row/bs; 1272 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 1273 if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d too large", row); 1274 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1275 nrow = ailen[brow]; 1276 for (l=0; l<n; l++) { /* loop over columns */ 1277 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 1278 if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %d too large", in[l]); 1279 col = in[l] ; 1280 bcol = col/bs; 1281 cidx = col%bs; 1282 ridx = row%bs; 1283 high = nrow; 1284 low = 0; /* assume unsorted */ 1285 while (high-low > 5) { 1286 t = (low+high)/2; 1287 if (rp[t] > bcol) high = t; 1288 else low = t; 1289 } 1290 for (i=low; i<high; i++) { 1291 if (rp[i] > bcol) break; 1292 if (rp[i] == bcol) { 1293 *v++ = ap[bs2*i+bs*cidx+ridx]; 1294 goto finished; 1295 } 1296 } 1297 *v++ = zero; 1298 finished:; 1299 } 1300 } 1301 PetscFunctionReturn(0); 1302 } 1303 1304 #if defined(PETSC_USE_MAT_SINGLE) 1305 #undef __FUNCT__ 1306 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1307 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 1308 { 1309 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1310 int ierr,i,N = m*n*b->bs2; 1311 MatScalar *vsingle; 1312 1313 PetscFunctionBegin; 1314 if (N > b->setvalueslen) { 1315 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 1316 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1317 b->setvalueslen = N; 1318 } 1319 vsingle = b->setvaluescopy; 1320 for (i=0; i<N; i++) { 1321 vsingle[i] = v[i]; 1322 } 1323 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1324 PetscFunctionReturn(0); 1325 } 1326 #endif 1327 1328 1329 #undef __FUNCT__ 1330 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1331 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode is) 1332 { 1333 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1334 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1335 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1336 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 1337 PetscTruth roworiented=a->roworiented; 1338 const MatScalar *value = v; 1339 MatScalar *ap,*aa = a->a,*bap; 1340 1341 PetscFunctionBegin; 1342 if (roworiented) { 1343 stepval = (n-1)*bs; 1344 } else { 1345 stepval = (m-1)*bs; 1346 } 1347 for (k=0; k<m; k++) { /* loop over added rows */ 1348 row = im[k]; 1349 if (row < 0) continue; 1350 #if defined(PETSC_USE_BOPT_g) 1351 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1); 1352 #endif 1353 rp = aj + ai[row]; 1354 ap = aa + bs2*ai[row]; 1355 rmax = imax[row]; 1356 nrow = ailen[row]; 1357 low = 0; 1358 for (l=0; l<n; l++) { /* loop over added columns */ 1359 if (in[l] < 0) continue; 1360 #if defined(PETSC_USE_BOPT_g) 1361 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],a->nbs-1); 1362 #endif 1363 col = in[l]; 1364 if (roworiented) { 1365 value = v + k*(stepval+bs)*bs + l*bs; 1366 } else { 1367 value = v + l*(stepval+bs)*bs + k*bs; 1368 } 1369 if (!sorted) low = 0; high = nrow; 1370 while (high-low > 7) { 1371 t = (low+high)/2; 1372 if (rp[t] > col) high = t; 1373 else low = t; 1374 } 1375 for (i=low; i<high; i++) { 1376 if (rp[i] > col) break; 1377 if (rp[i] == col) { 1378 bap = ap + bs2*i; 1379 if (roworiented) { 1380 if (is == ADD_VALUES) { 1381 for (ii=0; ii<bs; ii++,value+=stepval) { 1382 for (jj=ii; jj<bs2; jj+=bs) { 1383 bap[jj] += *value++; 1384 } 1385 } 1386 } else { 1387 for (ii=0; ii<bs; ii++,value+=stepval) { 1388 for (jj=ii; jj<bs2; jj+=bs) { 1389 bap[jj] = *value++; 1390 } 1391 } 1392 } 1393 } else { 1394 if (is == ADD_VALUES) { 1395 for (ii=0; ii<bs; ii++,value+=stepval) { 1396 for (jj=0; jj<bs; jj++) { 1397 *bap++ += *value++; 1398 } 1399 } 1400 } else { 1401 for (ii=0; ii<bs; ii++,value+=stepval) { 1402 for (jj=0; jj<bs; jj++) { 1403 *bap++ = *value++; 1404 } 1405 } 1406 } 1407 } 1408 goto noinsert2; 1409 } 1410 } 1411 if (nonew == 1) goto noinsert2; 1412 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1413 if (nrow >= rmax) { 1414 /* there is no extra room in row, therefore enlarge */ 1415 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1416 MatScalar *new_a; 1417 1418 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1419 1420 /* malloc new storage space */ 1421 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1422 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1423 new_j = (int*)(new_a + bs2*new_nz); 1424 new_i = new_j + new_nz; 1425 1426 /* copy over old data into new slots */ 1427 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 1428 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1429 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 1430 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 1431 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 1432 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1433 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1434 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1435 /* free up old matrix storage */ 1436 ierr = PetscFree(a->a);CHKERRQ(ierr); 1437 if (!a->singlemalloc) { 1438 ierr = PetscFree(a->i);CHKERRQ(ierr); 1439 ierr = PetscFree(a->j);CHKERRQ(ierr); 1440 } 1441 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1442 a->singlemalloc = PETSC_TRUE; 1443 1444 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 1445 rmax = imax[row] = imax[row] + CHUNKSIZE; 1446 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1447 a->maxnz += bs2*CHUNKSIZE; 1448 a->reallocs++; 1449 a->nz++; 1450 } 1451 N = nrow++ - 1; 1452 /* shift up all the later entries in this row */ 1453 for (ii=N; ii>=i; ii--) { 1454 rp[ii+1] = rp[ii]; 1455 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1456 } 1457 if (N >= i) { 1458 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1459 } 1460 rp[i] = col; 1461 bap = ap + bs2*i; 1462 if (roworiented) { 1463 for (ii=0; ii<bs; ii++,value+=stepval) { 1464 for (jj=ii; jj<bs2; jj+=bs) { 1465 bap[jj] = *value++; 1466 } 1467 } 1468 } else { 1469 for (ii=0; ii<bs; ii++,value+=stepval) { 1470 for (jj=0; jj<bs; jj++) { 1471 *bap++ = *value++; 1472 } 1473 } 1474 } 1475 noinsert2:; 1476 low = i; 1477 } 1478 ailen[row] = nrow; 1479 } 1480 PetscFunctionReturn(0); 1481 } 1482 1483 #undef __FUNCT__ 1484 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1485 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1486 { 1487 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1488 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1489 int m = A->m,*ip,N,*ailen = a->ilen; 1490 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 1491 MatScalar *aa = a->a,*ap; 1492 1493 PetscFunctionBegin; 1494 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1495 1496 if (m) rmax = ailen[0]; 1497 for (i=1; i<mbs; i++) { 1498 /* move each row back by the amount of empty slots (fshift) before it*/ 1499 fshift += imax[i-1] - ailen[i-1]; 1500 rmax = PetscMax(rmax,ailen[i]); 1501 if (fshift) { 1502 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1503 N = ailen[i]; 1504 for (j=0; j<N; j++) { 1505 ip[j-fshift] = ip[j]; 1506 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1507 } 1508 } 1509 ai[i] = ai[i-1] + ailen[i-1]; 1510 } 1511 if (mbs) { 1512 fshift += imax[mbs-1] - ailen[mbs-1]; 1513 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1514 } 1515 /* reset ilen and imax for each row */ 1516 for (i=0; i<mbs; i++) { 1517 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1518 } 1519 a->nz = ai[mbs]; 1520 1521 /* diagonals may have moved, so kill the diagonal pointers */ 1522 a->idiagvalid = PETSC_FALSE; 1523 if (fshift && a->diag) { 1524 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1525 PetscLogObjectMemory(A,-(mbs+1)*sizeof(int)); 1526 a->diag = 0; 1527 } 1528 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 1529 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 1530 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 1531 a->reallocs = 0; 1532 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1533 1534 PetscFunctionReturn(0); 1535 } 1536 1537 1538 1539 /* 1540 This function returns an array of flags which indicate the locations of contiguous 1541 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1542 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1543 Assume: sizes should be long enough to hold all the values. 1544 */ 1545 #undef __FUNCT__ 1546 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1547 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 1548 { 1549 int i,j,k,row; 1550 PetscTruth flg; 1551 1552 PetscFunctionBegin; 1553 for (i=0,j=0; i<n; j++) { 1554 row = idx[i]; 1555 if (row%bs!=0) { /* Not the begining of a block */ 1556 sizes[j] = 1; 1557 i++; 1558 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1559 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1560 i++; 1561 } else { /* Begining of the block, so check if the complete block exists */ 1562 flg = PETSC_TRUE; 1563 for (k=1; k<bs; k++) { 1564 if (row+k != idx[i+k]) { /* break in the block */ 1565 flg = PETSC_FALSE; 1566 break; 1567 } 1568 } 1569 if (flg == PETSC_TRUE) { /* No break in the bs */ 1570 sizes[j] = bs; 1571 i+= bs; 1572 } else { 1573 sizes[j] = 1; 1574 i++; 1575 } 1576 } 1577 } 1578 *bs_max = j; 1579 PetscFunctionReturn(0); 1580 } 1581 1582 #undef __FUNCT__ 1583 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1584 int MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag) 1585 { 1586 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1587 int ierr,i,j,k,count,is_n,*is_idx,*rows; 1588 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 1589 PetscScalar zero = 0.0; 1590 MatScalar *aa; 1591 1592 PetscFunctionBegin; 1593 /* Make a copy of the IS and sort it */ 1594 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1595 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1596 1597 /* allocate memory for rows,sizes */ 1598 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 1599 sizes = rows + is_n; 1600 1601 /* copy IS values to rows, and sort them */ 1602 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1603 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1604 if (baij->keepzeroedrows) { 1605 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1606 bs_max = is_n; 1607 } else { 1608 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1609 } 1610 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1611 1612 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1613 row = rows[j]; 1614 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 1615 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1616 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1617 if (sizes[i] == bs && !baij->keepzeroedrows) { 1618 if (diag) { 1619 if (baij->ilen[row/bs] > 0) { 1620 baij->ilen[row/bs] = 1; 1621 baij->j[baij->i[row/bs]] = row/bs; 1622 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1623 } 1624 /* Now insert all the diagonal values for this bs */ 1625 for (k=0; k<bs; k++) { 1626 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1627 } 1628 } else { /* (!diag) */ 1629 baij->ilen[row/bs] = 0; 1630 } /* end (!diag) */ 1631 } else { /* (sizes[i] != bs) */ 1632 #if defined (PETSC_USE_DEBUG) 1633 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 1634 #endif 1635 for (k=0; k<count; k++) { 1636 aa[0] = zero; 1637 aa += bs; 1638 } 1639 if (diag) { 1640 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1641 } 1642 } 1643 } 1644 1645 ierr = PetscFree(rows);CHKERRQ(ierr); 1646 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1647 PetscFunctionReturn(0); 1648 } 1649 1650 #undef __FUNCT__ 1651 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1652 int MatSetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 1653 { 1654 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1655 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1656 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1657 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1658 int ridx,cidx,bs2=a->bs2,ierr; 1659 PetscTruth roworiented=a->roworiented; 1660 MatScalar *ap,value,*aa=a->a,*bap; 1661 1662 PetscFunctionBegin; 1663 for (k=0; k<m; k++) { /* loop over added rows */ 1664 row = im[k]; brow = row/bs; 1665 if (row < 0) continue; 1666 #if defined(PETSC_USE_BOPT_g) 1667 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 1668 #endif 1669 rp = aj + ai[brow]; 1670 ap = aa + bs2*ai[brow]; 1671 rmax = imax[brow]; 1672 nrow = ailen[brow]; 1673 low = 0; 1674 for (l=0; l<n; l++) { /* loop over added columns */ 1675 if (in[l] < 0) continue; 1676 #if defined(PETSC_USE_BOPT_g) 1677 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1); 1678 #endif 1679 col = in[l]; bcol = col/bs; 1680 ridx = row % bs; cidx = col % bs; 1681 if (roworiented) { 1682 value = v[l + k*n]; 1683 } else { 1684 value = v[k + l*m]; 1685 } 1686 if (!sorted) low = 0; high = nrow; 1687 while (high-low > 7) { 1688 t = (low+high)/2; 1689 if (rp[t] > bcol) high = t; 1690 else low = t; 1691 } 1692 for (i=low; i<high; i++) { 1693 if (rp[i] > bcol) break; 1694 if (rp[i] == bcol) { 1695 bap = ap + bs2*i + bs*cidx + ridx; 1696 if (is == ADD_VALUES) *bap += value; 1697 else *bap = value; 1698 goto noinsert1; 1699 } 1700 } 1701 if (nonew == 1) goto noinsert1; 1702 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1703 if (nrow >= rmax) { 1704 /* there is no extra room in row, therefore enlarge */ 1705 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1706 MatScalar *new_a; 1707 1708 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1709 1710 /* Malloc new storage space */ 1711 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1712 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1713 new_j = (int*)(new_a + bs2*new_nz); 1714 new_i = new_j + new_nz; 1715 1716 /* copy over old data into new slots */ 1717 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1718 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1719 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1720 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1721 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1722 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1723 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1724 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1725 /* free up old matrix storage */ 1726 ierr = PetscFree(a->a);CHKERRQ(ierr); 1727 if (!a->singlemalloc) { 1728 ierr = PetscFree(a->i);CHKERRQ(ierr); 1729 ierr = PetscFree(a->j);CHKERRQ(ierr); 1730 } 1731 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1732 a->singlemalloc = PETSC_TRUE; 1733 1734 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1735 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1736 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1737 a->maxnz += bs2*CHUNKSIZE; 1738 a->reallocs++; 1739 a->nz++; 1740 } 1741 N = nrow++ - 1; 1742 /* shift up all the later entries in this row */ 1743 for (ii=N; ii>=i; ii--) { 1744 rp[ii+1] = rp[ii]; 1745 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1746 } 1747 if (N>=i) { 1748 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1749 } 1750 rp[i] = bcol; 1751 ap[bs2*i + bs*cidx + ridx] = value; 1752 noinsert1:; 1753 low = i; 1754 } 1755 ailen[brow] = nrow; 1756 } 1757 PetscFunctionReturn(0); 1758 } 1759 1760 1761 #undef __FUNCT__ 1762 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1763 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1764 { 1765 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1766 Mat outA; 1767 int ierr; 1768 PetscTruth row_identity,col_identity; 1769 1770 PetscFunctionBegin; 1771 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1772 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1773 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1774 if (!row_identity || !col_identity) { 1775 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1776 } 1777 1778 outA = inA; 1779 inA->factor = FACTOR_LU; 1780 1781 if (!a->diag) { 1782 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1783 } 1784 1785 a->row = row; 1786 a->col = col; 1787 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1788 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1789 1790 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1791 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1792 PetscLogObjectParent(inA,a->icol); 1793 1794 /* 1795 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1796 for ILU(0) factorization with natural ordering 1797 */ 1798 if (a->bs < 8) { 1799 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1800 } else { 1801 if (!a->solve_work) { 1802 ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1803 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1804 } 1805 } 1806 1807 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1808 1809 PetscFunctionReturn(0); 1810 } 1811 #undef __FUNCT__ 1812 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1813 int MatPrintHelp_SeqBAIJ(Mat A) 1814 { 1815 static PetscTruth called = PETSC_FALSE; 1816 MPI_Comm comm = A->comm; 1817 int ierr; 1818 1819 PetscFunctionBegin; 1820 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1821 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1822 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1823 PetscFunctionReturn(0); 1824 } 1825 1826 EXTERN_C_BEGIN 1827 #undef __FUNCT__ 1828 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1829 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1830 { 1831 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1832 int i,nz,nbs; 1833 1834 PetscFunctionBegin; 1835 nz = baij->maxnz/baij->bs2; 1836 nbs = baij->nbs; 1837 for (i=0; i<nz; i++) { 1838 baij->j[i] = indices[i]; 1839 } 1840 baij->nz = nz; 1841 for (i=0; i<nbs; i++) { 1842 baij->ilen[i] = baij->imax[i]; 1843 } 1844 1845 PetscFunctionReturn(0); 1846 } 1847 EXTERN_C_END 1848 1849 #undef __FUNCT__ 1850 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1851 /*@ 1852 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1853 in the matrix. 1854 1855 Input Parameters: 1856 + mat - the SeqBAIJ matrix 1857 - indices - the column indices 1858 1859 Level: advanced 1860 1861 Notes: 1862 This can be called if you have precomputed the nonzero structure of the 1863 matrix and want to provide it to the matrix object to improve the performance 1864 of the MatSetValues() operation. 1865 1866 You MUST have set the correct numbers of nonzeros per row in the call to 1867 MatCreateSeqBAIJ(). 1868 1869 MUST be called before any calls to MatSetValues(); 1870 1871 @*/ 1872 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1873 { 1874 int ierr,(*f)(Mat,int *); 1875 1876 PetscFunctionBegin; 1877 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1878 PetscValidPointer(indices,2); 1879 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1880 if (f) { 1881 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1882 } else { 1883 SETERRQ(1,"Wrong type of matrix to set column indices"); 1884 } 1885 PetscFunctionReturn(0); 1886 } 1887 1888 #undef __FUNCT__ 1889 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1890 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1891 { 1892 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1893 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1894 PetscReal atmp; 1895 PetscScalar *x,zero = 0.0; 1896 MatScalar *aa; 1897 int ncols,brow,krow,kcol; 1898 1899 PetscFunctionBegin; 1900 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1901 bs = a->bs; 1902 aa = a->a; 1903 ai = a->i; 1904 aj = a->j; 1905 mbs = a->mbs; 1906 1907 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1908 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1909 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1910 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1911 for (i=0; i<mbs; i++) { 1912 ncols = ai[1] - ai[0]; ai++; 1913 brow = bs*i; 1914 for (j=0; j<ncols; j++){ 1915 /* bcol = bs*(*aj); */ 1916 for (kcol=0; kcol<bs; kcol++){ 1917 for (krow=0; krow<bs; krow++){ 1918 atmp = PetscAbsScalar(*aa); aa++; 1919 row = brow + krow; /* row index */ 1920 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1921 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1922 } 1923 } 1924 aj++; 1925 } 1926 } 1927 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1928 PetscFunctionReturn(0); 1929 } 1930 1931 #undef __FUNCT__ 1932 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1933 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1934 { 1935 int ierr; 1936 1937 PetscFunctionBegin; 1938 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1939 PetscFunctionReturn(0); 1940 } 1941 1942 #undef __FUNCT__ 1943 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1944 int MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1945 { 1946 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1947 PetscFunctionBegin; 1948 *array = a->a; 1949 PetscFunctionReturn(0); 1950 } 1951 1952 #undef __FUNCT__ 1953 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1954 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1955 { 1956 PetscFunctionBegin; 1957 PetscFunctionReturn(0); 1958 } 1959 1960 #include "petscblaslapack.h" 1961 #undef __FUNCT__ 1962 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1963 int MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1964 { 1965 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1966 int ierr,one=1,i,bs=y->bs,j,bs2; 1967 1968 PetscFunctionBegin; 1969 if (str == SAME_NONZERO_PATTERN) { 1970 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1971 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1972 if (y->xtoy && y->XtoY != X) { 1973 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1974 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1975 } 1976 if (!y->xtoy) { /* get xtoy */ 1977 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1978 y->XtoY = X; 1979 } 1980 bs2 = bs*bs; 1981 for (i=0; i<x->nz; i++) { 1982 j = 0; 1983 while (j < bs2){ 1984 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1985 j++; 1986 } 1987 } 1988 PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 1989 } else { 1990 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1991 } 1992 PetscFunctionReturn(0); 1993 } 1994 1995 /* -------------------------------------------------------------------*/ 1996 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1997 MatGetRow_SeqBAIJ, 1998 MatRestoreRow_SeqBAIJ, 1999 MatMult_SeqBAIJ_N, 2000 /* 4*/ MatMultAdd_SeqBAIJ_N, 2001 MatMultTranspose_SeqBAIJ, 2002 MatMultTransposeAdd_SeqBAIJ, 2003 MatSolve_SeqBAIJ_N, 2004 0, 2005 0, 2006 /*10*/ 0, 2007 MatLUFactor_SeqBAIJ, 2008 0, 2009 0, 2010 MatTranspose_SeqBAIJ, 2011 /*15*/ MatGetInfo_SeqBAIJ, 2012 MatEqual_SeqBAIJ, 2013 MatGetDiagonal_SeqBAIJ, 2014 MatDiagonalScale_SeqBAIJ, 2015 MatNorm_SeqBAIJ, 2016 /*20*/ 0, 2017 MatAssemblyEnd_SeqBAIJ, 2018 0, 2019 MatSetOption_SeqBAIJ, 2020 MatZeroEntries_SeqBAIJ, 2021 /*25*/ MatZeroRows_SeqBAIJ, 2022 MatLUFactorSymbolic_SeqBAIJ, 2023 MatLUFactorNumeric_SeqBAIJ_N, 2024 0, 2025 0, 2026 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2027 MatILUFactorSymbolic_SeqBAIJ, 2028 0, 2029 MatGetArray_SeqBAIJ, 2030 MatRestoreArray_SeqBAIJ, 2031 /*35*/ MatDuplicate_SeqBAIJ, 2032 0, 2033 0, 2034 MatILUFactor_SeqBAIJ, 2035 0, 2036 /*40*/ MatAXPY_SeqBAIJ, 2037 MatGetSubMatrices_SeqBAIJ, 2038 MatIncreaseOverlap_SeqBAIJ, 2039 MatGetValues_SeqBAIJ, 2040 0, 2041 /*45*/ MatPrintHelp_SeqBAIJ, 2042 MatScale_SeqBAIJ, 2043 0, 2044 0, 2045 0, 2046 /*50*/ MatGetBlockSize_SeqBAIJ, 2047 MatGetRowIJ_SeqBAIJ, 2048 MatRestoreRowIJ_SeqBAIJ, 2049 0, 2050 0, 2051 /*55*/ 0, 2052 0, 2053 0, 2054 0, 2055 MatSetValuesBlocked_SeqBAIJ, 2056 /*60*/ MatGetSubMatrix_SeqBAIJ, 2057 MatDestroy_SeqBAIJ, 2058 MatView_SeqBAIJ, 2059 MatGetPetscMaps_Petsc, 2060 0, 2061 /*65*/ 0, 2062 0, 2063 0, 2064 0, 2065 0, 2066 /*70*/ MatGetRowMax_SeqBAIJ, 2067 MatConvert_Basic, 2068 0, 2069 0, 2070 0, 2071 /*75*/ 0, 2072 0, 2073 0, 2074 0, 2075 0, 2076 /*80*/ 0, 2077 0, 2078 0, 2079 0, 2080 /*85*/ MatLoad_SeqBAIJ, 2081 0, 2082 0, 2083 0, 2084 0 2085 }; 2086 2087 EXTERN_C_BEGIN 2088 #undef __FUNCT__ 2089 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2090 int MatStoreValues_SeqBAIJ(Mat mat) 2091 { 2092 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2093 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 2094 int ierr; 2095 2096 PetscFunctionBegin; 2097 if (aij->nonew != 1) { 2098 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2099 } 2100 2101 /* allocate space for values if not already there */ 2102 if (!aij->saved_values) { 2103 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2104 } 2105 2106 /* copy values over */ 2107 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2108 PetscFunctionReturn(0); 2109 } 2110 EXTERN_C_END 2111 2112 EXTERN_C_BEGIN 2113 #undef __FUNCT__ 2114 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2115 int MatRetrieveValues_SeqBAIJ(Mat mat) 2116 { 2117 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2118 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 2119 2120 PetscFunctionBegin; 2121 if (aij->nonew != 1) { 2122 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2123 } 2124 if (!aij->saved_values) { 2125 SETERRQ(1,"Must call MatStoreValues(A);first"); 2126 } 2127 2128 /* copy values over */ 2129 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2130 PetscFunctionReturn(0); 2131 } 2132 EXTERN_C_END 2133 2134 EXTERN_C_BEGIN 2135 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,Mat*); 2136 extern int MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,Mat*); 2137 EXTERN_C_END 2138 2139 EXTERN_C_BEGIN 2140 #undef __FUNCT__ 2141 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2142 int MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,int bs,int nz,int *nnz) 2143 { 2144 Mat_SeqBAIJ *b; 2145 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 2146 PetscTruth flg; 2147 2148 PetscFunctionBegin; 2149 2150 B->preallocated = PETSC_TRUE; 2151 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 2152 if (nnz && newbs != bs) { 2153 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 2154 } 2155 bs = newbs; 2156 2157 mbs = B->m/bs; 2158 nbs = B->n/bs; 2159 bs2 = bs*bs; 2160 2161 if (mbs*bs!=B->m || nbs*bs!=B->n) { 2162 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 2163 } 2164 2165 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2166 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 2167 if (nnz) { 2168 for (i=0; i<mbs; i++) { 2169 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 2170 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 2171 } 2172 } 2173 2174 b = (Mat_SeqBAIJ*)B->data; 2175 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 2176 B->ops->solve = MatSolve_SeqBAIJ_Update; 2177 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2178 if (!flg) { 2179 switch (bs) { 2180 case 1: 2181 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2182 B->ops->mult = MatMult_SeqBAIJ_1; 2183 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2184 break; 2185 case 2: 2186 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2187 B->ops->mult = MatMult_SeqBAIJ_2; 2188 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2189 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2190 break; 2191 case 3: 2192 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2193 B->ops->mult = MatMult_SeqBAIJ_3; 2194 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2195 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2196 break; 2197 case 4: 2198 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2199 B->ops->mult = MatMult_SeqBAIJ_4; 2200 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2201 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2202 break; 2203 case 5: 2204 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2205 B->ops->mult = MatMult_SeqBAIJ_5; 2206 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2207 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2208 break; 2209 case 6: 2210 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2211 B->ops->mult = MatMult_SeqBAIJ_6; 2212 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2213 break; 2214 case 7: 2215 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2216 B->ops->mult = MatMult_SeqBAIJ_7; 2217 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2218 break; 2219 default: 2220 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2221 B->ops->mult = MatMult_SeqBAIJ_N; 2222 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2223 break; 2224 } 2225 } 2226 b->bs = bs; 2227 b->mbs = mbs; 2228 b->nbs = nbs; 2229 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 2230 if (!nnz) { 2231 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2232 else if (nz <= 0) nz = 1; 2233 for (i=0; i<mbs; i++) b->imax[i] = nz; 2234 nz = nz*mbs; 2235 } else { 2236 nz = 0; 2237 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2238 } 2239 2240 /* allocate the matrix space */ 2241 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 2242 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 2243 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2244 b->j = (int*)(b->a + nz*bs2); 2245 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 2246 b->i = b->j + nz; 2247 b->singlemalloc = PETSC_TRUE; 2248 2249 b->i[0] = 0; 2250 for (i=1; i<mbs+1; i++) { 2251 b->i[i] = b->i[i-1] + b->imax[i-1]; 2252 } 2253 2254 /* b->ilen will count nonzeros in each block row so far. */ 2255 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 2256 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2257 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2258 2259 b->bs = bs; 2260 b->bs2 = bs2; 2261 b->mbs = mbs; 2262 b->nz = 0; 2263 b->maxnz = nz*bs2; 2264 B->info.nz_unneeded = (PetscReal)b->maxnz; 2265 PetscFunctionReturn(0); 2266 } 2267 EXTERN_C_END 2268 2269 /*MC 2270 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2271 block sparse compressed row format. 2272 2273 Options Database Keys: 2274 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2275 2276 Level: beginner 2277 2278 .seealso: MatCreateSeqBAIJ 2279 M*/ 2280 2281 EXTERN_C_BEGIN 2282 #undef __FUNCT__ 2283 #define __FUNCT__ "MatCreate_SeqBAIJ" 2284 int MatCreate_SeqBAIJ(Mat B) 2285 { 2286 int ierr,size; 2287 Mat_SeqBAIJ *b; 2288 2289 PetscFunctionBegin; 2290 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 2291 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2292 2293 B->m = B->M = PetscMax(B->m,B->M); 2294 B->n = B->N = PetscMax(B->n,B->N); 2295 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2296 B->data = (void*)b; 2297 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 2298 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2299 B->factor = 0; 2300 B->lupivotthreshold = 1.0; 2301 B->mapping = 0; 2302 b->row = 0; 2303 b->col = 0; 2304 b->icol = 0; 2305 b->reallocs = 0; 2306 b->saved_values = 0; 2307 #if defined(PETSC_USE_MAT_SINGLE) 2308 b->setvalueslen = 0; 2309 b->setvaluescopy = PETSC_NULL; 2310 #endif 2311 2312 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 2313 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 2314 2315 b->sorted = PETSC_FALSE; 2316 b->roworiented = PETSC_TRUE; 2317 b->nonew = 0; 2318 b->diag = 0; 2319 b->solve_work = 0; 2320 b->mult_work = 0; 2321 B->spptr = 0; 2322 B->info.nz_unneeded = (PetscReal)b->maxnz; 2323 b->keepzeroedrows = PETSC_FALSE; 2324 b->xtoy = 0; 2325 b->XtoY = 0; 2326 2327 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2328 "MatStoreValues_SeqBAIJ", 2329 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2330 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2331 "MatRetrieveValues_SeqBAIJ", 2332 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2333 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2334 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2335 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2336 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2337 "MatConvert_SeqBAIJ_SeqAIJ", 2338 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2339 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2340 "MatConvert_SeqBAIJ_SeqSBAIJ", 2341 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2342 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2343 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2344 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2345 PetscFunctionReturn(0); 2346 } 2347 EXTERN_C_END 2348 2349 #undef __FUNCT__ 2350 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2351 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2352 { 2353 Mat C; 2354 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2355 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 2356 2357 PetscFunctionBegin; 2358 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2359 2360 *B = 0; 2361 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 2362 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 2363 c = (Mat_SeqBAIJ*)C->data; 2364 2365 C->M = A->M; 2366 C->N = A->N; 2367 c->bs = a->bs; 2368 c->bs2 = a->bs2; 2369 c->mbs = a->mbs; 2370 c->nbs = a->nbs; 2371 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2372 2373 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 2374 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 2375 for (i=0; i<mbs; i++) { 2376 c->imax[i] = a->imax[i]; 2377 c->ilen[i] = a->ilen[i]; 2378 } 2379 2380 /* allocate the matrix space */ 2381 c->singlemalloc = PETSC_TRUE; 2382 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 2383 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 2384 c->j = (int*)(c->a + nz*bs2); 2385 c->i = c->j + nz; 2386 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 2387 if (mbs > 0) { 2388 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 2389 if (cpvalues == MAT_COPY_VALUES) { 2390 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2391 } else { 2392 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2393 } 2394 } 2395 2396 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2397 c->sorted = a->sorted; 2398 c->roworiented = a->roworiented; 2399 c->nonew = a->nonew; 2400 2401 if (a->diag) { 2402 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 2403 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 2404 for (i=0; i<mbs; i++) { 2405 c->diag[i] = a->diag[i]; 2406 } 2407 } else c->diag = 0; 2408 c->nz = a->nz; 2409 c->maxnz = a->maxnz; 2410 c->solve_work = 0; 2411 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 2412 c->mult_work = 0; 2413 C->preallocated = PETSC_TRUE; 2414 C->assembled = PETSC_TRUE; 2415 *B = C; 2416 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 2417 PetscFunctionReturn(0); 2418 } 2419 2420 #undef __FUNCT__ 2421 #define __FUNCT__ "MatLoad_SeqBAIJ" 2422 int MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A) 2423 { 2424 Mat_SeqBAIJ *a; 2425 Mat B; 2426 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 2427 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2428 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 2429 int *masked,nmask,tmp,bs2,ishift; 2430 PetscScalar *aa; 2431 MPI_Comm comm = ((PetscObject)viewer)->comm; 2432 2433 PetscFunctionBegin; 2434 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2435 bs2 = bs*bs; 2436 2437 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2438 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2439 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2440 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2441 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2442 M = header[1]; N = header[2]; nz = header[3]; 2443 2444 if (header[3] < 0) { 2445 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2446 } 2447 2448 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2449 2450 /* 2451 This code adds extra rows to make sure the number of rows is 2452 divisible by the blocksize 2453 */ 2454 mbs = M/bs; 2455 extra_rows = bs - M + bs*(mbs); 2456 if (extra_rows == bs) extra_rows = 0; 2457 else mbs++; 2458 if (extra_rows) { 2459 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 2460 } 2461 2462 /* read in row lengths */ 2463 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2464 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2465 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2466 2467 /* read in column indices */ 2468 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 2469 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2470 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2471 2472 /* loop over row lengths determining block row lengths */ 2473 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 2474 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 2475 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 2476 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 2477 masked = mask + mbs; 2478 rowcount = 0; nzcount = 0; 2479 for (i=0; i<mbs; i++) { 2480 nmask = 0; 2481 for (j=0; j<bs; j++) { 2482 kmax = rowlengths[rowcount]; 2483 for (k=0; k<kmax; k++) { 2484 tmp = jj[nzcount++]/bs; 2485 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2486 } 2487 rowcount++; 2488 } 2489 browlengths[i] += nmask; 2490 /* zero out the mask elements we set */ 2491 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2492 } 2493 2494 /* create our matrix */ 2495 ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B); 2496 ierr = MatSetType(B,type);CHKERRQ(ierr); 2497 ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr); 2498 a = (Mat_SeqBAIJ*)B->data; 2499 2500 /* set matrix "i" values */ 2501 a->i[0] = 0; 2502 for (i=1; i<= mbs; i++) { 2503 a->i[i] = a->i[i-1] + browlengths[i-1]; 2504 a->ilen[i-1] = browlengths[i-1]; 2505 } 2506 a->nz = 0; 2507 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2508 2509 /* read in nonzero values */ 2510 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2511 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2512 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2513 2514 /* set "a" and "j" values into matrix */ 2515 nzcount = 0; jcount = 0; 2516 for (i=0; i<mbs; i++) { 2517 nzcountb = nzcount; 2518 nmask = 0; 2519 for (j=0; j<bs; j++) { 2520 kmax = rowlengths[i*bs+j]; 2521 for (k=0; k<kmax; k++) { 2522 tmp = jj[nzcount++]/bs; 2523 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2524 } 2525 } 2526 /* sort the masked values */ 2527 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2528 2529 /* set "j" values into matrix */ 2530 maskcount = 1; 2531 for (j=0; j<nmask; j++) { 2532 a->j[jcount++] = masked[j]; 2533 mask[masked[j]] = maskcount++; 2534 } 2535 /* set "a" values into matrix */ 2536 ishift = bs2*a->i[i]; 2537 for (j=0; j<bs; j++) { 2538 kmax = rowlengths[i*bs+j]; 2539 for (k=0; k<kmax; k++) { 2540 tmp = jj[nzcountb]/bs ; 2541 block = mask[tmp] - 1; 2542 point = jj[nzcountb] - bs*tmp; 2543 idx = ishift + bs2*block + j + bs*point; 2544 a->a[idx] = (MatScalar)aa[nzcountb++]; 2545 } 2546 } 2547 /* zero out the mask elements we set */ 2548 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2549 } 2550 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2551 2552 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2553 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2554 ierr = PetscFree(aa);CHKERRQ(ierr); 2555 ierr = PetscFree(jj);CHKERRQ(ierr); 2556 ierr = PetscFree(mask);CHKERRQ(ierr); 2557 2558 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2559 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2560 ierr = MatView_Private(B);CHKERRQ(ierr); 2561 2562 *A = B; 2563 PetscFunctionReturn(0); 2564 } 2565 2566 #undef __FUNCT__ 2567 #define __FUNCT__ "MatCreateSeqBAIJ" 2568 /*@C 2569 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2570 compressed row) format. For good matrix assembly performance the 2571 user should preallocate the matrix storage by setting the parameter nz 2572 (or the array nnz). By setting these parameters accurately, performance 2573 during matrix assembly can be increased by more than a factor of 50. 2574 2575 Collective on MPI_Comm 2576 2577 Input Parameters: 2578 + comm - MPI communicator, set to PETSC_COMM_SELF 2579 . bs - size of block 2580 . m - number of rows 2581 . n - number of columns 2582 . nz - number of nonzero blocks per block row (same for all rows) 2583 - nnz - array containing the number of nonzero blocks in the various block rows 2584 (possibly different for each block row) or PETSC_NULL 2585 2586 Output Parameter: 2587 . A - the matrix 2588 2589 Options Database Keys: 2590 . -mat_no_unroll - uses code that does not unroll the loops in the 2591 block calculations (much slower) 2592 . -mat_block_size - size of the blocks to use 2593 2594 Level: intermediate 2595 2596 Notes: 2597 A nonzero block is any block that as 1 or more nonzeros in it 2598 2599 The block AIJ format is fully compatible with standard Fortran 77 2600 storage. That is, the stored row and column indices can begin at 2601 either one (as in Fortran) or zero. See the users' manual for details. 2602 2603 Specify the preallocated storage with either nz or nnz (not both). 2604 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2605 allocation. For additional details, see the users manual chapter on 2606 matrices. 2607 2608 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2609 @*/ 2610 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A) 2611 { 2612 int ierr; 2613 2614 PetscFunctionBegin; 2615 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 2616 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2617 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 2618 PetscFunctionReturn(0); 2619 } 2620 2621 #undef __FUNCT__ 2622 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2623 /*@C 2624 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2625 per row in the matrix. For good matrix assembly performance the 2626 user should preallocate the matrix storage by setting the parameter nz 2627 (or the array nnz). By setting these parameters accurately, performance 2628 during matrix assembly can be increased by more than a factor of 50. 2629 2630 Collective on MPI_Comm 2631 2632 Input Parameters: 2633 + A - the matrix 2634 . bs - size of block 2635 . nz - number of block nonzeros per block row (same for all rows) 2636 - nnz - array containing the number of block nonzeros in the various block rows 2637 (possibly different for each block row) or PETSC_NULL 2638 2639 Options Database Keys: 2640 . -mat_no_unroll - uses code that does not unroll the loops in the 2641 block calculations (much slower) 2642 . -mat_block_size - size of the blocks to use 2643 2644 Level: intermediate 2645 2646 Notes: 2647 The block AIJ format is fully compatible with standard Fortran 77 2648 storage. That is, the stored row and column indices can begin at 2649 either one (as in Fortran) or zero. See the users' manual for details. 2650 2651 Specify the preallocated storage with either nz or nnz (not both). 2652 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2653 allocation. For additional details, see the users manual chapter on 2654 matrices. 2655 2656 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2657 @*/ 2658 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) 2659 { 2660 int ierr,(*f)(Mat,int,int,const int[]); 2661 2662 PetscFunctionBegin; 2663 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2664 if (f) { 2665 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2666 } 2667 PetscFunctionReturn(0); 2668 } 2669 2670