1 /* 2 Defines the basic matrix operations for the BAIJ (compressed row) 3 matrix storage format. 4 */ 5 #include "src/mat/impls/baij/seq/baij.h" 6 #include "src/inline/spops.h" 7 #include "petscsys.h" /*I "petscmat.h" I*/ 8 9 #include "src/inline/ilu.h" 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 int *diag_offset,i,bs = a->bs,mbs = a->mbs; 18 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 19 20 PetscFunctionBegin; 21 if (a->idiagvalid) PetscFunctionReturn(0); 22 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 23 diag_offset = a->diag; 24 if (!a->idiag) { 25 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 26 } 27 diag = a->idiag; 28 mdiag = a->idiag+bs*bs*mbs; 29 /* factor and invert each block */ 30 switch (a->bs){ 31 case 2: 32 for (i=0; i<mbs; i++) { 33 odiag = v + 4*diag_offset[i]; 34 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 35 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 36 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 37 diag += 4; 38 mdiag += 4; 39 } 40 break; 41 case 3: 42 for (i=0; i<mbs; i++) { 43 odiag = v + 9*diag_offset[i]; 44 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 45 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 46 diag[8] = odiag[8]; 47 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 48 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 49 mdiag[8] = odiag[8]; 50 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 51 diag += 9; 52 mdiag += 9; 53 } 54 break; 55 case 4: 56 for (i=0; i<mbs; i++) { 57 odiag = v + 16*diag_offset[i]; 58 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 59 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 60 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 61 diag += 16; 62 mdiag += 16; 63 } 64 break; 65 case 5: 66 for (i=0; i<mbs; i++) { 67 odiag = v + 25*diag_offset[i]; 68 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 69 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 70 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 71 diag += 25; 72 mdiag += 25; 73 } 74 break; 75 default: 76 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %d",a->bs); 77 } 78 a->idiagvalid = PETSC_TRUE; 79 PetscFunctionReturn(0); 80 } 81 82 #undef __FUNCT__ 83 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 84 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 85 { 86 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 87 PetscScalar *x,x1,x2,s1,s2; 88 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 89 PetscErrorCode ierr; 90 int m = a->mbs,i,i2,nz,idx; 91 const int *diag,*ai = a->i,*aj = a->j,*vi; 92 93 PetscFunctionBegin; 94 its = its*lits; 95 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 96 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 97 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 98 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 99 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 100 101 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 102 103 diag = a->diag; 104 idiag = a->idiag; 105 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 106 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 107 108 if (flag & SOR_ZERO_INITIAL_GUESS) { 109 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 110 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 111 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 112 i2 = 2; 113 idiag += 4; 114 for (i=1; i<m; i++) { 115 v = aa + 4*ai[i]; 116 vi = aj + ai[i]; 117 nz = diag[i] - ai[i]; 118 s1 = b[i2]; s2 = b[i2+1]; 119 while (nz--) { 120 idx = 2*(*vi++); 121 x1 = x[idx]; x2 = x[1+idx]; 122 s1 -= v[0]*x1 + v[2]*x2; 123 s2 -= v[1]*x1 + v[3]*x2; 124 v += 4; 125 } 126 x[i2] = idiag[0]*s1 + idiag[2]*s2; 127 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 128 idiag += 4; 129 i2 += 2; 130 } 131 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 132 PetscLogFlops(4*(a->nz)); 133 } 134 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 135 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 136 i2 = 0; 137 mdiag = a->idiag+4*a->mbs; 138 for (i=0; i<m; i++) { 139 x1 = x[i2]; x2 = x[i2+1]; 140 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 141 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 142 mdiag += 4; 143 i2 += 2; 144 } 145 PetscLogFlops(6*m); 146 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 147 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 148 } 149 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 150 idiag = a->idiag+4*a->mbs - 4; 151 i2 = 2*m - 2; 152 x1 = x[i2]; x2 = x[i2+1]; 153 x[i2] = idiag[0]*x1 + idiag[2]*x2; 154 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 155 idiag -= 4; 156 i2 -= 2; 157 for (i=m-2; i>=0; i--) { 158 v = aa + 4*(diag[i]+1); 159 vi = aj + diag[i] + 1; 160 nz = ai[i+1] - diag[i] - 1; 161 s1 = x[i2]; s2 = x[i2+1]; 162 while (nz--) { 163 idx = 2*(*vi++); 164 x1 = x[idx]; x2 = x[1+idx]; 165 s1 -= v[0]*x1 + v[2]*x2; 166 s2 -= v[1]*x1 + v[3]*x2; 167 v += 4; 168 } 169 x[i2] = idiag[0]*s1 + idiag[2]*s2; 170 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 171 idiag -= 4; 172 i2 -= 2; 173 } 174 PetscLogFlops(4*(a->nz)); 175 } 176 } else { 177 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 178 } 179 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 180 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 181 PetscFunctionReturn(0); 182 } 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 186 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 187 { 188 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 189 PetscScalar *x,x1,x2,x3,s1,s2,s3; 190 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 191 PetscErrorCode ierr; 192 int m = a->mbs,i,i2,nz,idx; 193 const int *diag,*ai = a->i,*aj = a->j,*vi; 194 195 PetscFunctionBegin; 196 its = its*lits; 197 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 198 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 199 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 200 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 201 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 202 203 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 204 205 diag = a->diag; 206 idiag = a->idiag; 207 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 208 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 209 210 if (flag & SOR_ZERO_INITIAL_GUESS) { 211 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 212 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 213 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 214 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 215 i2 = 3; 216 idiag += 9; 217 for (i=1; i<m; i++) { 218 v = aa + 9*ai[i]; 219 vi = aj + ai[i]; 220 nz = diag[i] - ai[i]; 221 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 222 while (nz--) { 223 idx = 3*(*vi++); 224 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 225 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 226 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 227 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 228 v += 9; 229 } 230 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 231 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 232 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 233 idiag += 9; 234 i2 += 3; 235 } 236 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 237 PetscLogFlops(9*(a->nz)); 238 } 239 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 240 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 241 i2 = 0; 242 mdiag = a->idiag+9*a->mbs; 243 for (i=0; i<m; i++) { 244 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 245 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 246 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 247 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 248 mdiag += 9; 249 i2 += 3; 250 } 251 PetscLogFlops(15*m); 252 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 253 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 254 } 255 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 256 idiag = a->idiag+9*a->mbs - 9; 257 i2 = 3*m - 3; 258 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 259 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 260 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 261 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 262 idiag -= 9; 263 i2 -= 3; 264 for (i=m-2; i>=0; i--) { 265 v = aa + 9*(diag[i]+1); 266 vi = aj + diag[i] + 1; 267 nz = ai[i+1] - diag[i] - 1; 268 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 269 while (nz--) { 270 idx = 3*(*vi++); 271 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 272 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 273 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 274 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 275 v += 9; 276 } 277 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 278 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 279 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 280 idiag -= 9; 281 i2 -= 3; 282 } 283 PetscLogFlops(9*(a->nz)); 284 } 285 } else { 286 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 287 } 288 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 289 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 290 PetscFunctionReturn(0); 291 } 292 293 #undef __FUNCT__ 294 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 295 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 296 { 297 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 298 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 299 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 300 PetscErrorCode ierr; 301 int m = a->mbs,i,i2,nz,idx; 302 const int *diag,*ai = a->i,*aj = a->j,*vi; 303 304 PetscFunctionBegin; 305 its = its*lits; 306 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 307 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 308 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 309 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 310 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 311 312 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 313 314 diag = a->diag; 315 idiag = a->idiag; 316 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 317 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 318 319 if (flag & SOR_ZERO_INITIAL_GUESS) { 320 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 321 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 322 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 323 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 324 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 325 i2 = 4; 326 idiag += 16; 327 for (i=1; i<m; i++) { 328 v = aa + 16*ai[i]; 329 vi = aj + ai[i]; 330 nz = diag[i] - ai[i]; 331 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 332 while (nz--) { 333 idx = 4*(*vi++); 334 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 335 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 336 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 337 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 338 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 339 v += 16; 340 } 341 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 342 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 343 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 344 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 345 idiag += 16; 346 i2 += 4; 347 } 348 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 349 PetscLogFlops(16*(a->nz)); 350 } 351 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 352 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 353 i2 = 0; 354 mdiag = a->idiag+16*a->mbs; 355 for (i=0; i<m; i++) { 356 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 357 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 358 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 359 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 360 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 361 mdiag += 16; 362 i2 += 4; 363 } 364 PetscLogFlops(28*m); 365 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 366 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 367 } 368 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 369 idiag = a->idiag+16*a->mbs - 16; 370 i2 = 4*m - 4; 371 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 372 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 373 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 374 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 375 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 376 idiag -= 16; 377 i2 -= 4; 378 for (i=m-2; i>=0; i--) { 379 v = aa + 16*(diag[i]+1); 380 vi = aj + diag[i] + 1; 381 nz = ai[i+1] - diag[i] - 1; 382 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 383 while (nz--) { 384 idx = 4*(*vi++); 385 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 386 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 387 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 388 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 389 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 390 v += 16; 391 } 392 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 393 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 394 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 395 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 396 idiag -= 16; 397 i2 -= 4; 398 } 399 PetscLogFlops(16*(a->nz)); 400 } 401 } else { 402 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 403 } 404 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 405 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 406 PetscFunctionReturn(0); 407 } 408 409 #undef __FUNCT__ 410 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 411 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 412 { 413 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 414 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 415 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 416 PetscErrorCode ierr; 417 int m = a->mbs,i,i2,nz,idx; 418 const int *diag,*ai = a->i,*aj = a->j,*vi; 419 420 PetscFunctionBegin; 421 its = its*lits; 422 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 423 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 424 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 425 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 426 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 427 428 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 429 430 diag = a->diag; 431 idiag = a->idiag; 432 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 433 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 434 435 if (flag & SOR_ZERO_INITIAL_GUESS) { 436 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 437 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 438 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 439 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 440 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 441 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 442 i2 = 5; 443 idiag += 25; 444 for (i=1; i<m; i++) { 445 v = aa + 25*ai[i]; 446 vi = aj + ai[i]; 447 nz = diag[i] - ai[i]; 448 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 449 while (nz--) { 450 idx = 5*(*vi++); 451 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 452 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 453 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 454 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 455 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 456 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 457 v += 25; 458 } 459 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 460 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 461 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 462 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 463 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 464 idiag += 25; 465 i2 += 5; 466 } 467 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 468 PetscLogFlops(25*(a->nz)); 469 } 470 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 471 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 472 i2 = 0; 473 mdiag = a->idiag+25*a->mbs; 474 for (i=0; i<m; i++) { 475 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 476 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 477 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 478 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 479 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 480 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 481 mdiag += 25; 482 i2 += 5; 483 } 484 PetscLogFlops(45*m); 485 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 486 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 487 } 488 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 489 idiag = a->idiag+25*a->mbs - 25; 490 i2 = 5*m - 5; 491 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 492 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 493 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 494 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 495 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 496 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 497 idiag -= 25; 498 i2 -= 5; 499 for (i=m-2; i>=0; i--) { 500 v = aa + 25*(diag[i]+1); 501 vi = aj + diag[i] + 1; 502 nz = ai[i+1] - diag[i] - 1; 503 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 504 while (nz--) { 505 idx = 5*(*vi++); 506 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 507 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 508 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 509 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 510 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 511 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 512 v += 25; 513 } 514 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 515 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 516 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 517 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 518 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 519 idiag -= 25; 520 i2 -= 5; 521 } 522 PetscLogFlops(25*(a->nz)); 523 } 524 } else { 525 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 526 } 527 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 528 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 529 PetscFunctionReturn(0); 530 } 531 532 /* 533 Special version for Fun3d sequential benchmark 534 */ 535 #if defined(PETSC_HAVE_FORTRAN_CAPS) 536 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 537 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 538 #define matsetvaluesblocked4_ matsetvaluesblocked4 539 #endif 540 541 EXTERN_C_BEGIN 542 #undef __FUNCT__ 543 #define __FUNCT__ "matsetvaluesblocked4_" 544 void matsetvaluesblocked4_(Mat *AA,int *mm,const int im[],int *nn,const int in[],const PetscScalar v[]) 545 { 546 Mat A = *AA; 547 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 548 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 549 int *ai=a->i,*ailen=a->ilen; 550 int *aj=a->j,stepval; 551 const PetscScalar *value = v; 552 MatScalar *ap,*aa = a->a,*bap; 553 554 PetscFunctionBegin; 555 stepval = (n-1)*4; 556 for (k=0; k<m; k++) { /* loop over added rows */ 557 row = im[k]; 558 rp = aj + ai[row]; 559 ap = aa + 16*ai[row]; 560 nrow = ailen[row]; 561 low = 0; 562 for (l=0; l<n; l++) { /* loop over added columns */ 563 col = in[l]; 564 value = v + k*(stepval+4)*4 + l*4; 565 low = 0; high = nrow; 566 while (high-low > 7) { 567 t = (low+high)/2; 568 if (rp[t] > col) high = t; 569 else low = t; 570 } 571 for (i=low; i<high; i++) { 572 if (rp[i] > col) break; 573 if (rp[i] == col) { 574 bap = ap + 16*i; 575 for (ii=0; ii<4; ii++,value+=stepval) { 576 for (jj=ii; jj<16; jj+=4) { 577 bap[jj] += *value++; 578 } 579 } 580 goto noinsert2; 581 } 582 } 583 N = nrow++ - 1; 584 /* shift up all the later entries in this row */ 585 for (ii=N; ii>=i; ii--) { 586 rp[ii+1] = rp[ii]; 587 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 588 } 589 if (N >= i) { 590 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 591 } 592 rp[i] = col; 593 bap = ap + 16*i; 594 for (ii=0; ii<4; ii++,value+=stepval) { 595 for (jj=ii; jj<16; jj+=4) { 596 bap[jj] = *value++; 597 } 598 } 599 noinsert2:; 600 low = i; 601 } 602 ailen[row] = nrow; 603 } 604 } 605 EXTERN_C_END 606 607 #if defined(PETSC_HAVE_FORTRAN_CAPS) 608 #define matsetvalues4_ MATSETVALUES4 609 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 610 #define matsetvalues4_ matsetvalues4 611 #endif 612 613 EXTERN_C_BEGIN 614 #undef __FUNCT__ 615 #define __FUNCT__ "MatSetValues4_" 616 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 617 { 618 Mat A = *AA; 619 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 620 int *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 621 int *ai=a->i,*ailen=a->ilen; 622 int *aj=a->j,brow,bcol; 623 int ridx,cidx; 624 MatScalar *ap,value,*aa=a->a,*bap; 625 626 PetscFunctionBegin; 627 for (k=0; k<m; k++) { /* loop over added rows */ 628 row = im[k]; brow = row/4; 629 rp = aj + ai[brow]; 630 ap = aa + 16*ai[brow]; 631 nrow = ailen[brow]; 632 low = 0; 633 for (l=0; l<n; l++) { /* loop over added columns */ 634 col = in[l]; bcol = col/4; 635 ridx = row % 4; cidx = col % 4; 636 value = v[l + k*n]; 637 low = 0; high = nrow; 638 while (high-low > 7) { 639 t = (low+high)/2; 640 if (rp[t] > bcol) high = t; 641 else low = t; 642 } 643 for (i=low; i<high; i++) { 644 if (rp[i] > bcol) break; 645 if (rp[i] == bcol) { 646 bap = ap + 16*i + 4*cidx + ridx; 647 *bap += value; 648 goto noinsert1; 649 } 650 } 651 N = nrow++ - 1; 652 /* shift up all the later entries in this row */ 653 for (ii=N; ii>=i; ii--) { 654 rp[ii+1] = rp[ii]; 655 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 656 } 657 if (N>=i) { 658 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 659 } 660 rp[i] = bcol; 661 ap[16*i + 4*cidx + ridx] = value; 662 noinsert1:; 663 low = i; 664 } 665 ailen[brow] = nrow; 666 } 667 } 668 EXTERN_C_END 669 670 /* UGLY, ugly, ugly 671 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 672 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 673 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 674 converts the entries into single precision and then calls ..._MatScalar() to put them 675 into the single precision data structures. 676 */ 677 #if defined(PETSC_USE_MAT_SINGLE) 678 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 679 #else 680 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 681 #endif 682 683 #define CHUNKSIZE 10 684 685 /* 686 Checks for missing diagonals 687 */ 688 #undef __FUNCT__ 689 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 690 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A) 691 { 692 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 693 PetscErrorCode ierr; 694 int *diag,*jj = a->j,i; 695 696 PetscFunctionBegin; 697 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 698 diag = a->diag; 699 for (i=0; i<a->mbs; i++) { 700 if (jj[diag[i]] != i) { 701 SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %d",i); 702 } 703 } 704 PetscFunctionReturn(0); 705 } 706 707 #undef __FUNCT__ 708 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 709 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 710 { 711 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 712 PetscErrorCode ierr; 713 int i,j,*diag,m = a->mbs; 714 715 PetscFunctionBegin; 716 if (a->diag) PetscFunctionReturn(0); 717 718 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 719 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 720 for (i=0; i<m; i++) { 721 diag[i] = a->i[i+1]; 722 for (j=a->i[i]; j<a->i[i+1]; j++) { 723 if (a->j[j] == i) { 724 diag[i] = j; 725 break; 726 } 727 } 728 } 729 a->diag = diag; 730 PetscFunctionReturn(0); 731 } 732 733 734 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 735 736 #undef __FUNCT__ 737 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 738 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 739 { 740 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 741 PetscErrorCode ierr; 742 int n = a->mbs,i; 743 744 PetscFunctionBegin; 745 *nn = n; 746 if (!ia) PetscFunctionReturn(0); 747 if (symmetric) { 748 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 749 } else if (oshift == 1) { 750 /* temporarily add 1 to i and j indices */ 751 int nz = a->i[n]; 752 for (i=0; i<nz; i++) a->j[i]++; 753 for (i=0; i<n+1; i++) a->i[i]++; 754 *ia = a->i; *ja = a->j; 755 } else { 756 *ia = a->i; *ja = a->j; 757 } 758 759 PetscFunctionReturn(0); 760 } 761 762 #undef __FUNCT__ 763 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 764 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 765 { 766 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 767 PetscErrorCode ierr; 768 int i,n = a->mbs; 769 770 PetscFunctionBegin; 771 if (!ia) PetscFunctionReturn(0); 772 if (symmetric) { 773 ierr = PetscFree(*ia);CHKERRQ(ierr); 774 ierr = PetscFree(*ja);CHKERRQ(ierr); 775 } else if (oshift == 1) { 776 int nz = a->i[n]-1; 777 for (i=0; i<nz; i++) a->j[i]--; 778 for (i=0; i<n+1; i++) a->i[i]--; 779 } 780 PetscFunctionReturn(0); 781 } 782 783 #undef __FUNCT__ 784 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 785 PetscErrorCode MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 786 { 787 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 788 789 PetscFunctionBegin; 790 *bs = baij->bs; 791 PetscFunctionReturn(0); 792 } 793 794 795 #undef __FUNCT__ 796 #define __FUNCT__ "MatDestroy_SeqBAIJ" 797 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 798 { 799 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 800 PetscErrorCode ierr; 801 802 PetscFunctionBegin; 803 #if defined(PETSC_USE_LOG) 804 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 805 #endif 806 ierr = PetscFree(a->a);CHKERRQ(ierr); 807 if (!a->singlemalloc) { 808 ierr = PetscFree(a->i);CHKERRQ(ierr); 809 ierr = PetscFree(a->j);CHKERRQ(ierr); 810 } 811 if (a->row) { 812 ierr = ISDestroy(a->row);CHKERRQ(ierr); 813 } 814 if (a->col) { 815 ierr = ISDestroy(a->col);CHKERRQ(ierr); 816 } 817 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 818 if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);} 819 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 820 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 821 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 822 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 823 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 824 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 825 #if defined(PETSC_USE_MAT_SINGLE) 826 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 827 #endif 828 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 829 830 ierr = PetscFree(a);CHKERRQ(ierr); 831 PetscFunctionReturn(0); 832 } 833 834 #undef __FUNCT__ 835 #define __FUNCT__ "MatSetOption_SeqBAIJ" 836 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op) 837 { 838 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 839 840 PetscFunctionBegin; 841 switch (op) { 842 case MAT_ROW_ORIENTED: 843 a->roworiented = PETSC_TRUE; 844 break; 845 case MAT_COLUMN_ORIENTED: 846 a->roworiented = PETSC_FALSE; 847 break; 848 case MAT_COLUMNS_SORTED: 849 a->sorted = PETSC_TRUE; 850 break; 851 case MAT_COLUMNS_UNSORTED: 852 a->sorted = PETSC_FALSE; 853 break; 854 case MAT_KEEP_ZEROED_ROWS: 855 a->keepzeroedrows = PETSC_TRUE; 856 break; 857 case MAT_NO_NEW_NONZERO_LOCATIONS: 858 a->nonew = 1; 859 break; 860 case MAT_NEW_NONZERO_LOCATION_ERR: 861 a->nonew = -1; 862 break; 863 case MAT_NEW_NONZERO_ALLOCATION_ERR: 864 a->nonew = -2; 865 break; 866 case MAT_YES_NEW_NONZERO_LOCATIONS: 867 a->nonew = 0; 868 break; 869 case MAT_ROWS_SORTED: 870 case MAT_ROWS_UNSORTED: 871 case MAT_YES_NEW_DIAGONALS: 872 case MAT_IGNORE_OFF_PROC_ENTRIES: 873 case MAT_USE_HASH_TABLE: 874 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 875 break; 876 case MAT_NO_NEW_DIAGONALS: 877 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 878 case MAT_SYMMETRIC: 879 case MAT_STRUCTURALLY_SYMMETRIC: 880 case MAT_NOT_SYMMETRIC: 881 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 882 case MAT_HERMITIAN: 883 case MAT_NOT_HERMITIAN: 884 case MAT_SYMMETRY_ETERNAL: 885 case MAT_NOT_SYMMETRY_ETERNAL: 886 break; 887 default: 888 SETERRQ(PETSC_ERR_SUP,"unknown option"); 889 } 890 PetscFunctionReturn(0); 891 } 892 893 #undef __FUNCT__ 894 #define __FUNCT__ "MatGetRow_SeqBAIJ" 895 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 896 { 897 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 898 PetscErrorCode ierr; 899 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 900 MatScalar *aa,*aa_i; 901 PetscScalar *v_i; 902 903 PetscFunctionBegin; 904 bs = a->bs; 905 ai = a->i; 906 aj = a->j; 907 aa = a->a; 908 bs2 = a->bs2; 909 910 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d out of range", row); 911 912 bn = row/bs; /* Block number */ 913 bp = row % bs; /* Block Position */ 914 M = ai[bn+1] - ai[bn]; 915 *nz = bs*M; 916 917 if (v) { 918 *v = 0; 919 if (*nz) { 920 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 921 for (i=0; i<M; i++) { /* for each block in the block row */ 922 v_i = *v + i*bs; 923 aa_i = aa + bs2*(ai[bn] + i); 924 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 925 } 926 } 927 } 928 929 if (idx) { 930 *idx = 0; 931 if (*nz) { 932 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 933 for (i=0; i<M; i++) { /* for each block in the block row */ 934 idx_i = *idx + i*bs; 935 itmp = bs*aj[ai[bn] + i]; 936 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 937 } 938 } 939 } 940 PetscFunctionReturn(0); 941 } 942 943 #undef __FUNCT__ 944 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 945 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 946 { 947 PetscErrorCode ierr; 948 949 PetscFunctionBegin; 950 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 951 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 952 PetscFunctionReturn(0); 953 } 954 955 #undef __FUNCT__ 956 #define __FUNCT__ "MatTranspose_SeqBAIJ" 957 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 958 { 959 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 960 Mat C; 961 PetscErrorCode ierr; 962 int i,j,k,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 963 int *rows,*cols,bs2=a->bs2; 964 PetscScalar *array; 965 966 PetscFunctionBegin; 967 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 968 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 969 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 970 971 #if defined(PETSC_USE_MAT_SINGLE) 972 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 973 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 974 #else 975 array = a->a; 976 #endif 977 978 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 979 ierr = MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);CHKERRQ(ierr); 980 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 981 ierr = MatSeqBAIJSetPreallocation(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 982 ierr = PetscFree(col);CHKERRQ(ierr); 983 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 984 cols = rows + bs; 985 for (i=0; i<mbs; i++) { 986 cols[0] = i*bs; 987 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 988 len = ai[i+1] - ai[i]; 989 for (j=0; j<len; j++) { 990 rows[0] = (*aj++)*bs; 991 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 992 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 993 array += bs2; 994 } 995 } 996 ierr = PetscFree(rows);CHKERRQ(ierr); 997 #if defined(PETSC_USE_MAT_SINGLE) 998 ierr = PetscFree(array); 999 #endif 1000 1001 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1002 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1003 1004 if (B) { 1005 *B = C; 1006 } else { 1007 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1008 } 1009 PetscFunctionReturn(0); 1010 } 1011 1012 #undef __FUNCT__ 1013 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1014 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1015 { 1016 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1017 PetscErrorCode ierr; 1018 int i,fd,*col_lens,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 1019 PetscScalar *aa; 1020 FILE *file; 1021 1022 PetscFunctionBegin; 1023 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1024 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 1025 col_lens[0] = MAT_FILE_COOKIE; 1026 1027 col_lens[1] = A->m; 1028 col_lens[2] = A->n; 1029 col_lens[3] = a->nz*bs2; 1030 1031 /* store lengths of each row and write (including header) to file */ 1032 count = 0; 1033 for (i=0; i<a->mbs; i++) { 1034 for (j=0; j<bs; j++) { 1035 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1036 } 1037 } 1038 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 1039 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1040 1041 /* store column indices (zero start index) */ 1042 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 1043 count = 0; 1044 for (i=0; i<a->mbs; i++) { 1045 for (j=0; j<bs; j++) { 1046 for (k=a->i[i]; k<a->i[i+1]; k++) { 1047 for (l=0; l<bs; l++) { 1048 jj[count++] = bs*a->j[k] + l; 1049 } 1050 } 1051 } 1052 } 1053 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 1054 ierr = PetscFree(jj);CHKERRQ(ierr); 1055 1056 /* store nonzero values */ 1057 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1058 count = 0; 1059 for (i=0; i<a->mbs; i++) { 1060 for (j=0; j<bs; j++) { 1061 for (k=a->i[i]; k<a->i[i+1]; k++) { 1062 for (l=0; l<bs; l++) { 1063 aa[count++] = a->a[bs2*k + l*bs + j]; 1064 } 1065 } 1066 } 1067 } 1068 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 1069 ierr = PetscFree(aa);CHKERRQ(ierr); 1070 1071 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1072 if (file) { 1073 fprintf(file,"-matload_block_size %d\n",a->bs); 1074 } 1075 PetscFunctionReturn(0); 1076 } 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1080 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1081 { 1082 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1083 PetscErrorCode ierr; 1084 int i,j,bs = a->bs,k,l,bs2=a->bs2; 1085 PetscViewerFormat format; 1086 1087 PetscFunctionBegin; 1088 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1089 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1090 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 1091 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1092 Mat aij; 1093 ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr); 1094 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1095 ierr = MatDestroy(aij);CHKERRQ(ierr); 1096 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1097 PetscFunctionReturn(0); 1098 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1099 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1100 for (i=0; i<a->mbs; i++) { 1101 for (j=0; j<bs; j++) { 1102 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 1103 for (k=a->i[i]; k<a->i[i+1]; k++) { 1104 for (l=0; l<bs; l++) { 1105 #if defined(PETSC_USE_COMPLEX) 1106 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1107 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l, 1108 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1109 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1110 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l, 1111 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1112 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1113 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1114 } 1115 #else 1116 if (a->a[bs2*k + l*bs + j] != 0.0) { 1117 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1118 } 1119 #endif 1120 } 1121 } 1122 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1123 } 1124 } 1125 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1126 } else { 1127 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1128 for (i=0; i<a->mbs; i++) { 1129 for (j=0; j<bs; j++) { 1130 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 1131 for (k=a->i[i]; k<a->i[i+1]; k++) { 1132 for (l=0; l<bs; l++) { 1133 #if defined(PETSC_USE_COMPLEX) 1134 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1135 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 1136 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1137 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1138 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 1139 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1140 } else { 1141 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1142 } 1143 #else 1144 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1145 #endif 1146 } 1147 } 1148 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1149 } 1150 } 1151 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1152 } 1153 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1154 PetscFunctionReturn(0); 1155 } 1156 1157 #undef __FUNCT__ 1158 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1159 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1160 { 1161 Mat A = (Mat) Aa; 1162 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1163 PetscErrorCode ierr; 1164 int row,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 1165 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1166 MatScalar *aa; 1167 PetscViewer viewer; 1168 1169 PetscFunctionBegin; 1170 1171 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1172 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1173 1174 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1175 1176 /* loop over matrix elements drawing boxes */ 1177 color = PETSC_DRAW_BLUE; 1178 for (i=0,row=0; i<mbs; i++,row+=bs) { 1179 for (j=a->i[i]; j<a->i[i+1]; j++) { 1180 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1181 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1182 aa = a->a + j*bs2; 1183 for (k=0; k<bs; k++) { 1184 for (l=0; l<bs; l++) { 1185 if (PetscRealPart(*aa++) >= 0.) continue; 1186 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1187 } 1188 } 1189 } 1190 } 1191 color = PETSC_DRAW_CYAN; 1192 for (i=0,row=0; i<mbs; i++,row+=bs) { 1193 for (j=a->i[i]; j<a->i[i+1]; j++) { 1194 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1195 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1196 aa = a->a + j*bs2; 1197 for (k=0; k<bs; k++) { 1198 for (l=0; l<bs; l++) { 1199 if (PetscRealPart(*aa++) != 0.) continue; 1200 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1201 } 1202 } 1203 } 1204 } 1205 1206 color = PETSC_DRAW_RED; 1207 for (i=0,row=0; i<mbs; i++,row+=bs) { 1208 for (j=a->i[i]; j<a->i[i+1]; j++) { 1209 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1210 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1211 aa = a->a + j*bs2; 1212 for (k=0; k<bs; k++) { 1213 for (l=0; l<bs; l++) { 1214 if (PetscRealPart(*aa++) <= 0.) continue; 1215 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1216 } 1217 } 1218 } 1219 } 1220 PetscFunctionReturn(0); 1221 } 1222 1223 #undef __FUNCT__ 1224 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1225 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1226 { 1227 PetscErrorCode ierr; 1228 PetscReal xl,yl,xr,yr,w,h; 1229 PetscDraw draw; 1230 PetscTruth isnull; 1231 1232 PetscFunctionBegin; 1233 1234 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1235 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1236 1237 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1238 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 1239 xr += w; yr += h; xl = -w; yl = -h; 1240 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1241 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1242 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1243 PetscFunctionReturn(0); 1244 } 1245 1246 #undef __FUNCT__ 1247 #define __FUNCT__ "MatView_SeqBAIJ" 1248 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1249 { 1250 PetscErrorCode ierr; 1251 PetscTruth iascii,isbinary,isdraw; 1252 1253 PetscFunctionBegin; 1254 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1255 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1256 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1257 if (iascii){ 1258 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1259 } else if (isbinary) { 1260 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1261 } else if (isdraw) { 1262 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1263 } else { 1264 Mat B; 1265 ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr); 1266 ierr = MatView(B,viewer);CHKERRQ(ierr); 1267 ierr = MatDestroy(B);CHKERRQ(ierr); 1268 } 1269 PetscFunctionReturn(0); 1270 } 1271 1272 1273 #undef __FUNCT__ 1274 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1275 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[]) 1276 { 1277 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1278 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1279 int *ai = a->i,*ailen = a->ilen; 1280 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 1281 MatScalar *ap,*aa = a->a,zero = 0.0; 1282 1283 PetscFunctionBegin; 1284 for (k=0; k<m; k++) { /* loop over rows */ 1285 row = im[k]; brow = row/bs; 1286 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 1287 if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d too large", row); 1288 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1289 nrow = ailen[brow]; 1290 for (l=0; l<n; l++) { /* loop over columns */ 1291 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 1292 if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %d too large", in[l]); 1293 col = in[l] ; 1294 bcol = col/bs; 1295 cidx = col%bs; 1296 ridx = row%bs; 1297 high = nrow; 1298 low = 0; /* assume unsorted */ 1299 while (high-low > 5) { 1300 t = (low+high)/2; 1301 if (rp[t] > bcol) high = t; 1302 else low = t; 1303 } 1304 for (i=low; i<high; i++) { 1305 if (rp[i] > bcol) break; 1306 if (rp[i] == bcol) { 1307 *v++ = ap[bs2*i+bs*cidx+ridx]; 1308 goto finished; 1309 } 1310 } 1311 *v++ = zero; 1312 finished:; 1313 } 1314 } 1315 PetscFunctionReturn(0); 1316 } 1317 1318 #if defined(PETSC_USE_MAT_SINGLE) 1319 #undef __FUNCT__ 1320 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1321 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 1322 { 1323 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1324 PetscErrorCode ierr; 1325 int i,N = m*n*b->bs2; 1326 MatScalar *vsingle; 1327 1328 PetscFunctionBegin; 1329 if (N > b->setvalueslen) { 1330 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 1331 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1332 b->setvalueslen = N; 1333 } 1334 vsingle = b->setvaluescopy; 1335 for (i=0; i<N; i++) { 1336 vsingle[i] = v[i]; 1337 } 1338 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1339 PetscFunctionReturn(0); 1340 } 1341 #endif 1342 1343 1344 #undef __FUNCT__ 1345 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1346 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode is) 1347 { 1348 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1349 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1350 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1351 PetscErrorCode ierr; 1352 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval; 1353 PetscTruth roworiented=a->roworiented; 1354 const MatScalar *value = v; 1355 MatScalar *ap,*aa = a->a,*bap; 1356 1357 PetscFunctionBegin; 1358 if (roworiented) { 1359 stepval = (n-1)*bs; 1360 } else { 1361 stepval = (m-1)*bs; 1362 } 1363 for (k=0; k<m; k++) { /* loop over added rows */ 1364 row = im[k]; 1365 if (row < 0) continue; 1366 #if defined(PETSC_USE_BOPT_g) 1367 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1); 1368 #endif 1369 rp = aj + ai[row]; 1370 ap = aa + bs2*ai[row]; 1371 rmax = imax[row]; 1372 nrow = ailen[row]; 1373 low = 0; 1374 for (l=0; l<n; l++) { /* loop over added columns */ 1375 if (in[l] < 0) continue; 1376 #if defined(PETSC_USE_BOPT_g) 1377 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],a->nbs-1); 1378 #endif 1379 col = in[l]; 1380 if (roworiented) { 1381 value = v + k*(stepval+bs)*bs + l*bs; 1382 } else { 1383 value = v + l*(stepval+bs)*bs + k*bs; 1384 } 1385 if (!sorted) low = 0; high = nrow; 1386 while (high-low > 7) { 1387 t = (low+high)/2; 1388 if (rp[t] > col) high = t; 1389 else low = t; 1390 } 1391 for (i=low; i<high; i++) { 1392 if (rp[i] > col) break; 1393 if (rp[i] == col) { 1394 bap = ap + bs2*i; 1395 if (roworiented) { 1396 if (is == ADD_VALUES) { 1397 for (ii=0; ii<bs; ii++,value+=stepval) { 1398 for (jj=ii; jj<bs2; jj+=bs) { 1399 bap[jj] += *value++; 1400 } 1401 } 1402 } else { 1403 for (ii=0; ii<bs; ii++,value+=stepval) { 1404 for (jj=ii; jj<bs2; jj+=bs) { 1405 bap[jj] = *value++; 1406 } 1407 } 1408 } 1409 } else { 1410 if (is == ADD_VALUES) { 1411 for (ii=0; ii<bs; ii++,value+=stepval) { 1412 for (jj=0; jj<bs; jj++) { 1413 *bap++ += *value++; 1414 } 1415 } 1416 } else { 1417 for (ii=0; ii<bs; ii++,value+=stepval) { 1418 for (jj=0; jj<bs; jj++) { 1419 *bap++ = *value++; 1420 } 1421 } 1422 } 1423 } 1424 goto noinsert2; 1425 } 1426 } 1427 if (nonew == 1) goto noinsert2; 1428 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1429 if (nrow >= rmax) { 1430 /* there is no extra room in row, therefore enlarge */ 1431 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1432 MatScalar *new_a; 1433 1434 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1435 1436 /* malloc new storage space */ 1437 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1438 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1439 new_j = (int*)(new_a + bs2*new_nz); 1440 new_i = new_j + new_nz; 1441 1442 /* copy over old data into new slots */ 1443 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 1444 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1445 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 1446 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 1447 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 1448 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1449 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1450 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1451 /* free up old matrix storage */ 1452 ierr = PetscFree(a->a);CHKERRQ(ierr); 1453 if (!a->singlemalloc) { 1454 ierr = PetscFree(a->i);CHKERRQ(ierr); 1455 ierr = PetscFree(a->j);CHKERRQ(ierr); 1456 } 1457 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1458 a->singlemalloc = PETSC_TRUE; 1459 1460 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 1461 rmax = imax[row] = imax[row] + CHUNKSIZE; 1462 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1463 a->maxnz += bs2*CHUNKSIZE; 1464 a->reallocs++; 1465 a->nz++; 1466 } 1467 N = nrow++ - 1; 1468 /* shift up all the later entries in this row */ 1469 for (ii=N; ii>=i; ii--) { 1470 rp[ii+1] = rp[ii]; 1471 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1472 } 1473 if (N >= i) { 1474 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1475 } 1476 rp[i] = col; 1477 bap = ap + bs2*i; 1478 if (roworiented) { 1479 for (ii=0; ii<bs; ii++,value+=stepval) { 1480 for (jj=ii; jj<bs2; jj+=bs) { 1481 bap[jj] = *value++; 1482 } 1483 } 1484 } else { 1485 for (ii=0; ii<bs; ii++,value+=stepval) { 1486 for (jj=0; jj<bs; jj++) { 1487 *bap++ = *value++; 1488 } 1489 } 1490 } 1491 noinsert2:; 1492 low = i; 1493 } 1494 ailen[row] = nrow; 1495 } 1496 PetscFunctionReturn(0); 1497 } 1498 1499 #undef __FUNCT__ 1500 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1501 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1502 { 1503 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1504 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1505 int m = A->m,*ip,N,*ailen = a->ilen; 1506 PetscErrorCode ierr; 1507 int mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1508 MatScalar *aa = a->a,*ap; 1509 1510 PetscFunctionBegin; 1511 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1512 1513 if (m) rmax = ailen[0]; 1514 for (i=1; i<mbs; i++) { 1515 /* move each row back by the amount of empty slots (fshift) before it*/ 1516 fshift += imax[i-1] - ailen[i-1]; 1517 rmax = PetscMax(rmax,ailen[i]); 1518 if (fshift) { 1519 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1520 N = ailen[i]; 1521 for (j=0; j<N; j++) { 1522 ip[j-fshift] = ip[j]; 1523 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1524 } 1525 } 1526 ai[i] = ai[i-1] + ailen[i-1]; 1527 } 1528 if (mbs) { 1529 fshift += imax[mbs-1] - ailen[mbs-1]; 1530 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1531 } 1532 /* reset ilen and imax for each row */ 1533 for (i=0; i<mbs; i++) { 1534 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1535 } 1536 a->nz = ai[mbs]; 1537 1538 /* diagonals may have moved, so kill the diagonal pointers */ 1539 a->idiagvalid = PETSC_FALSE; 1540 if (fshift && a->diag) { 1541 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1542 PetscLogObjectMemory(A,-(mbs+1)*sizeof(int)); 1543 a->diag = 0; 1544 } 1545 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 1546 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 1547 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 1548 a->reallocs = 0; 1549 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1550 1551 PetscFunctionReturn(0); 1552 } 1553 1554 1555 1556 /* 1557 This function returns an array of flags which indicate the locations of contiguous 1558 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1559 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1560 Assume: sizes should be long enough to hold all the values. 1561 */ 1562 #undef __FUNCT__ 1563 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1564 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 1565 { 1566 int i,j,k,row; 1567 PetscTruth flg; 1568 1569 PetscFunctionBegin; 1570 for (i=0,j=0; i<n; j++) { 1571 row = idx[i]; 1572 if (row%bs!=0) { /* Not the begining of a block */ 1573 sizes[j] = 1; 1574 i++; 1575 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1576 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1577 i++; 1578 } else { /* Begining of the block, so check if the complete block exists */ 1579 flg = PETSC_TRUE; 1580 for (k=1; k<bs; k++) { 1581 if (row+k != idx[i+k]) { /* break in the block */ 1582 flg = PETSC_FALSE; 1583 break; 1584 } 1585 } 1586 if (flg == PETSC_TRUE) { /* No break in the bs */ 1587 sizes[j] = bs; 1588 i+= bs; 1589 } else { 1590 sizes[j] = 1; 1591 i++; 1592 } 1593 } 1594 } 1595 *bs_max = j; 1596 PetscFunctionReturn(0); 1597 } 1598 1599 #undef __FUNCT__ 1600 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1601 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag) 1602 { 1603 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1604 PetscErrorCode ierr; 1605 int i,j,k,count,is_n,*is_idx,*rows; 1606 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 1607 PetscScalar zero = 0.0; 1608 MatScalar *aa; 1609 1610 PetscFunctionBegin; 1611 /* Make a copy of the IS and sort it */ 1612 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1613 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1614 1615 /* allocate memory for rows,sizes */ 1616 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 1617 sizes = rows + is_n; 1618 1619 /* copy IS values to rows, and sort them */ 1620 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1621 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1622 if (baij->keepzeroedrows) { 1623 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1624 bs_max = is_n; 1625 } else { 1626 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1627 } 1628 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1629 1630 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1631 row = rows[j]; 1632 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 1633 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1634 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1635 if (sizes[i] == bs && !baij->keepzeroedrows) { 1636 if (diag) { 1637 if (baij->ilen[row/bs] > 0) { 1638 baij->ilen[row/bs] = 1; 1639 baij->j[baij->i[row/bs]] = row/bs; 1640 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1641 } 1642 /* Now insert all the diagonal values for this bs */ 1643 for (k=0; k<bs; k++) { 1644 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1645 } 1646 } else { /* (!diag) */ 1647 baij->ilen[row/bs] = 0; 1648 } /* end (!diag) */ 1649 } else { /* (sizes[i] != bs) */ 1650 #if defined (PETSC_USE_DEBUG) 1651 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 1652 #endif 1653 for (k=0; k<count; k++) { 1654 aa[0] = zero; 1655 aa += bs; 1656 } 1657 if (diag) { 1658 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1659 } 1660 } 1661 } 1662 1663 ierr = PetscFree(rows);CHKERRQ(ierr); 1664 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1665 PetscFunctionReturn(0); 1666 } 1667 1668 #undef __FUNCT__ 1669 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1670 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 1671 { 1672 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1673 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1674 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1675 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1676 PetscErrorCode ierr; 1677 int ridx,cidx,bs2=a->bs2; 1678 PetscTruth roworiented=a->roworiented; 1679 MatScalar *ap,value,*aa=a->a,*bap; 1680 1681 PetscFunctionBegin; 1682 for (k=0; k<m; k++) { /* loop over added rows */ 1683 row = im[k]; brow = row/bs; 1684 if (row < 0) continue; 1685 #if defined(PETSC_USE_BOPT_g) 1686 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 1687 #endif 1688 rp = aj + ai[brow]; 1689 ap = aa + bs2*ai[brow]; 1690 rmax = imax[brow]; 1691 nrow = ailen[brow]; 1692 low = 0; 1693 for (l=0; l<n; l++) { /* loop over added columns */ 1694 if (in[l] < 0) continue; 1695 #if defined(PETSC_USE_BOPT_g) 1696 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1); 1697 #endif 1698 col = in[l]; bcol = col/bs; 1699 ridx = row % bs; cidx = col % bs; 1700 if (roworiented) { 1701 value = v[l + k*n]; 1702 } else { 1703 value = v[k + l*m]; 1704 } 1705 if (!sorted) low = 0; high = nrow; 1706 while (high-low > 7) { 1707 t = (low+high)/2; 1708 if (rp[t] > bcol) high = t; 1709 else low = t; 1710 } 1711 for (i=low; i<high; i++) { 1712 if (rp[i] > bcol) break; 1713 if (rp[i] == bcol) { 1714 bap = ap + bs2*i + bs*cidx + ridx; 1715 if (is == ADD_VALUES) *bap += value; 1716 else *bap = value; 1717 goto noinsert1; 1718 } 1719 } 1720 if (nonew == 1) goto noinsert1; 1721 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1722 if (nrow >= rmax) { 1723 /* there is no extra room in row, therefore enlarge */ 1724 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1725 MatScalar *new_a; 1726 1727 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1728 1729 /* Malloc new storage space */ 1730 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1731 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1732 new_j = (int*)(new_a + bs2*new_nz); 1733 new_i = new_j + new_nz; 1734 1735 /* copy over old data into new slots */ 1736 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1737 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1738 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1739 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1740 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1741 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1742 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1743 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1744 /* free up old matrix storage */ 1745 ierr = PetscFree(a->a);CHKERRQ(ierr); 1746 if (!a->singlemalloc) { 1747 ierr = PetscFree(a->i);CHKERRQ(ierr); 1748 ierr = PetscFree(a->j);CHKERRQ(ierr); 1749 } 1750 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1751 a->singlemalloc = PETSC_TRUE; 1752 1753 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1754 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1755 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1756 a->maxnz += bs2*CHUNKSIZE; 1757 a->reallocs++; 1758 a->nz++; 1759 } 1760 N = nrow++ - 1; 1761 /* shift up all the later entries in this row */ 1762 for (ii=N; ii>=i; ii--) { 1763 rp[ii+1] = rp[ii]; 1764 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1765 } 1766 if (N>=i) { 1767 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1768 } 1769 rp[i] = bcol; 1770 ap[bs2*i + bs*cidx + ridx] = value; 1771 noinsert1:; 1772 low = i; 1773 } 1774 ailen[brow] = nrow; 1775 } 1776 PetscFunctionReturn(0); 1777 } 1778 1779 1780 #undef __FUNCT__ 1781 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1782 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1783 { 1784 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1785 Mat outA; 1786 PetscErrorCode ierr; 1787 PetscTruth row_identity,col_identity; 1788 1789 PetscFunctionBegin; 1790 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1791 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1792 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1793 if (!row_identity || !col_identity) { 1794 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 1795 } 1796 1797 outA = inA; 1798 inA->factor = FACTOR_LU; 1799 1800 if (!a->diag) { 1801 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1802 } 1803 1804 a->row = row; 1805 a->col = col; 1806 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1807 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1808 1809 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1810 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1811 PetscLogObjectParent(inA,a->icol); 1812 1813 /* 1814 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1815 for ILU(0) factorization with natural ordering 1816 */ 1817 if (a->bs < 8) { 1818 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1819 } else { 1820 if (!a->solve_work) { 1821 ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1822 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1823 } 1824 } 1825 1826 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1827 1828 PetscFunctionReturn(0); 1829 } 1830 #undef __FUNCT__ 1831 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1832 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A) 1833 { 1834 static PetscTruth called = PETSC_FALSE; 1835 MPI_Comm comm = A->comm; 1836 PetscErrorCode ierr; 1837 1838 PetscFunctionBegin; 1839 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1840 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1841 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1842 PetscFunctionReturn(0); 1843 } 1844 1845 EXTERN_C_BEGIN 1846 #undef __FUNCT__ 1847 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1848 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1849 { 1850 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1851 int i,nz,nbs; 1852 1853 PetscFunctionBegin; 1854 nz = baij->maxnz/baij->bs2; 1855 nbs = baij->nbs; 1856 for (i=0; i<nz; i++) { 1857 baij->j[i] = indices[i]; 1858 } 1859 baij->nz = nz; 1860 for (i=0; i<nbs; i++) { 1861 baij->ilen[i] = baij->imax[i]; 1862 } 1863 1864 PetscFunctionReturn(0); 1865 } 1866 EXTERN_C_END 1867 1868 #undef __FUNCT__ 1869 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1870 /*@ 1871 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1872 in the matrix. 1873 1874 Input Parameters: 1875 + mat - the SeqBAIJ matrix 1876 - indices - the column indices 1877 1878 Level: advanced 1879 1880 Notes: 1881 This can be called if you have precomputed the nonzero structure of the 1882 matrix and want to provide it to the matrix object to improve the performance 1883 of the MatSetValues() operation. 1884 1885 You MUST have set the correct numbers of nonzeros per row in the call to 1886 MatCreateSeqBAIJ(). 1887 1888 MUST be called before any calls to MatSetValues(); 1889 1890 @*/ 1891 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1892 { 1893 PetscErrorCode ierr,(*f)(Mat,int *); 1894 1895 PetscFunctionBegin; 1896 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1897 PetscValidPointer(indices,2); 1898 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1899 if (f) { 1900 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1901 } else { 1902 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 1903 } 1904 PetscFunctionReturn(0); 1905 } 1906 1907 #undef __FUNCT__ 1908 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1909 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1910 { 1911 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1912 PetscErrorCode ierr; 1913 int i,j,n,row,bs,*ai,*aj,mbs; 1914 PetscReal atmp; 1915 PetscScalar *x,zero = 0.0; 1916 MatScalar *aa; 1917 int ncols,brow,krow,kcol; 1918 1919 PetscFunctionBegin; 1920 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1921 bs = a->bs; 1922 aa = a->a; 1923 ai = a->i; 1924 aj = a->j; 1925 mbs = a->mbs; 1926 1927 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1928 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1929 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1930 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1931 for (i=0; i<mbs; i++) { 1932 ncols = ai[1] - ai[0]; ai++; 1933 brow = bs*i; 1934 for (j=0; j<ncols; j++){ 1935 /* bcol = bs*(*aj); */ 1936 for (kcol=0; kcol<bs; kcol++){ 1937 for (krow=0; krow<bs; krow++){ 1938 atmp = PetscAbsScalar(*aa); aa++; 1939 row = brow + krow; /* row index */ 1940 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1941 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1942 } 1943 } 1944 aj++; 1945 } 1946 } 1947 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1948 PetscFunctionReturn(0); 1949 } 1950 1951 #undef __FUNCT__ 1952 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1953 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 1954 { 1955 PetscErrorCode ierr; 1956 1957 PetscFunctionBegin; 1958 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1959 PetscFunctionReturn(0); 1960 } 1961 1962 #undef __FUNCT__ 1963 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1964 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1965 { 1966 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1967 PetscFunctionBegin; 1968 *array = a->a; 1969 PetscFunctionReturn(0); 1970 } 1971 1972 #undef __FUNCT__ 1973 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1974 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1975 { 1976 PetscFunctionBegin; 1977 PetscFunctionReturn(0); 1978 } 1979 1980 #include "petscblaslapack.h" 1981 #undef __FUNCT__ 1982 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1983 PetscErrorCode MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1984 { 1985 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1986 PetscErrorCode ierr; 1987 int i,bs=y->bs,j,bs2; 1988 PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz; 1989 1990 PetscFunctionBegin; 1991 if (str == SAME_NONZERO_PATTERN) { 1992 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1993 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1994 if (y->xtoy && y->XtoY != X) { 1995 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1996 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1997 } 1998 if (!y->xtoy) { /* get xtoy */ 1999 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2000 y->XtoY = X; 2001 } 2002 bs2 = bs*bs; 2003 for (i=0; i<x->nz; i++) { 2004 j = 0; 2005 while (j < bs2){ 2006 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 2007 j++; 2008 } 2009 } 2010 PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 2011 } else { 2012 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 2013 } 2014 PetscFunctionReturn(0); 2015 } 2016 2017 /* -------------------------------------------------------------------*/ 2018 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2019 MatGetRow_SeqBAIJ, 2020 MatRestoreRow_SeqBAIJ, 2021 MatMult_SeqBAIJ_N, 2022 /* 4*/ MatMultAdd_SeqBAIJ_N, 2023 MatMultTranspose_SeqBAIJ, 2024 MatMultTransposeAdd_SeqBAIJ, 2025 MatSolve_SeqBAIJ_N, 2026 0, 2027 0, 2028 /*10*/ 0, 2029 MatLUFactor_SeqBAIJ, 2030 0, 2031 0, 2032 MatTranspose_SeqBAIJ, 2033 /*15*/ MatGetInfo_SeqBAIJ, 2034 MatEqual_SeqBAIJ, 2035 MatGetDiagonal_SeqBAIJ, 2036 MatDiagonalScale_SeqBAIJ, 2037 MatNorm_SeqBAIJ, 2038 /*20*/ 0, 2039 MatAssemblyEnd_SeqBAIJ, 2040 0, 2041 MatSetOption_SeqBAIJ, 2042 MatZeroEntries_SeqBAIJ, 2043 /*25*/ MatZeroRows_SeqBAIJ, 2044 MatLUFactorSymbolic_SeqBAIJ, 2045 MatLUFactorNumeric_SeqBAIJ_N, 2046 0, 2047 0, 2048 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2049 MatILUFactorSymbolic_SeqBAIJ, 2050 0, 2051 MatGetArray_SeqBAIJ, 2052 MatRestoreArray_SeqBAIJ, 2053 /*35*/ MatDuplicate_SeqBAIJ, 2054 0, 2055 0, 2056 MatILUFactor_SeqBAIJ, 2057 0, 2058 /*40*/ MatAXPY_SeqBAIJ, 2059 MatGetSubMatrices_SeqBAIJ, 2060 MatIncreaseOverlap_SeqBAIJ, 2061 MatGetValues_SeqBAIJ, 2062 0, 2063 /*45*/ MatPrintHelp_SeqBAIJ, 2064 MatScale_SeqBAIJ, 2065 0, 2066 0, 2067 0, 2068 /*50*/ MatGetBlockSize_SeqBAIJ, 2069 MatGetRowIJ_SeqBAIJ, 2070 MatRestoreRowIJ_SeqBAIJ, 2071 0, 2072 0, 2073 /*55*/ 0, 2074 0, 2075 0, 2076 0, 2077 MatSetValuesBlocked_SeqBAIJ, 2078 /*60*/ MatGetSubMatrix_SeqBAIJ, 2079 MatDestroy_SeqBAIJ, 2080 MatView_SeqBAIJ, 2081 MatGetPetscMaps_Petsc, 2082 0, 2083 /*65*/ 0, 2084 0, 2085 0, 2086 0, 2087 0, 2088 /*70*/ MatGetRowMax_SeqBAIJ, 2089 MatConvert_Basic, 2090 0, 2091 0, 2092 0, 2093 /*75*/ 0, 2094 0, 2095 0, 2096 0, 2097 0, 2098 /*80*/ 0, 2099 0, 2100 0, 2101 0, 2102 /*84*/ MatLoad_SeqBAIJ, 2103 0, 2104 0, 2105 0, 2106 0 2107 }; 2108 2109 EXTERN_C_BEGIN 2110 #undef __FUNCT__ 2111 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2112 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2113 { 2114 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2115 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 2116 PetscErrorCode ierr; 2117 2118 PetscFunctionBegin; 2119 if (aij->nonew != 1) { 2120 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2121 } 2122 2123 /* allocate space for values if not already there */ 2124 if (!aij->saved_values) { 2125 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2126 } 2127 2128 /* copy values over */ 2129 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2130 PetscFunctionReturn(0); 2131 } 2132 EXTERN_C_END 2133 2134 EXTERN_C_BEGIN 2135 #undef __FUNCT__ 2136 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2137 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2138 { 2139 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2140 PetscErrorCode ierr; 2141 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 2142 2143 PetscFunctionBegin; 2144 if (aij->nonew != 1) { 2145 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2146 } 2147 if (!aij->saved_values) { 2148 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2149 } 2150 2151 /* copy values over */ 2152 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2153 PetscFunctionReturn(0); 2154 } 2155 EXTERN_C_END 2156 2157 EXTERN_C_BEGIN 2158 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,Mat*); 2159 extern int MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,Mat*); 2160 EXTERN_C_END 2161 2162 EXTERN_C_BEGIN 2163 #undef __FUNCT__ 2164 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2165 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,int bs,int nz,int *nnz) 2166 { 2167 Mat_SeqBAIJ *b; 2168 PetscErrorCode ierr; 2169 int i,len,mbs,nbs,bs2,newbs = bs; 2170 PetscTruth flg; 2171 2172 PetscFunctionBegin; 2173 2174 B->preallocated = PETSC_TRUE; 2175 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 2176 if (nnz && newbs != bs) { 2177 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2178 } 2179 bs = newbs; 2180 2181 mbs = B->m/bs; 2182 nbs = B->n/bs; 2183 bs2 = bs*bs; 2184 2185 if (mbs*bs!=B->m || nbs*bs!=B->n) { 2186 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 2187 } 2188 2189 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2190 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 2191 if (nnz) { 2192 for (i=0; i<mbs; i++) { 2193 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 2194 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 2195 } 2196 } 2197 2198 b = (Mat_SeqBAIJ*)B->data; 2199 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 2200 B->ops->solve = MatSolve_SeqBAIJ_Update; 2201 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2202 if (!flg) { 2203 switch (bs) { 2204 case 1: 2205 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2206 B->ops->mult = MatMult_SeqBAIJ_1; 2207 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2208 break; 2209 case 2: 2210 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2211 B->ops->mult = MatMult_SeqBAIJ_2; 2212 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2213 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2214 break; 2215 case 3: 2216 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2217 B->ops->mult = MatMult_SeqBAIJ_3; 2218 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2219 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2220 break; 2221 case 4: 2222 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2223 B->ops->mult = MatMult_SeqBAIJ_4; 2224 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2225 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2226 break; 2227 case 5: 2228 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2229 B->ops->mult = MatMult_SeqBAIJ_5; 2230 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2231 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2232 break; 2233 case 6: 2234 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2235 B->ops->mult = MatMult_SeqBAIJ_6; 2236 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2237 break; 2238 case 7: 2239 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2240 B->ops->mult = MatMult_SeqBAIJ_7; 2241 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2242 break; 2243 default: 2244 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2245 B->ops->mult = MatMult_SeqBAIJ_N; 2246 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2247 break; 2248 } 2249 } 2250 b->bs = bs; 2251 b->mbs = mbs; 2252 b->nbs = nbs; 2253 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 2254 if (!nnz) { 2255 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2256 else if (nz <= 0) nz = 1; 2257 for (i=0; i<mbs; i++) b->imax[i] = nz; 2258 nz = nz*mbs; 2259 } else { 2260 nz = 0; 2261 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2262 } 2263 2264 /* allocate the matrix space */ 2265 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 2266 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 2267 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2268 b->j = (int*)(b->a + nz*bs2); 2269 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 2270 b->i = b->j + nz; 2271 b->singlemalloc = PETSC_TRUE; 2272 2273 b->i[0] = 0; 2274 for (i=1; i<mbs+1; i++) { 2275 b->i[i] = b->i[i-1] + b->imax[i-1]; 2276 } 2277 2278 /* b->ilen will count nonzeros in each block row so far. */ 2279 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 2280 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2281 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2282 2283 b->bs = bs; 2284 b->bs2 = bs2; 2285 b->mbs = mbs; 2286 b->nz = 0; 2287 b->maxnz = nz*bs2; 2288 B->info.nz_unneeded = (PetscReal)b->maxnz; 2289 PetscFunctionReturn(0); 2290 } 2291 EXTERN_C_END 2292 2293 /*MC 2294 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2295 block sparse compressed row format. 2296 2297 Options Database Keys: 2298 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2299 2300 Level: beginner 2301 2302 .seealso: MatCreateSeqBAIJ 2303 M*/ 2304 2305 EXTERN_C_BEGIN 2306 #undef __FUNCT__ 2307 #define __FUNCT__ "MatCreate_SeqBAIJ" 2308 PetscErrorCode MatCreate_SeqBAIJ(Mat B) 2309 { 2310 PetscErrorCode ierr; 2311 int size; 2312 Mat_SeqBAIJ *b; 2313 2314 PetscFunctionBegin; 2315 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 2316 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2317 2318 B->m = B->M = PetscMax(B->m,B->M); 2319 B->n = B->N = PetscMax(B->n,B->N); 2320 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2321 B->data = (void*)b; 2322 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 2323 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2324 B->factor = 0; 2325 B->lupivotthreshold = 1.0; 2326 B->mapping = 0; 2327 b->row = 0; 2328 b->col = 0; 2329 b->icol = 0; 2330 b->reallocs = 0; 2331 b->saved_values = 0; 2332 #if defined(PETSC_USE_MAT_SINGLE) 2333 b->setvalueslen = 0; 2334 b->setvaluescopy = PETSC_NULL; 2335 #endif 2336 2337 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 2338 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 2339 2340 b->sorted = PETSC_FALSE; 2341 b->roworiented = PETSC_TRUE; 2342 b->nonew = 0; 2343 b->diag = 0; 2344 b->solve_work = 0; 2345 b->mult_work = 0; 2346 B->spptr = 0; 2347 B->info.nz_unneeded = (PetscReal)b->maxnz; 2348 b->keepzeroedrows = PETSC_FALSE; 2349 b->xtoy = 0; 2350 b->XtoY = 0; 2351 2352 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2353 "MatStoreValues_SeqBAIJ", 2354 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2355 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2356 "MatRetrieveValues_SeqBAIJ", 2357 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2358 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2359 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2360 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2361 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2362 "MatConvert_SeqBAIJ_SeqAIJ", 2363 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2364 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2365 "MatConvert_SeqBAIJ_SeqSBAIJ", 2366 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2367 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2368 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2369 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2370 PetscFunctionReturn(0); 2371 } 2372 EXTERN_C_END 2373 2374 #undef __FUNCT__ 2375 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2376 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2377 { 2378 Mat C; 2379 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2380 PetscErrorCode ierr; 2381 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 2382 2383 PetscFunctionBegin; 2384 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2385 2386 *B = 0; 2387 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 2388 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 2389 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2390 c = (Mat_SeqBAIJ*)C->data; 2391 2392 C->M = A->M; 2393 C->N = A->N; 2394 c->bs = a->bs; 2395 c->bs2 = a->bs2; 2396 c->mbs = a->mbs; 2397 c->nbs = a->nbs; 2398 2399 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 2400 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 2401 for (i=0; i<mbs; i++) { 2402 c->imax[i] = a->imax[i]; 2403 c->ilen[i] = a->ilen[i]; 2404 } 2405 2406 /* allocate the matrix space */ 2407 c->singlemalloc = PETSC_TRUE; 2408 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 2409 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 2410 c->j = (int*)(c->a + nz*bs2); 2411 c->i = c->j + nz; 2412 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 2413 if (mbs > 0) { 2414 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 2415 if (cpvalues == MAT_COPY_VALUES) { 2416 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2417 } else { 2418 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2419 } 2420 } 2421 2422 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2423 c->sorted = a->sorted; 2424 c->roworiented = a->roworiented; 2425 c->nonew = a->nonew; 2426 2427 if (a->diag) { 2428 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 2429 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 2430 for (i=0; i<mbs; i++) { 2431 c->diag[i] = a->diag[i]; 2432 } 2433 } else c->diag = 0; 2434 c->nz = a->nz; 2435 c->maxnz = a->maxnz; 2436 c->solve_work = 0; 2437 c->mult_work = 0; 2438 C->preallocated = PETSC_TRUE; 2439 C->assembled = PETSC_TRUE; 2440 *B = C; 2441 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 2442 PetscFunctionReturn(0); 2443 } 2444 2445 #undef __FUNCT__ 2446 #define __FUNCT__ "MatLoad_SeqBAIJ" 2447 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A) 2448 { 2449 Mat_SeqBAIJ *a; 2450 Mat B; 2451 PetscErrorCode ierr; 2452 int i,nz,fd,header[4],size,*rowlengths=0,M,N,bs=1; 2453 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2454 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 2455 int *masked,nmask,tmp,bs2,ishift; 2456 PetscScalar *aa; 2457 MPI_Comm comm = ((PetscObject)viewer)->comm; 2458 2459 PetscFunctionBegin; 2460 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2461 bs2 = bs*bs; 2462 2463 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2464 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2465 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2466 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2467 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2468 M = header[1]; N = header[2]; nz = header[3]; 2469 2470 if (header[3] < 0) { 2471 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2472 } 2473 2474 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2475 2476 /* 2477 This code adds extra rows to make sure the number of rows is 2478 divisible by the blocksize 2479 */ 2480 mbs = M/bs; 2481 extra_rows = bs - M + bs*(mbs); 2482 if (extra_rows == bs) extra_rows = 0; 2483 else mbs++; 2484 if (extra_rows) { 2485 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 2486 } 2487 2488 /* read in row lengths */ 2489 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 2490 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2491 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2492 2493 /* read in column indices */ 2494 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 2495 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2496 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2497 2498 /* loop over row lengths determining block row lengths */ 2499 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 2500 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 2501 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 2502 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 2503 masked = mask + mbs; 2504 rowcount = 0; nzcount = 0; 2505 for (i=0; i<mbs; i++) { 2506 nmask = 0; 2507 for (j=0; j<bs; j++) { 2508 kmax = rowlengths[rowcount]; 2509 for (k=0; k<kmax; k++) { 2510 tmp = jj[nzcount++]/bs; 2511 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2512 } 2513 rowcount++; 2514 } 2515 browlengths[i] += nmask; 2516 /* zero out the mask elements we set */ 2517 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2518 } 2519 2520 /* create our matrix */ 2521 ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B); 2522 ierr = MatSetType(B,type);CHKERRQ(ierr); 2523 ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr); 2524 a = (Mat_SeqBAIJ*)B->data; 2525 2526 /* set matrix "i" values */ 2527 a->i[0] = 0; 2528 for (i=1; i<= mbs; i++) { 2529 a->i[i] = a->i[i-1] + browlengths[i-1]; 2530 a->ilen[i-1] = browlengths[i-1]; 2531 } 2532 a->nz = 0; 2533 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2534 2535 /* read in nonzero values */ 2536 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2537 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2538 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2539 2540 /* set "a" and "j" values into matrix */ 2541 nzcount = 0; jcount = 0; 2542 for (i=0; i<mbs; i++) { 2543 nzcountb = nzcount; 2544 nmask = 0; 2545 for (j=0; j<bs; j++) { 2546 kmax = rowlengths[i*bs+j]; 2547 for (k=0; k<kmax; k++) { 2548 tmp = jj[nzcount++]/bs; 2549 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2550 } 2551 } 2552 /* sort the masked values */ 2553 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2554 2555 /* set "j" values into matrix */ 2556 maskcount = 1; 2557 for (j=0; j<nmask; j++) { 2558 a->j[jcount++] = masked[j]; 2559 mask[masked[j]] = maskcount++; 2560 } 2561 /* set "a" values into matrix */ 2562 ishift = bs2*a->i[i]; 2563 for (j=0; j<bs; j++) { 2564 kmax = rowlengths[i*bs+j]; 2565 for (k=0; k<kmax; k++) { 2566 tmp = jj[nzcountb]/bs ; 2567 block = mask[tmp] - 1; 2568 point = jj[nzcountb] - bs*tmp; 2569 idx = ishift + bs2*block + j + bs*point; 2570 a->a[idx] = (MatScalar)aa[nzcountb++]; 2571 } 2572 } 2573 /* zero out the mask elements we set */ 2574 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2575 } 2576 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2577 2578 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2579 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2580 ierr = PetscFree(aa);CHKERRQ(ierr); 2581 ierr = PetscFree(jj);CHKERRQ(ierr); 2582 ierr = PetscFree(mask);CHKERRQ(ierr); 2583 2584 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2585 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2586 ierr = MatView_Private(B);CHKERRQ(ierr); 2587 2588 *A = B; 2589 PetscFunctionReturn(0); 2590 } 2591 2592 #undef __FUNCT__ 2593 #define __FUNCT__ "MatCreateSeqBAIJ" 2594 /*@C 2595 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2596 compressed row) format. For good matrix assembly performance the 2597 user should preallocate the matrix storage by setting the parameter nz 2598 (or the array nnz). By setting these parameters accurately, performance 2599 during matrix assembly can be increased by more than a factor of 50. 2600 2601 Collective on MPI_Comm 2602 2603 Input Parameters: 2604 + comm - MPI communicator, set to PETSC_COMM_SELF 2605 . bs - size of block 2606 . m - number of rows 2607 . n - number of columns 2608 . nz - number of nonzero blocks per block row (same for all rows) 2609 - nnz - array containing the number of nonzero blocks in the various block rows 2610 (possibly different for each block row) or PETSC_NULL 2611 2612 Output Parameter: 2613 . A - the matrix 2614 2615 Options Database Keys: 2616 . -mat_no_unroll - uses code that does not unroll the loops in the 2617 block calculations (much slower) 2618 . -mat_block_size - size of the blocks to use 2619 2620 Level: intermediate 2621 2622 Notes: 2623 A nonzero block is any block that as 1 or more nonzeros in it 2624 2625 The block AIJ format is fully compatible with standard Fortran 77 2626 storage. That is, the stored row and column indices can begin at 2627 either one (as in Fortran) or zero. See the users' manual for details. 2628 2629 Specify the preallocated storage with either nz or nnz (not both). 2630 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2631 allocation. For additional details, see the users manual chapter on 2632 matrices. 2633 2634 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2635 @*/ 2636 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A) 2637 { 2638 PetscErrorCode ierr; 2639 2640 PetscFunctionBegin; 2641 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 2642 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2643 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 2644 PetscFunctionReturn(0); 2645 } 2646 2647 #undef __FUNCT__ 2648 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2649 /*@C 2650 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2651 per row in the matrix. For good matrix assembly performance the 2652 user should preallocate the matrix storage by setting the parameter nz 2653 (or the array nnz). By setting these parameters accurately, performance 2654 during matrix assembly can be increased by more than a factor of 50. 2655 2656 Collective on MPI_Comm 2657 2658 Input Parameters: 2659 + A - the matrix 2660 . bs - size of block 2661 . nz - number of block nonzeros per block row (same for all rows) 2662 - nnz - array containing the number of block nonzeros in the various block rows 2663 (possibly different for each block row) or PETSC_NULL 2664 2665 Options Database Keys: 2666 . -mat_no_unroll - uses code that does not unroll the loops in the 2667 block calculations (much slower) 2668 . -mat_block_size - size of the blocks to use 2669 2670 Level: intermediate 2671 2672 Notes: 2673 The block AIJ format is fully compatible with standard Fortran 77 2674 storage. That is, the stored row and column indices can begin at 2675 either one (as in Fortran) or zero. See the users' manual for details. 2676 2677 Specify the preallocated storage with either nz or nnz (not both). 2678 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2679 allocation. For additional details, see the users manual chapter on 2680 matrices. 2681 2682 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2683 @*/ 2684 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) 2685 { 2686 PetscErrorCode ierr,(*f)(Mat,int,int,const int[]); 2687 2688 PetscFunctionBegin; 2689 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2690 if (f) { 2691 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2692 } 2693 PetscFunctionReturn(0); 2694 } 2695 2696