1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <../src/mat/blockinvert.h> 9 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 19 PetscReal shift = 0.0; 20 21 PetscFunctionBegin; 22 if (a->idiagvalid) { 23 if (values)*values = a->idiag; 24 PetscFunctionReturn(0); 25 } 26 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 27 diag_offset = a->diag; 28 if (!a->idiag) { 29 ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 30 ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 31 } 32 diag = a->idiag; 33 mdiag = a->idiag+bs2*mbs; 34 if (values) *values = a->idiag; 35 /* factor and invert each block */ 36 switch (bs){ 37 case 1: 38 for (i=0; i<mbs; i++) { 39 odiag = v + 1*diag_offset[i]; 40 diag[0] = odiag[0]; 41 mdiag[0] = odiag[0]; 42 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 43 diag += 1; 44 mdiag += 1; 45 } 46 break; 47 case 2: 48 for (i=0; i<mbs; i++) { 49 odiag = v + 4*diag_offset[i]; 50 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 51 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 52 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 53 diag += 4; 54 mdiag += 4; 55 } 56 break; 57 case 3: 58 for (i=0; i<mbs; i++) { 59 odiag = v + 9*diag_offset[i]; 60 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 61 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 62 diag[8] = odiag[8]; 63 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 64 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 65 mdiag[8] = odiag[8]; 66 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 67 diag += 9; 68 mdiag += 9; 69 } 70 break; 71 case 4: 72 for (i=0; i<mbs; i++) { 73 odiag = v + 16*diag_offset[i]; 74 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 75 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 76 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 77 diag += 16; 78 mdiag += 16; 79 } 80 break; 81 case 5: 82 for (i=0; i<mbs; i++) { 83 odiag = v + 25*diag_offset[i]; 84 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 85 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 86 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 87 diag += 25; 88 mdiag += 25; 89 } 90 break; 91 case 6: 92 for (i=0; i<mbs; i++) { 93 odiag = v + 36*diag_offset[i]; 94 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 95 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 97 diag += 36; 98 mdiag += 36; 99 } 100 break; 101 case 7: 102 for (i=0; i<mbs; i++) { 103 odiag = v + 49*diag_offset[i]; 104 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 105 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 107 diag += 49; 108 mdiag += 49; 109 } 110 break; 111 default: 112 ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr); 113 for (i=0; i<mbs; i++) { 114 odiag = v + bs2*diag_offset[i]; 115 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 117 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 118 diag += bs2; 119 mdiag += bs2; 120 } 121 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 122 } 123 a->idiagvalid = PETSC_TRUE; 124 PetscFunctionReturn(0); 125 } 126 127 #undef __FUNCT__ 128 #define __FUNCT__ "MatSOR_SeqBAIJ_1" 129 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 130 { 131 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 132 PetscScalar *x,x1,s1; 133 const PetscScalar *b; 134 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 135 PetscErrorCode ierr; 136 PetscInt m = a->mbs,i,i2,nz,j; 137 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 138 139 PetscFunctionBegin; 140 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 141 its = its*lits; 142 if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 143 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 144 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 145 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 146 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 147 148 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 149 150 diag = a->diag; 151 idiag = a->idiag; 152 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 153 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 154 155 if (flag & SOR_ZERO_INITIAL_GUESS) { 156 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 157 x[0] = b[0]*idiag[0]; 158 i2 = 1; 159 idiag += 1; 160 for (i=1; i<m; i++) { 161 v = aa + ai[i]; 162 vi = aj + ai[i]; 163 nz = diag[i] - ai[i]; 164 s1 = b[i2]; 165 for (j=0; j<nz; j++) { 166 s1 -= v[j]*x[vi[j]]; 167 } 168 x[i2] = idiag[0]*s1; 169 idiag += 1; 170 i2 += 1; 171 } 172 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 173 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 174 } 175 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 176 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 177 i2 = 0; 178 mdiag = a->idiag+a->mbs; 179 for (i=0; i<m; i++) { 180 x1 = x[i2]; 181 x[i2] = mdiag[0]*x1; 182 mdiag += 1; 183 i2 += 1; 184 } 185 ierr = PetscLogFlops(m);CHKERRQ(ierr); 186 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 187 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 188 } 189 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 190 idiag = a->idiag+a->mbs - 1; 191 i2 = m - 1; 192 x1 = x[i2]; 193 x[i2] = idiag[0]*x1; 194 idiag -= 1; 195 i2 -= 1; 196 for (i=m-2; i>=0; i--) { 197 v = aa + (diag[i]+1); 198 vi = aj + diag[i] + 1; 199 nz = ai[i+1] - diag[i] - 1; 200 s1 = x[i2]; 201 for (j=0; j<nz; j++) { 202 s1 -= v[j]*x[vi[j]]; 203 } 204 x[i2] = idiag[0]*s1; 205 idiag -= 1; 206 i2 -= 1; 207 } 208 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 209 } 210 } else { 211 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 212 } 213 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 214 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 215 PetscFunctionReturn(0); 216 } 217 218 #undef __FUNCT__ 219 #define __FUNCT__ "MatSOR_SeqBAIJ_2" 220 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 221 { 222 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 223 PetscScalar *x,x1,x2,s1,s2; 224 const PetscScalar *b; 225 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 226 PetscErrorCode ierr; 227 PetscInt m = a->mbs,i,i2,nz,idx,j,it; 228 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 229 230 PetscFunctionBegin; 231 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 232 its = its*lits; 233 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 234 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 235 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 236 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 237 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 238 239 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 240 241 diag = a->diag; 242 idiag = a->idiag; 243 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 244 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 245 246 if (flag & SOR_ZERO_INITIAL_GUESS) { 247 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 248 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 249 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 250 i2 = 2; 251 idiag += 4; 252 for (i=1; i<m; i++) { 253 v = aa + 4*ai[i]; 254 vi = aj + ai[i]; 255 nz = diag[i] - ai[i]; 256 s1 = b[i2]; s2 = b[i2+1]; 257 for (j=0; j<nz; j++) { 258 idx = 2*vi[j]; 259 it = 4*j; 260 x1 = x[idx]; x2 = x[1+idx]; 261 s1 -= v[it]*x1 + v[it+2]*x2; 262 s2 -= v[it+1]*x1 + v[it+3]*x2; 263 } 264 x[i2] = idiag[0]*s1 + idiag[2]*s2; 265 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 266 idiag += 4; 267 i2 += 2; 268 } 269 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 270 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 271 } 272 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 273 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 274 i2 = 0; 275 mdiag = a->idiag+4*a->mbs; 276 for (i=0; i<m; i++) { 277 x1 = x[i2]; x2 = x[i2+1]; 278 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 279 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 280 mdiag += 4; 281 i2 += 2; 282 } 283 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 284 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 285 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 286 } 287 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 288 idiag = a->idiag+4*a->mbs - 4; 289 i2 = 2*m - 2; 290 x1 = x[i2]; x2 = x[i2+1]; 291 x[i2] = idiag[0]*x1 + idiag[2]*x2; 292 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 293 idiag -= 4; 294 i2 -= 2; 295 for (i=m-2; i>=0; i--) { 296 v = aa + 4*(diag[i]+1); 297 vi = aj + diag[i] + 1; 298 nz = ai[i+1] - diag[i] - 1; 299 s1 = x[i2]; s2 = x[i2+1]; 300 for (j=0; j<nz; j++) { 301 idx = 2*vi[j]; 302 it = 4*j; 303 x1 = x[idx]; x2 = x[1+idx]; 304 s1 -= v[it]*x1 + v[it+2]*x2; 305 s2 -= v[it+1]*x1 + v[it+3]*x2; 306 } 307 x[i2] = idiag[0]*s1 + idiag[2]*s2; 308 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 309 idiag -= 4; 310 i2 -= 2; 311 } 312 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 313 } 314 } else { 315 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 316 } 317 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 318 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 319 PetscFunctionReturn(0); 320 } 321 322 #undef __FUNCT__ 323 #define __FUNCT__ "MatSOR_SeqBAIJ_3" 324 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 325 { 326 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 327 PetscScalar *x,x1,x2,x3,s1,s2,s3; 328 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 329 const PetscScalar *b; 330 PetscErrorCode ierr; 331 PetscInt m = a->mbs,i,i2,nz,idx; 332 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 333 334 PetscFunctionBegin; 335 its = its*lits; 336 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 337 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 338 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 339 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 340 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 341 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 342 343 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 344 345 diag = a->diag; 346 idiag = a->idiag; 347 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 348 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 349 350 if (flag & SOR_ZERO_INITIAL_GUESS) { 351 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 352 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 353 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 354 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 355 i2 = 3; 356 idiag += 9; 357 for (i=1; i<m; i++) { 358 v = aa + 9*ai[i]; 359 vi = aj + ai[i]; 360 nz = diag[i] - ai[i]; 361 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 362 while (nz--) { 363 idx = 3*(*vi++); 364 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 365 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 366 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 367 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 368 v += 9; 369 } 370 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 371 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 372 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 373 idiag += 9; 374 i2 += 3; 375 } 376 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 377 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 378 } 379 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 380 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 381 i2 = 0; 382 mdiag = a->idiag+9*a->mbs; 383 for (i=0; i<m; i++) { 384 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 385 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 386 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 387 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 388 mdiag += 9; 389 i2 += 3; 390 } 391 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 392 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 393 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 394 } 395 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 396 idiag = a->idiag+9*a->mbs - 9; 397 i2 = 3*m - 3; 398 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 399 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 400 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 401 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 402 idiag -= 9; 403 i2 -= 3; 404 for (i=m-2; i>=0; i--) { 405 v = aa + 9*(diag[i]+1); 406 vi = aj + diag[i] + 1; 407 nz = ai[i+1] - diag[i] - 1; 408 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 409 while (nz--) { 410 idx = 3*(*vi++); 411 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 412 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 413 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 414 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 415 v += 9; 416 } 417 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 418 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 419 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 420 idiag -= 9; 421 i2 -= 3; 422 } 423 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 424 } 425 } else { 426 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 427 } 428 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 429 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 430 PetscFunctionReturn(0); 431 } 432 433 #undef __FUNCT__ 434 #define __FUNCT__ "MatSOR_SeqBAIJ_4" 435 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 436 { 437 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 438 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 439 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 440 const PetscScalar *b; 441 PetscErrorCode ierr; 442 PetscInt m = a->mbs,i,i2,nz,idx; 443 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 444 445 PetscFunctionBegin; 446 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 447 its = its*lits; 448 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 449 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 450 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 451 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 452 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 453 454 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 455 456 diag = a->diag; 457 idiag = a->idiag; 458 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 459 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 460 461 if (flag & SOR_ZERO_INITIAL_GUESS) { 462 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 463 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 464 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 465 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 466 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 467 i2 = 4; 468 idiag += 16; 469 for (i=1; i<m; i++) { 470 v = aa + 16*ai[i]; 471 vi = aj + ai[i]; 472 nz = diag[i] - ai[i]; 473 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 474 while (nz--) { 475 idx = 4*(*vi++); 476 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 477 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 478 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 479 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 480 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 481 v += 16; 482 } 483 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 484 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 485 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 486 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 487 idiag += 16; 488 i2 += 4; 489 } 490 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 491 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 492 } 493 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 494 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 495 i2 = 0; 496 mdiag = a->idiag+16*a->mbs; 497 for (i=0; i<m; i++) { 498 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 499 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 500 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 501 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 502 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 503 mdiag += 16; 504 i2 += 4; 505 } 506 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 507 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 508 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 509 } 510 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 511 idiag = a->idiag+16*a->mbs - 16; 512 i2 = 4*m - 4; 513 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 514 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 515 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 516 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 517 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 518 idiag -= 16; 519 i2 -= 4; 520 for (i=m-2; i>=0; i--) { 521 v = aa + 16*(diag[i]+1); 522 vi = aj + diag[i] + 1; 523 nz = ai[i+1] - diag[i] - 1; 524 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 525 while (nz--) { 526 idx = 4*(*vi++); 527 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 528 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 529 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 530 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 531 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 532 v += 16; 533 } 534 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 535 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 536 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 537 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 538 idiag -= 16; 539 i2 -= 4; 540 } 541 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 542 } 543 } else { 544 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 545 } 546 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 547 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 548 PetscFunctionReturn(0); 549 } 550 551 #undef __FUNCT__ 552 #define __FUNCT__ "MatSOR_SeqBAIJ_5" 553 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 554 { 555 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 556 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 557 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 558 const PetscScalar *b; 559 PetscErrorCode ierr; 560 PetscInt m = a->mbs,i,i2,nz,idx; 561 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 562 563 PetscFunctionBegin; 564 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 565 its = its*lits; 566 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 567 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 568 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 569 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 570 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 571 572 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 573 574 diag = a->diag; 575 idiag = a->idiag; 576 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 577 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 578 579 if (flag & SOR_ZERO_INITIAL_GUESS) { 580 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 581 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 582 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 583 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 584 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 585 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 586 i2 = 5; 587 idiag += 25; 588 for (i=1; i<m; i++) { 589 v = aa + 25*ai[i]; 590 vi = aj + ai[i]; 591 nz = diag[i] - ai[i]; 592 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 593 while (nz--) { 594 idx = 5*(*vi++); 595 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 596 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 597 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 598 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 599 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 600 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 601 v += 25; 602 } 603 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 604 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 605 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 606 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 607 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 608 idiag += 25; 609 i2 += 5; 610 } 611 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 612 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 613 } 614 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 615 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 616 i2 = 0; 617 mdiag = a->idiag+25*a->mbs; 618 for (i=0; i<m; i++) { 619 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 620 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 621 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 622 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 623 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 624 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 625 mdiag += 25; 626 i2 += 5; 627 } 628 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 629 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 630 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 631 } 632 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 633 idiag = a->idiag+25*a->mbs - 25; 634 i2 = 5*m - 5; 635 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 636 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 637 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 638 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 639 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 640 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 641 idiag -= 25; 642 i2 -= 5; 643 for (i=m-2; i>=0; i--) { 644 v = aa + 25*(diag[i]+1); 645 vi = aj + diag[i] + 1; 646 nz = ai[i+1] - diag[i] - 1; 647 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 648 while (nz--) { 649 idx = 5*(*vi++); 650 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 651 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 652 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 653 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 654 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 655 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 656 v += 25; 657 } 658 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 659 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 660 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 661 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 662 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 663 idiag -= 25; 664 i2 -= 5; 665 } 666 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 667 } 668 } else { 669 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 670 } 671 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 672 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 673 PetscFunctionReturn(0); 674 } 675 676 #undef __FUNCT__ 677 #define __FUNCT__ "MatSOR_SeqBAIJ_6" 678 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 679 { 680 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 681 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 682 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 683 const PetscScalar *b; 684 PetscErrorCode ierr; 685 PetscInt m = a->mbs,i,i2,nz,idx; 686 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 687 688 PetscFunctionBegin; 689 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 690 its = its*lits; 691 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 692 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 693 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 694 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 695 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 696 697 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 698 699 diag = a->diag; 700 idiag = a->idiag; 701 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 702 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 703 704 if (flag & SOR_ZERO_INITIAL_GUESS) { 705 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 706 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 707 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 708 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 709 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 710 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 711 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 712 i2 = 6; 713 idiag += 36; 714 for (i=1; i<m; i++) { 715 v = aa + 36*ai[i]; 716 vi = aj + ai[i]; 717 nz = diag[i] - ai[i]; 718 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 719 while (nz--) { 720 idx = 6*(*vi++); 721 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 722 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 723 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 724 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 725 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 726 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 727 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 728 v += 36; 729 } 730 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 731 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 732 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 733 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 734 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 735 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 736 idiag += 36; 737 i2 += 6; 738 } 739 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 740 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 741 } 742 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 743 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 744 i2 = 0; 745 mdiag = a->idiag+36*a->mbs; 746 for (i=0; i<m; i++) { 747 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 748 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 749 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 750 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 751 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 752 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 753 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 754 mdiag += 36; 755 i2 += 6; 756 } 757 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 758 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 759 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 760 } 761 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 762 idiag = a->idiag+36*a->mbs - 36; 763 i2 = 6*m - 6; 764 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 765 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 766 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 767 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 768 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 769 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 770 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 771 idiag -= 36; 772 i2 -= 6; 773 for (i=m-2; i>=0; i--) { 774 v = aa + 36*(diag[i]+1); 775 vi = aj + diag[i] + 1; 776 nz = ai[i+1] - diag[i] - 1; 777 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 778 while (nz--) { 779 idx = 6*(*vi++); 780 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 781 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 782 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 783 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 784 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 785 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 786 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 787 v += 36; 788 } 789 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 790 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 791 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 792 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 793 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 794 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 795 idiag -= 36; 796 i2 -= 6; 797 } 798 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 799 } 800 } else { 801 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 802 } 803 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 804 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 805 PetscFunctionReturn(0); 806 } 807 808 #undef __FUNCT__ 809 #define __FUNCT__ "MatSOR_SeqBAIJ_7" 810 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 811 { 812 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 813 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 814 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 815 const PetscScalar *b; 816 PetscErrorCode ierr; 817 PetscInt m = a->mbs,i,i2,nz,idx; 818 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 819 820 PetscFunctionBegin; 821 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 822 its = its*lits; 823 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 824 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 825 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 826 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 827 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 828 829 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 830 831 diag = a->diag; 832 idiag = a->idiag; 833 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 834 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 835 836 if (flag & SOR_ZERO_INITIAL_GUESS) { 837 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 838 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 839 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 840 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 841 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 842 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 843 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 844 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 845 i2 = 7; 846 idiag += 49; 847 for (i=1; i<m; i++) { 848 v = aa + 49*ai[i]; 849 vi = aj + ai[i]; 850 nz = diag[i] - ai[i]; 851 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 852 while (nz--) { 853 idx = 7*(*vi++); 854 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 855 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 856 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 857 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 858 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 859 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 860 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 861 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 862 v += 49; 863 } 864 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 865 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 866 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 867 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 868 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 869 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 870 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 871 idiag += 49; 872 i2 += 7; 873 } 874 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 875 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 876 } 877 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 878 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 879 i2 = 0; 880 mdiag = a->idiag+49*a->mbs; 881 for (i=0; i<m; i++) { 882 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 883 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7; 884 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7; 885 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7; 886 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7; 887 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7; 888 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7; 889 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7; 890 mdiag += 49; 891 i2 += 7; 892 } 893 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 894 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 895 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 896 } 897 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 898 idiag = a->idiag+49*a->mbs - 49; 899 i2 = 7*m - 7; 900 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 901 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 902 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 903 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 904 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 905 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 906 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 907 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 908 idiag -= 49; 909 i2 -= 7; 910 for (i=m-2; i>=0; i--) { 911 v = aa + 49*(diag[i]+1); 912 vi = aj + diag[i] + 1; 913 nz = ai[i+1] - diag[i] - 1; 914 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 915 while (nz--) { 916 idx = 7*(*vi++); 917 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 918 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 919 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 920 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 921 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 922 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 923 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 924 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 925 v += 49; 926 } 927 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 928 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 929 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 930 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 931 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 932 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 933 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 934 idiag -= 49; 935 i2 -= 7; 936 } 937 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 938 } 939 } else { 940 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 941 } 942 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 943 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 944 PetscFunctionReturn(0); 945 } 946 947 #undef __FUNCT__ 948 #define __FUNCT__ "MatSOR_SeqBAIJ_N" 949 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 950 { 951 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 952 PetscScalar *x,*work,*w,*workt; 953 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 954 const PetscScalar *b; 955 PetscErrorCode ierr; 956 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j; 957 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 958 959 PetscFunctionBegin; 960 its = its*lits; 961 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 962 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 963 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 964 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 965 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 966 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 967 968 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 969 970 diag = a->diag; 971 idiag = a->idiag; 972 if (!a->mult_work) { 973 k = PetscMax(A->rmap->n,A->cmap->n); 974 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 975 } 976 work = a->mult_work; 977 if (!a->sor_work) { 978 ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr); 979 } 980 w = a->sor_work; 981 982 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 983 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 984 985 if (flag & SOR_ZERO_INITIAL_GUESS) { 986 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 987 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 988 /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 989 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 990 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/ 991 i2 = bs; 992 idiag += bs2; 993 for (i=1; i<m; i++) { 994 v = aa + bs2*ai[i]; 995 vi = aj + ai[i]; 996 nz = diag[i] - ai[i]; 997 998 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 999 /* copy all rows of x that are needed into contiguous space */ 1000 workt = work; 1001 for (j=0; j<nz; j++) { 1002 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1003 workt += bs; 1004 } 1005 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1006 /*s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 1007 while (nz--) { 1008 idx = N*(*vi++); 1009 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 1010 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1011 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1012 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1013 v += N2; 1014 } */ 1015 1016 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1017 /* x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1018 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1019 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/ 1020 1021 idiag += bs2; 1022 i2 += bs; 1023 } 1024 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 1025 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1026 } 1027 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 1028 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1029 i2 = 0; 1030 mdiag = a->idiag+bs2*a->mbs; 1031 ierr = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr); 1032 for (i=0; i<m; i++) { 1033 PetscKernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2); 1034 /* x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1035 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 1036 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 1037 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */ 1038 1039 mdiag += bs2; 1040 i2 += bs; 1041 } 1042 ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr); 1043 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1044 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 1045 } 1046 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1047 idiag = a->idiag+bs2*a->mbs - bs2; 1048 i2 = bs*m - bs; 1049 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1050 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1051 /*x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1052 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 1053 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 1054 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/ 1055 idiag -= bs2; 1056 i2 -= bs; 1057 for (i=m-2; i>=0; i--) { 1058 v = aa + bs2*(diag[i]+1); 1059 vi = aj + diag[i] + 1; 1060 nz = ai[i+1] - diag[i] - 1; 1061 1062 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1063 /* copy all rows of x that are needed into contiguous space */ 1064 workt = work; 1065 for (j=0; j<nz; j++) { 1066 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1067 workt += bs; 1068 } 1069 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1070 /* s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 1071 while (nz--) { 1072 idx = N*(*vi++); 1073 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 1074 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1075 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1076 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1077 v += N2; 1078 } */ 1079 1080 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1081 /*x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1082 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1083 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */ 1084 idiag -= bs2; 1085 i2 -= bs; 1086 } 1087 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1088 } 1089 } else { 1090 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 1091 } 1092 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1093 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1094 PetscFunctionReturn(0); 1095 } 1096 1097 /* 1098 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1099 */ 1100 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1101 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1102 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1103 #define matsetvaluesblocked4_ matsetvaluesblocked4 1104 #endif 1105 1106 EXTERN_C_BEGIN 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "matsetvaluesblocked4_" 1109 void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 1110 { 1111 Mat A = *AA; 1112 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1113 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 1114 PetscInt *ai=a->i,*ailen=a->ilen; 1115 PetscInt *aj=a->j,stepval,lastcol = -1; 1116 const PetscScalar *value = v; 1117 MatScalar *ap,*aa = a->a,*bap; 1118 1119 PetscFunctionBegin; 1120 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 1121 stepval = (n-1)*4; 1122 for (k=0; k<m; k++) { /* loop over added rows */ 1123 row = im[k]; 1124 rp = aj + ai[row]; 1125 ap = aa + 16*ai[row]; 1126 nrow = ailen[row]; 1127 low = 0; 1128 high = nrow; 1129 for (l=0; l<n; l++) { /* loop over added columns */ 1130 col = in[l]; 1131 if (col <= lastcol) low = 0; else high = nrow; 1132 lastcol = col; 1133 value = v + k*(stepval+4 + l)*4; 1134 while (high-low > 7) { 1135 t = (low+high)/2; 1136 if (rp[t] > col) high = t; 1137 else low = t; 1138 } 1139 for (i=low; i<high; i++) { 1140 if (rp[i] > col) break; 1141 if (rp[i] == col) { 1142 bap = ap + 16*i; 1143 for (ii=0; ii<4; ii++,value+=stepval) { 1144 for (jj=ii; jj<16; jj+=4) { 1145 bap[jj] += *value++; 1146 } 1147 } 1148 goto noinsert2; 1149 } 1150 } 1151 N = nrow++ - 1; 1152 high++; /* added new column index thus must search to one higher than before */ 1153 /* shift up all the later entries in this row */ 1154 for (ii=N; ii>=i; ii--) { 1155 rp[ii+1] = rp[ii]; 1156 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1157 } 1158 if (N >= i) { 1159 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1160 } 1161 rp[i] = col; 1162 bap = ap + 16*i; 1163 for (ii=0; ii<4; ii++,value+=stepval) { 1164 for (jj=ii; jj<16; jj+=4) { 1165 bap[jj] = *value++; 1166 } 1167 } 1168 noinsert2:; 1169 low = i; 1170 } 1171 ailen[row] = nrow; 1172 } 1173 PetscFunctionReturnVoid(); 1174 } 1175 EXTERN_C_END 1176 1177 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1178 #define matsetvalues4_ MATSETVALUES4 1179 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1180 #define matsetvalues4_ matsetvalues4 1181 #endif 1182 1183 EXTERN_C_BEGIN 1184 #undef __FUNCT__ 1185 #define __FUNCT__ "MatSetValues4_" 1186 void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1187 { 1188 Mat A = *AA; 1189 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1190 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1191 PetscInt *ai=a->i,*ailen=a->ilen; 1192 PetscInt *aj=a->j,brow,bcol; 1193 PetscInt ridx,cidx,lastcol = -1; 1194 MatScalar *ap,value,*aa=a->a,*bap; 1195 1196 PetscFunctionBegin; 1197 for (k=0; k<m; k++) { /* loop over added rows */ 1198 row = im[k]; brow = row/4; 1199 rp = aj + ai[brow]; 1200 ap = aa + 16*ai[brow]; 1201 nrow = ailen[brow]; 1202 low = 0; 1203 high = nrow; 1204 for (l=0; l<n; l++) { /* loop over added columns */ 1205 col = in[l]; bcol = col/4; 1206 ridx = row % 4; cidx = col % 4; 1207 value = v[l + k*n]; 1208 if (col <= lastcol) low = 0; else high = nrow; 1209 lastcol = col; 1210 while (high-low > 7) { 1211 t = (low+high)/2; 1212 if (rp[t] > bcol) high = t; 1213 else low = t; 1214 } 1215 for (i=low; i<high; i++) { 1216 if (rp[i] > bcol) break; 1217 if (rp[i] == bcol) { 1218 bap = ap + 16*i + 4*cidx + ridx; 1219 *bap += value; 1220 goto noinsert1; 1221 } 1222 } 1223 N = nrow++ - 1; 1224 high++; /* added new column thus must search to one higher than before */ 1225 /* shift up all the later entries in this row */ 1226 for (ii=N; ii>=i; ii--) { 1227 rp[ii+1] = rp[ii]; 1228 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1229 } 1230 if (N>=i) { 1231 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1232 } 1233 rp[i] = bcol; 1234 ap[16*i + 4*cidx + ridx] = value; 1235 noinsert1:; 1236 low = i; 1237 } 1238 ailen[brow] = nrow; 1239 } 1240 PetscFunctionReturnVoid(); 1241 } 1242 EXTERN_C_END 1243 1244 /* 1245 Checks for missing diagonals 1246 */ 1247 #undef __FUNCT__ 1248 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1249 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1250 { 1251 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1252 PetscErrorCode ierr; 1253 PetscInt *diag,*jj = a->j,i; 1254 1255 PetscFunctionBegin; 1256 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1257 *missing = PETSC_FALSE; 1258 if (A->rmap->n > 0 && !jj) { 1259 *missing = PETSC_TRUE; 1260 if (d) *d = 0; 1261 PetscInfo(A,"Matrix has no entries therefore is missing diagonal"); 1262 } else { 1263 diag = a->diag; 1264 for (i=0; i<a->mbs; i++) { 1265 if (jj[diag[i]] != i) { 1266 *missing = PETSC_TRUE; 1267 if (d) *d = i; 1268 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1269 break; 1270 } 1271 } 1272 } 1273 PetscFunctionReturn(0); 1274 } 1275 1276 #undef __FUNCT__ 1277 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1278 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1279 { 1280 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1281 PetscErrorCode ierr; 1282 PetscInt i,j,m = a->mbs; 1283 1284 PetscFunctionBegin; 1285 if (!a->diag) { 1286 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1287 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1288 a->free_diag = PETSC_TRUE; 1289 } 1290 for (i=0; i<m; i++) { 1291 a->diag[i] = a->i[i+1]; 1292 for (j=a->i[i]; j<a->i[i+1]; j++) { 1293 if (a->j[j] == i) { 1294 a->diag[i] = j; 1295 break; 1296 } 1297 } 1298 } 1299 PetscFunctionReturn(0); 1300 } 1301 1302 1303 #undef __FUNCT__ 1304 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1305 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1306 { 1307 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1308 PetscErrorCode ierr; 1309 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1310 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1311 1312 PetscFunctionBegin; 1313 *nn = n; 1314 if (!ia) PetscFunctionReturn(0); 1315 if (symmetric) { 1316 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1317 nz = tia[n]; 1318 } else { 1319 tia = a->i; tja = a->j; 1320 } 1321 1322 if (!blockcompressed && bs > 1) { 1323 (*nn) *= bs; 1324 /* malloc & create the natural set of indices */ 1325 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1326 if (n) { 1327 (*ia)[0] = 0; 1328 for (j=1; j<bs; j++) { 1329 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1330 } 1331 } 1332 1333 for (i=1; i<n; i++) { 1334 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1335 for (j=1; j<bs; j++) { 1336 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1337 } 1338 } 1339 if (n) { 1340 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1341 } 1342 1343 if (inja) { 1344 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1345 cnt = 0; 1346 for (i=0; i<n; i++) { 1347 for (j=0; j<bs; j++) { 1348 for (k=tia[i]; k<tia[i+1]; k++) { 1349 for (l=0; l<bs; l++) { 1350 (*ja)[cnt++] = bs*tja[k] + l; 1351 } 1352 } 1353 } 1354 } 1355 } 1356 1357 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1358 ierr = PetscFree(tia);CHKERRQ(ierr); 1359 ierr = PetscFree(tja);CHKERRQ(ierr); 1360 } 1361 } else if (oshift == 1) { 1362 if (symmetric) { 1363 nz = tia[A->rmap->n/bs]; 1364 /* add 1 to i and j indices */ 1365 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1366 *ia = tia; 1367 if (ja) { 1368 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1369 *ja = tja; 1370 } 1371 } else { 1372 nz = a->i[A->rmap->n/bs]; 1373 /* malloc space and add 1 to i and j indices */ 1374 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1375 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1376 if (ja) { 1377 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1378 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1379 } 1380 } 1381 } else { 1382 *ia = tia; 1383 if (ja) *ja = tja; 1384 } 1385 PetscFunctionReturn(0); 1386 } 1387 1388 #undef __FUNCT__ 1389 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1390 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1391 { 1392 PetscErrorCode ierr; 1393 1394 PetscFunctionBegin; 1395 if (!ia) PetscFunctionReturn(0); 1396 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1397 ierr = PetscFree(*ia);CHKERRQ(ierr); 1398 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1399 } 1400 PetscFunctionReturn(0); 1401 } 1402 1403 #undef __FUNCT__ 1404 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1405 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1406 { 1407 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1408 PetscErrorCode ierr; 1409 1410 PetscFunctionBegin; 1411 #if defined(PETSC_USE_LOG) 1412 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1413 #endif 1414 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1415 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1416 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1417 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1418 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1419 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1420 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1421 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1422 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1423 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1424 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1425 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1426 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1427 1428 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1429 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1430 ierr = PetscFree(A->data);CHKERRQ(ierr); 1431 1432 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1433 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1434 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1435 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1436 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1437 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1438 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1439 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1440 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1441 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",PETSC_NULL);CHKERRQ(ierr); 1442 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1443 PetscFunctionReturn(0); 1444 } 1445 1446 #undef __FUNCT__ 1447 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1448 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1449 { 1450 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1451 PetscErrorCode ierr; 1452 1453 PetscFunctionBegin; 1454 switch (op) { 1455 case MAT_ROW_ORIENTED: 1456 a->roworiented = flg; 1457 break; 1458 case MAT_KEEP_NONZERO_PATTERN: 1459 a->keepnonzeropattern = flg; 1460 break; 1461 case MAT_NEW_NONZERO_LOCATIONS: 1462 a->nonew = (flg ? 0 : 1); 1463 break; 1464 case MAT_NEW_NONZERO_LOCATION_ERR: 1465 a->nonew = (flg ? -1 : 0); 1466 break; 1467 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1468 a->nonew = (flg ? -2 : 0); 1469 break; 1470 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1471 a->nounused = (flg ? -1 : 0); 1472 break; 1473 case MAT_CHECK_COMPRESSED_ROW: 1474 a->compressedrow.check = flg; 1475 break; 1476 case MAT_NEW_DIAGONALS: 1477 case MAT_IGNORE_OFF_PROC_ENTRIES: 1478 case MAT_USE_HASH_TABLE: 1479 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1480 break; 1481 case MAT_SPD: 1482 case MAT_SYMMETRIC: 1483 case MAT_STRUCTURALLY_SYMMETRIC: 1484 case MAT_HERMITIAN: 1485 case MAT_SYMMETRY_ETERNAL: 1486 /* These options are handled directly by MatSetOption() */ 1487 break; 1488 default: 1489 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1490 } 1491 PetscFunctionReturn(0); 1492 } 1493 1494 #undef __FUNCT__ 1495 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1496 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1497 { 1498 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1499 PetscErrorCode ierr; 1500 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1501 MatScalar *aa,*aa_i; 1502 PetscScalar *v_i; 1503 1504 PetscFunctionBegin; 1505 bs = A->rmap->bs; 1506 ai = a->i; 1507 aj = a->j; 1508 aa = a->a; 1509 bs2 = a->bs2; 1510 1511 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1512 1513 bn = row/bs; /* Block number */ 1514 bp = row % bs; /* Block Position */ 1515 M = ai[bn+1] - ai[bn]; 1516 *nz = bs*M; 1517 1518 if (v) { 1519 *v = 0; 1520 if (*nz) { 1521 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1522 for (i=0; i<M; i++) { /* for each block in the block row */ 1523 v_i = *v + i*bs; 1524 aa_i = aa + bs2*(ai[bn] + i); 1525 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1526 } 1527 } 1528 } 1529 1530 if (idx) { 1531 *idx = 0; 1532 if (*nz) { 1533 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1534 for (i=0; i<M; i++) { /* for each block in the block row */ 1535 idx_i = *idx + i*bs; 1536 itmp = bs*aj[ai[bn] + i]; 1537 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1538 } 1539 } 1540 } 1541 PetscFunctionReturn(0); 1542 } 1543 1544 #undef __FUNCT__ 1545 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1546 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1547 { 1548 PetscErrorCode ierr; 1549 1550 PetscFunctionBegin; 1551 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1552 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1553 PetscFunctionReturn(0); 1554 } 1555 1556 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1557 1558 #undef __FUNCT__ 1559 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1560 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1561 { 1562 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1563 Mat C; 1564 PetscErrorCode ierr; 1565 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1566 PetscInt *rows,*cols,bs2=a->bs2; 1567 MatScalar *array; 1568 1569 PetscFunctionBegin; 1570 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1571 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1572 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1573 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1574 1575 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1576 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1577 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1578 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1579 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr); 1580 ierr = PetscFree(col);CHKERRQ(ierr); 1581 } else { 1582 C = *B; 1583 } 1584 1585 array = a->a; 1586 ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 1587 for (i=0; i<mbs; i++) { 1588 cols[0] = i*bs; 1589 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1590 len = ai[i+1] - ai[i]; 1591 for (j=0; j<len; j++) { 1592 rows[0] = (*aj++)*bs; 1593 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1594 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1595 array += bs2; 1596 } 1597 } 1598 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1599 1600 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1601 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1602 1603 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1604 *B = C; 1605 } else { 1606 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1607 } 1608 PetscFunctionReturn(0); 1609 } 1610 1611 EXTERN_C_BEGIN 1612 #undef __FUNCT__ 1613 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1614 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1615 { 1616 PetscErrorCode ierr; 1617 Mat Btrans; 1618 1619 PetscFunctionBegin; 1620 *f = PETSC_FALSE; 1621 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1622 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1623 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1624 PetscFunctionReturn(0); 1625 } 1626 EXTERN_C_END 1627 1628 #undef __FUNCT__ 1629 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1630 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1631 { 1632 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1633 PetscErrorCode ierr; 1634 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1635 int fd; 1636 PetscScalar *aa; 1637 FILE *file; 1638 1639 PetscFunctionBegin; 1640 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1641 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1642 col_lens[0] = MAT_FILE_CLASSID; 1643 1644 col_lens[1] = A->rmap->N; 1645 col_lens[2] = A->cmap->n; 1646 col_lens[3] = a->nz*bs2; 1647 1648 /* store lengths of each row and write (including header) to file */ 1649 count = 0; 1650 for (i=0; i<a->mbs; i++) { 1651 for (j=0; j<bs; j++) { 1652 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1653 } 1654 } 1655 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1656 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1657 1658 /* store column indices (zero start index) */ 1659 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1660 count = 0; 1661 for (i=0; i<a->mbs; i++) { 1662 for (j=0; j<bs; j++) { 1663 for (k=a->i[i]; k<a->i[i+1]; k++) { 1664 for (l=0; l<bs; l++) { 1665 jj[count++] = bs*a->j[k] + l; 1666 } 1667 } 1668 } 1669 } 1670 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1671 ierr = PetscFree(jj);CHKERRQ(ierr); 1672 1673 /* store nonzero values */ 1674 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1675 count = 0; 1676 for (i=0; i<a->mbs; i++) { 1677 for (j=0; j<bs; j++) { 1678 for (k=a->i[i]; k<a->i[i+1]; k++) { 1679 for (l=0; l<bs; l++) { 1680 aa[count++] = a->a[bs2*k + l*bs + j]; 1681 } 1682 } 1683 } 1684 } 1685 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1686 ierr = PetscFree(aa);CHKERRQ(ierr); 1687 1688 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1689 if (file) { 1690 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1691 } 1692 PetscFunctionReturn(0); 1693 } 1694 1695 #undef __FUNCT__ 1696 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1697 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1698 { 1699 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1700 PetscErrorCode ierr; 1701 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1702 PetscViewerFormat format; 1703 1704 PetscFunctionBegin; 1705 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1706 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1707 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1708 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1709 Mat aij; 1710 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1711 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1712 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1713 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1714 PetscFunctionReturn(0); 1715 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1716 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1717 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1718 for (i=0; i<a->mbs; i++) { 1719 for (j=0; j<bs; j++) { 1720 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1721 for (k=a->i[i]; k<a->i[i+1]; k++) { 1722 for (l=0; l<bs; l++) { 1723 #if defined(PETSC_USE_COMPLEX) 1724 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1725 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1726 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1727 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1728 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1729 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1730 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1731 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1732 } 1733 #else 1734 if (a->a[bs2*k + l*bs + j] != 0.0) { 1735 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1736 } 1737 #endif 1738 } 1739 } 1740 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1741 } 1742 } 1743 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1744 } else { 1745 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1746 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1747 for (i=0; i<a->mbs; i++) { 1748 for (j=0; j<bs; j++) { 1749 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1750 for (k=a->i[i]; k<a->i[i+1]; k++) { 1751 for (l=0; l<bs; l++) { 1752 #if defined(PETSC_USE_COMPLEX) 1753 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1754 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1755 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1756 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1757 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1758 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1759 } else { 1760 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1761 } 1762 #else 1763 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1764 #endif 1765 } 1766 } 1767 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1768 } 1769 } 1770 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1771 } 1772 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1773 PetscFunctionReturn(0); 1774 } 1775 1776 #undef __FUNCT__ 1777 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1778 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1779 { 1780 Mat A = (Mat) Aa; 1781 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1782 PetscErrorCode ierr; 1783 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1784 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1785 MatScalar *aa; 1786 PetscViewer viewer; 1787 PetscViewerFormat format; 1788 1789 PetscFunctionBegin; 1790 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1791 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1792 1793 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1794 1795 /* loop over matrix elements drawing boxes */ 1796 1797 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1798 color = PETSC_DRAW_BLUE; 1799 for (i=0,row=0; i<mbs; i++,row+=bs) { 1800 for (j=a->i[i]; j<a->i[i+1]; j++) { 1801 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1802 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1803 aa = a->a + j*bs2; 1804 for (k=0; k<bs; k++) { 1805 for (l=0; l<bs; l++) { 1806 if (PetscRealPart(*aa++) >= 0.) continue; 1807 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1808 } 1809 } 1810 } 1811 } 1812 color = PETSC_DRAW_CYAN; 1813 for (i=0,row=0; i<mbs; i++,row+=bs) { 1814 for (j=a->i[i]; j<a->i[i+1]; j++) { 1815 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1816 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1817 aa = a->a + j*bs2; 1818 for (k=0; k<bs; k++) { 1819 for (l=0; l<bs; l++) { 1820 if (PetscRealPart(*aa++) != 0.) continue; 1821 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1822 } 1823 } 1824 } 1825 } 1826 color = PETSC_DRAW_RED; 1827 for (i=0,row=0; i<mbs; i++,row+=bs) { 1828 for (j=a->i[i]; j<a->i[i+1]; j++) { 1829 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1830 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1831 aa = a->a + j*bs2; 1832 for (k=0; k<bs; k++) { 1833 for (l=0; l<bs; l++) { 1834 if (PetscRealPart(*aa++) <= 0.) continue; 1835 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1836 } 1837 } 1838 } 1839 } 1840 } else { 1841 /* use contour shading to indicate magnitude of values */ 1842 /* first determine max of all nonzero values */ 1843 PetscDraw popup; 1844 PetscReal scale,maxv = 0.0; 1845 1846 for (i=0; i<a->nz*a->bs2; i++) { 1847 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1848 } 1849 scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv; 1850 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1851 if (popup) {ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);} 1852 for (i=0,row=0; i<mbs; i++,row+=bs) { 1853 for (j=a->i[i]; j<a->i[i+1]; j++) { 1854 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1855 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1856 aa = a->a + j*bs2; 1857 for (k=0; k<bs; k++) { 1858 for (l=0; l<bs; l++) { 1859 color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++)); 1860 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1861 } 1862 } 1863 } 1864 } 1865 } 1866 PetscFunctionReturn(0); 1867 } 1868 1869 #undef __FUNCT__ 1870 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1871 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1872 { 1873 PetscErrorCode ierr; 1874 PetscReal xl,yl,xr,yr,w,h; 1875 PetscDraw draw; 1876 PetscBool isnull; 1877 1878 PetscFunctionBegin; 1879 1880 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1881 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1882 1883 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1884 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1885 xr += w; yr += h; xl = -w; yl = -h; 1886 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1887 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1888 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1889 PetscFunctionReturn(0); 1890 } 1891 1892 #undef __FUNCT__ 1893 #define __FUNCT__ "MatView_SeqBAIJ" 1894 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1895 { 1896 PetscErrorCode ierr; 1897 PetscBool iascii,isbinary,isdraw; 1898 1899 PetscFunctionBegin; 1900 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1901 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1902 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1903 if (iascii){ 1904 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1905 } else if (isbinary) { 1906 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1907 } else if (isdraw) { 1908 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1909 } else { 1910 Mat B; 1911 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1912 ierr = MatView(B,viewer);CHKERRQ(ierr); 1913 ierr = MatDestroy(&B);CHKERRQ(ierr); 1914 } 1915 PetscFunctionReturn(0); 1916 } 1917 1918 1919 #undef __FUNCT__ 1920 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1921 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1922 { 1923 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1924 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1925 PetscInt *ai = a->i,*ailen = a->ilen; 1926 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1927 MatScalar *ap,*aa = a->a; 1928 1929 PetscFunctionBegin; 1930 for (k=0; k<m; k++) { /* loop over rows */ 1931 row = im[k]; brow = row/bs; 1932 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1933 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1934 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1935 nrow = ailen[brow]; 1936 for (l=0; l<n; l++) { /* loop over columns */ 1937 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1938 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1939 col = in[l] ; 1940 bcol = col/bs; 1941 cidx = col%bs; 1942 ridx = row%bs; 1943 high = nrow; 1944 low = 0; /* assume unsorted */ 1945 while (high-low > 5) { 1946 t = (low+high)/2; 1947 if (rp[t] > bcol) high = t; 1948 else low = t; 1949 } 1950 for (i=low; i<high; i++) { 1951 if (rp[i] > bcol) break; 1952 if (rp[i] == bcol) { 1953 *v++ = ap[bs2*i+bs*cidx+ridx]; 1954 goto finished; 1955 } 1956 } 1957 *v++ = 0.0; 1958 finished:; 1959 } 1960 } 1961 PetscFunctionReturn(0); 1962 } 1963 1964 #undef __FUNCT__ 1965 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1966 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1967 { 1968 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1969 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1970 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1971 PetscErrorCode ierr; 1972 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1973 PetscBool roworiented=a->roworiented; 1974 const PetscScalar *value = v; 1975 MatScalar *ap,*aa = a->a,*bap; 1976 1977 PetscFunctionBegin; 1978 if (roworiented) { 1979 stepval = (n-1)*bs; 1980 } else { 1981 stepval = (m-1)*bs; 1982 } 1983 for (k=0; k<m; k++) { /* loop over added rows */ 1984 row = im[k]; 1985 if (row < 0) continue; 1986 #if defined(PETSC_USE_DEBUG) 1987 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1988 #endif 1989 rp = aj + ai[row]; 1990 ap = aa + bs2*ai[row]; 1991 rmax = imax[row]; 1992 nrow = ailen[row]; 1993 low = 0; 1994 high = nrow; 1995 for (l=0; l<n; l++) { /* loop over added columns */ 1996 if (in[l] < 0) continue; 1997 #if defined(PETSC_USE_DEBUG) 1998 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1999 #endif 2000 col = in[l]; 2001 if (roworiented) { 2002 value = v + (k*(stepval+bs) + l)*bs; 2003 } else { 2004 value = v + (l*(stepval+bs) + k)*bs; 2005 } 2006 if (col <= lastcol) low = 0; else high = nrow; 2007 lastcol = col; 2008 while (high-low > 7) { 2009 t = (low+high)/2; 2010 if (rp[t] > col) high = t; 2011 else low = t; 2012 } 2013 for (i=low; i<high; i++) { 2014 if (rp[i] > col) break; 2015 if (rp[i] == col) { 2016 bap = ap + bs2*i; 2017 if (roworiented) { 2018 if (is == ADD_VALUES) { 2019 for (ii=0; ii<bs; ii++,value+=stepval) { 2020 for (jj=ii; jj<bs2; jj+=bs) { 2021 bap[jj] += *value++; 2022 } 2023 } 2024 } else { 2025 for (ii=0; ii<bs; ii++,value+=stepval) { 2026 for (jj=ii; jj<bs2; jj+=bs) { 2027 bap[jj] = *value++; 2028 } 2029 } 2030 } 2031 } else { 2032 if (is == ADD_VALUES) { 2033 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2034 for (jj=0; jj<bs; jj++) { 2035 bap[jj] += value[jj]; 2036 } 2037 bap += bs; 2038 } 2039 } else { 2040 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2041 for (jj=0; jj<bs; jj++) { 2042 bap[jj] = value[jj]; 2043 } 2044 bap += bs; 2045 } 2046 } 2047 } 2048 goto noinsert2; 2049 } 2050 } 2051 if (nonew == 1) goto noinsert2; 2052 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2053 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2054 N = nrow++ - 1; high++; 2055 /* shift up all the later entries in this row */ 2056 for (ii=N; ii>=i; ii--) { 2057 rp[ii+1] = rp[ii]; 2058 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2059 } 2060 if (N >= i) { 2061 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2062 } 2063 rp[i] = col; 2064 bap = ap + bs2*i; 2065 if (roworiented) { 2066 for (ii=0; ii<bs; ii++,value+=stepval) { 2067 for (jj=ii; jj<bs2; jj+=bs) { 2068 bap[jj] = *value++; 2069 } 2070 } 2071 } else { 2072 for (ii=0; ii<bs; ii++,value+=stepval) { 2073 for (jj=0; jj<bs; jj++) { 2074 *bap++ = *value++; 2075 } 2076 } 2077 } 2078 noinsert2:; 2079 low = i; 2080 } 2081 ailen[row] = nrow; 2082 } 2083 PetscFunctionReturn(0); 2084 } 2085 2086 #undef __FUNCT__ 2087 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 2088 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 2089 { 2090 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2091 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 2092 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 2093 PetscErrorCode ierr; 2094 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 2095 MatScalar *aa = a->a,*ap; 2096 PetscReal ratio=0.6; 2097 2098 PetscFunctionBegin; 2099 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 2100 2101 if (m) rmax = ailen[0]; 2102 for (i=1; i<mbs; i++) { 2103 /* move each row back by the amount of empty slots (fshift) before it*/ 2104 fshift += imax[i-1] - ailen[i-1]; 2105 rmax = PetscMax(rmax,ailen[i]); 2106 if (fshift) { 2107 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 2108 N = ailen[i]; 2109 for (j=0; j<N; j++) { 2110 ip[j-fshift] = ip[j]; 2111 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2112 } 2113 } 2114 ai[i] = ai[i-1] + ailen[i-1]; 2115 } 2116 if (mbs) { 2117 fshift += imax[mbs-1] - ailen[mbs-1]; 2118 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 2119 } 2120 /* reset ilen and imax for each row */ 2121 for (i=0; i<mbs; i++) { 2122 ailen[i] = imax[i] = ai[i+1] - ai[i]; 2123 } 2124 a->nz = ai[mbs]; 2125 2126 /* diagonals may have moved, so kill the diagonal pointers */ 2127 a->idiagvalid = PETSC_FALSE; 2128 if (fshift && a->diag) { 2129 ierr = PetscFree(a->diag);CHKERRQ(ierr); 2130 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2131 a->diag = 0; 2132 } 2133 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 2134 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 2135 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 2136 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 2137 A->info.mallocs += a->reallocs; 2138 a->reallocs = 0; 2139 A->info.nz_unneeded = (PetscReal)fshift*bs2; 2140 2141 ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 2142 A->same_nonzero = PETSC_TRUE; 2143 PetscFunctionReturn(0); 2144 } 2145 2146 /* 2147 This function returns an array of flags which indicate the locations of contiguous 2148 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2149 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2150 Assume: sizes should be long enough to hold all the values. 2151 */ 2152 #undef __FUNCT__ 2153 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 2154 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2155 { 2156 PetscInt i,j,k,row; 2157 PetscBool flg; 2158 2159 PetscFunctionBegin; 2160 for (i=0,j=0; i<n; j++) { 2161 row = idx[i]; 2162 if (row%bs!=0) { /* Not the begining of a block */ 2163 sizes[j] = 1; 2164 i++; 2165 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2166 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 2167 i++; 2168 } else { /* Begining of the block, so check if the complete block exists */ 2169 flg = PETSC_TRUE; 2170 for (k=1; k<bs; k++) { 2171 if (row+k != idx[i+k]) { /* break in the block */ 2172 flg = PETSC_FALSE; 2173 break; 2174 } 2175 } 2176 if (flg) { /* No break in the bs */ 2177 sizes[j] = bs; 2178 i+= bs; 2179 } else { 2180 sizes[j] = 1; 2181 i++; 2182 } 2183 } 2184 } 2185 *bs_max = j; 2186 PetscFunctionReturn(0); 2187 } 2188 2189 #undef __FUNCT__ 2190 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2191 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2192 { 2193 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2194 PetscErrorCode ierr; 2195 PetscInt i,j,k,count,*rows; 2196 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2197 PetscScalar zero = 0.0; 2198 MatScalar *aa; 2199 const PetscScalar *xx; 2200 PetscScalar *bb; 2201 2202 PetscFunctionBegin; 2203 /* fix right hand side if needed */ 2204 if (x && b) { 2205 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2206 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2207 for (i=0; i<is_n; i++) { 2208 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2209 } 2210 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2211 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2212 } 2213 2214 /* Make a copy of the IS and sort it */ 2215 /* allocate memory for rows,sizes */ 2216 ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2217 2218 /* copy IS values to rows, and sort them */ 2219 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2220 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2221 2222 if (baij->keepnonzeropattern) { 2223 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2224 bs_max = is_n; 2225 A->same_nonzero = PETSC_TRUE; 2226 } else { 2227 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2228 A->same_nonzero = PETSC_FALSE; 2229 } 2230 2231 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2232 row = rows[j]; 2233 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2234 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2235 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2236 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2237 if (diag != (PetscScalar)0.0) { 2238 if (baij->ilen[row/bs] > 0) { 2239 baij->ilen[row/bs] = 1; 2240 baij->j[baij->i[row/bs]] = row/bs; 2241 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2242 } 2243 /* Now insert all the diagonal values for this bs */ 2244 for (k=0; k<bs; k++) { 2245 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2246 } 2247 } else { /* (diag == 0.0) */ 2248 baij->ilen[row/bs] = 0; 2249 } /* end (diag == 0.0) */ 2250 } else { /* (sizes[i] != bs) */ 2251 #if defined (PETSC_USE_DEBUG) 2252 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2253 #endif 2254 for (k=0; k<count; k++) { 2255 aa[0] = zero; 2256 aa += bs; 2257 } 2258 if (diag != (PetscScalar)0.0) { 2259 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2260 } 2261 } 2262 } 2263 2264 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2265 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2266 PetscFunctionReturn(0); 2267 } 2268 2269 #undef __FUNCT__ 2270 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2271 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2272 { 2273 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2274 PetscErrorCode ierr; 2275 PetscInt i,j,k,count; 2276 PetscInt bs=A->rmap->bs,bs2=baij->bs2,row,col; 2277 PetscScalar zero = 0.0; 2278 MatScalar *aa; 2279 const PetscScalar *xx; 2280 PetscScalar *bb; 2281 PetscBool *zeroed,vecs = PETSC_FALSE; 2282 2283 PetscFunctionBegin; 2284 /* fix right hand side if needed */ 2285 if (x && b) { 2286 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2287 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2288 vecs = PETSC_TRUE; 2289 } 2290 A->same_nonzero = PETSC_TRUE; 2291 2292 /* zero the columns */ 2293 ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr); 2294 ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 2295 for (i=0; i<is_n; i++) { 2296 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2297 zeroed[is_idx[i]] = PETSC_TRUE; 2298 } 2299 for (i=0; i<A->rmap->N; i++) { 2300 if (!zeroed[i]) { 2301 row = i/bs; 2302 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2303 for (k=0; k<bs; k++) { 2304 col = bs*baij->j[j] + k; 2305 if (zeroed[col]) { 2306 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2307 if (vecs) bb[i] -= aa[0]*xx[col]; 2308 aa[0] = 0.0; 2309 } 2310 } 2311 } 2312 } else if (vecs) bb[i] = diag*xx[i]; 2313 } 2314 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2315 if (vecs) { 2316 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2317 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2318 } 2319 2320 /* zero the rows */ 2321 for (i=0; i<is_n; i++) { 2322 row = is_idx[i]; 2323 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2324 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2325 for (k=0; k<count; k++) { 2326 aa[0] = zero; 2327 aa += bs; 2328 } 2329 if (diag != (PetscScalar)0.0) { 2330 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2331 } 2332 } 2333 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2334 PetscFunctionReturn(0); 2335 } 2336 2337 #undef __FUNCT__ 2338 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2339 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2340 { 2341 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2342 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2343 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2344 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2345 PetscErrorCode ierr; 2346 PetscInt ridx,cidx,bs2=a->bs2; 2347 PetscBool roworiented=a->roworiented; 2348 MatScalar *ap,value,*aa=a->a,*bap; 2349 2350 PetscFunctionBegin; 2351 if (v) PetscValidScalarPointer(v,6); 2352 for (k=0; k<m; k++) { /* loop over added rows */ 2353 row = im[k]; 2354 brow = row/bs; 2355 if (row < 0) continue; 2356 #if defined(PETSC_USE_DEBUG) 2357 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2358 #endif 2359 rp = aj + ai[brow]; 2360 ap = aa + bs2*ai[brow]; 2361 rmax = imax[brow]; 2362 nrow = ailen[brow]; 2363 low = 0; 2364 high = nrow; 2365 for (l=0; l<n; l++) { /* loop over added columns */ 2366 if (in[l] < 0) continue; 2367 #if defined(PETSC_USE_DEBUG) 2368 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2369 #endif 2370 col = in[l]; bcol = col/bs; 2371 ridx = row % bs; cidx = col % bs; 2372 if (roworiented) { 2373 value = v[l + k*n]; 2374 } else { 2375 value = v[k + l*m]; 2376 } 2377 if (col <= lastcol) low = 0; else high = nrow; 2378 lastcol = col; 2379 while (high-low > 7) { 2380 t = (low+high)/2; 2381 if (rp[t] > bcol) high = t; 2382 else low = t; 2383 } 2384 for (i=low; i<high; i++) { 2385 if (rp[i] > bcol) break; 2386 if (rp[i] == bcol) { 2387 bap = ap + bs2*i + bs*cidx + ridx; 2388 if (is == ADD_VALUES) *bap += value; 2389 else *bap = value; 2390 goto noinsert1; 2391 } 2392 } 2393 if (nonew == 1) goto noinsert1; 2394 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2395 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2396 N = nrow++ - 1; high++; 2397 /* shift up all the later entries in this row */ 2398 for (ii=N; ii>=i; ii--) { 2399 rp[ii+1] = rp[ii]; 2400 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2401 } 2402 if (N>=i) { 2403 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2404 } 2405 rp[i] = bcol; 2406 ap[bs2*i + bs*cidx + ridx] = value; 2407 a->nz++; 2408 noinsert1:; 2409 low = i; 2410 } 2411 ailen[brow] = nrow; 2412 } 2413 A->same_nonzero = PETSC_FALSE; 2414 PetscFunctionReturn(0); 2415 } 2416 2417 #undef __FUNCT__ 2418 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2419 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2420 { 2421 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2422 Mat outA; 2423 PetscErrorCode ierr; 2424 PetscBool row_identity,col_identity; 2425 2426 PetscFunctionBegin; 2427 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2428 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2429 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2430 if (!row_identity || !col_identity) { 2431 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2432 } 2433 2434 outA = inA; 2435 inA->factortype = MAT_FACTOR_LU; 2436 2437 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2438 2439 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2440 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2441 a->row = row; 2442 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2443 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2444 a->col = col; 2445 2446 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2447 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2448 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2449 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2450 2451 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2452 if (!a->solve_work) { 2453 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2454 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2455 } 2456 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2457 2458 PetscFunctionReturn(0); 2459 } 2460 2461 EXTERN_C_BEGIN 2462 #undef __FUNCT__ 2463 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2464 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2465 { 2466 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2467 PetscInt i,nz,mbs; 2468 2469 PetscFunctionBegin; 2470 nz = baij->maxnz; 2471 mbs = baij->mbs; 2472 for (i=0; i<nz; i++) { 2473 baij->j[i] = indices[i]; 2474 } 2475 baij->nz = nz; 2476 for (i=0; i<mbs; i++) { 2477 baij->ilen[i] = baij->imax[i]; 2478 } 2479 PetscFunctionReturn(0); 2480 } 2481 EXTERN_C_END 2482 2483 #undef __FUNCT__ 2484 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2485 /*@ 2486 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2487 in the matrix. 2488 2489 Input Parameters: 2490 + mat - the SeqBAIJ matrix 2491 - indices - the column indices 2492 2493 Level: advanced 2494 2495 Notes: 2496 This can be called if you have precomputed the nonzero structure of the 2497 matrix and want to provide it to the matrix object to improve the performance 2498 of the MatSetValues() operation. 2499 2500 You MUST have set the correct numbers of nonzeros per row in the call to 2501 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2502 2503 MUST be called before any calls to MatSetValues(); 2504 2505 @*/ 2506 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2507 { 2508 PetscErrorCode ierr; 2509 2510 PetscFunctionBegin; 2511 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2512 PetscValidPointer(indices,2); 2513 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr); 2514 PetscFunctionReturn(0); 2515 } 2516 2517 #undef __FUNCT__ 2518 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2519 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2520 { 2521 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2522 PetscErrorCode ierr; 2523 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2524 PetscReal atmp; 2525 PetscScalar *x,zero = 0.0; 2526 MatScalar *aa; 2527 PetscInt ncols,brow,krow,kcol; 2528 2529 PetscFunctionBegin; 2530 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2531 bs = A->rmap->bs; 2532 aa = a->a; 2533 ai = a->i; 2534 aj = a->j; 2535 mbs = a->mbs; 2536 2537 ierr = VecSet(v,zero);CHKERRQ(ierr); 2538 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2539 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2540 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2541 for (i=0; i<mbs; i++) { 2542 ncols = ai[1] - ai[0]; ai++; 2543 brow = bs*i; 2544 for (j=0; j<ncols; j++){ 2545 for (kcol=0; kcol<bs; kcol++){ 2546 for (krow=0; krow<bs; krow++){ 2547 atmp = PetscAbsScalar(*aa);aa++; 2548 row = brow + krow; /* row index */ 2549 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2550 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2551 } 2552 } 2553 aj++; 2554 } 2555 } 2556 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2557 PetscFunctionReturn(0); 2558 } 2559 2560 #undef __FUNCT__ 2561 #define __FUNCT__ "MatCopy_SeqBAIJ" 2562 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2563 { 2564 PetscErrorCode ierr; 2565 2566 PetscFunctionBegin; 2567 /* If the two matrices have the same copy implementation, use fast copy. */ 2568 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2569 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2570 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2571 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2572 2573 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2574 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2575 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2576 } else { 2577 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2578 } 2579 PetscFunctionReturn(0); 2580 } 2581 2582 #undef __FUNCT__ 2583 #define __FUNCT__ "MatSetUp_SeqBAIJ" 2584 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2585 { 2586 PetscErrorCode ierr; 2587 2588 PetscFunctionBegin; 2589 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2590 PetscFunctionReturn(0); 2591 } 2592 2593 #undef __FUNCT__ 2594 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ" 2595 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2596 { 2597 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2598 PetscFunctionBegin; 2599 *array = a->a; 2600 PetscFunctionReturn(0); 2601 } 2602 2603 #undef __FUNCT__ 2604 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ" 2605 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2606 { 2607 PetscFunctionBegin; 2608 PetscFunctionReturn(0); 2609 } 2610 2611 #undef __FUNCT__ 2612 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2613 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2614 { 2615 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2616 PetscErrorCode ierr; 2617 PetscInt i,bs=Y->rmap->bs,j,bs2=bs*bs; 2618 PetscBLASInt one=1; 2619 2620 PetscFunctionBegin; 2621 if (str == SAME_NONZERO_PATTERN) { 2622 PetscScalar alpha = a; 2623 PetscBLASInt bnz = PetscBLASIntCast(x->nz*bs2); 2624 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2625 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2626 if (y->xtoy && y->XtoY != X) { 2627 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2628 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2629 } 2630 if (!y->xtoy) { /* get xtoy */ 2631 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2632 y->XtoY = X; 2633 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2634 } 2635 for (i=0; i<x->nz; i++) { 2636 j = 0; 2637 while (j < bs2){ 2638 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2639 j++; 2640 } 2641 } 2642 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2643 } else { 2644 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2645 } 2646 PetscFunctionReturn(0); 2647 } 2648 2649 #undef __FUNCT__ 2650 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2651 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2652 { 2653 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2654 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2655 MatScalar *aa = a->a; 2656 2657 PetscFunctionBegin; 2658 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2659 PetscFunctionReturn(0); 2660 } 2661 2662 #undef __FUNCT__ 2663 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2664 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2665 { 2666 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2667 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2668 MatScalar *aa = a->a; 2669 2670 PetscFunctionBegin; 2671 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2672 PetscFunctionReturn(0); 2673 } 2674 2675 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2676 2677 #undef __FUNCT__ 2678 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2679 /* 2680 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2681 */ 2682 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2683 { 2684 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2685 PetscErrorCode ierr; 2686 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2687 PetscInt nz = a->i[m],row,*jj,mr,col; 2688 2689 PetscFunctionBegin; 2690 *nn = n; 2691 if (!ia) PetscFunctionReturn(0); 2692 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2693 else { 2694 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2695 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2696 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2697 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2698 jj = a->j; 2699 for (i=0; i<nz; i++) { 2700 collengths[jj[i]]++; 2701 } 2702 cia[0] = oshift; 2703 for (i=0; i<n; i++) { 2704 cia[i+1] = cia[i] + collengths[i]; 2705 } 2706 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2707 jj = a->j; 2708 for (row=0; row<m; row++) { 2709 mr = a->i[row+1] - a->i[row]; 2710 for (i=0; i<mr; i++) { 2711 col = *jj++; 2712 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2713 } 2714 } 2715 ierr = PetscFree(collengths);CHKERRQ(ierr); 2716 *ia = cia; *ja = cja; 2717 } 2718 PetscFunctionReturn(0); 2719 } 2720 2721 #undef __FUNCT__ 2722 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2723 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2724 { 2725 PetscErrorCode ierr; 2726 2727 PetscFunctionBegin; 2728 if (!ia) PetscFunctionReturn(0); 2729 ierr = PetscFree(*ia);CHKERRQ(ierr); 2730 ierr = PetscFree(*ja);CHKERRQ(ierr); 2731 PetscFunctionReturn(0); 2732 } 2733 2734 #undef __FUNCT__ 2735 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2736 PetscErrorCode MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2737 { 2738 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f; 2739 PetscErrorCode ierr; 2740 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow; 2741 PetscScalar dx,*y,*xx,*w3_array; 2742 PetscScalar *vscale_array; 2743 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2744 Vec w1=coloring->w1,w2=coloring->w2,w3; 2745 void *fctx = coloring->fctx; 2746 PetscBool flg = PETSC_FALSE; 2747 PetscInt ctype=coloring->ctype,N,col_start=0,col_end=0; 2748 Vec x1_tmp; 2749 2750 PetscFunctionBegin; 2751 PetscValidHeaderSpecific(J,MAT_CLASSID,1); 2752 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2); 2753 PetscValidHeaderSpecific(x1,VEC_CLASSID,3); 2754 if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2755 2756 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2757 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2758 ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr); 2759 if (flg) { 2760 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2761 } else { 2762 PetscBool assembled; 2763 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2764 if (assembled) { 2765 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2766 } 2767 } 2768 2769 x1_tmp = x1; 2770 if (!coloring->vscale){ 2771 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2772 } 2773 2774 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2775 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2776 } 2777 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2778 2779 /* Set w1 = F(x1) */ 2780 if (!coloring->fset) { 2781 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2782 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2783 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2784 } else { 2785 coloring->fset = PETSC_FALSE; 2786 } 2787 2788 if (!coloring->w3) { 2789 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2790 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2791 } 2792 w3 = coloring->w3; 2793 2794 CHKMEMQ; 2795 /* Compute all the local scale factors, including ghost points */ 2796 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2797 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2798 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2799 if (ctype == IS_COLORING_GHOSTED){ 2800 col_start = 0; col_end = N; 2801 } else if (ctype == IS_COLORING_GLOBAL){ 2802 xx = xx - start; 2803 vscale_array = vscale_array - start; 2804 col_start = start; col_end = N + start; 2805 } CHKMEMQ; 2806 for (col=col_start; col<col_end; col++){ 2807 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2808 if (coloring->htype[0] == 'w') { 2809 dx = 1.0 + unorm; 2810 } else { 2811 dx = xx[col]; 2812 } 2813 if (dx == (PetscScalar)0.0) dx = 1.0; 2814 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2815 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2816 dx *= epsilon; 2817 vscale_array[col] = (PetscScalar)1.0/dx; 2818 } CHKMEMQ; 2819 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2820 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2821 if (ctype == IS_COLORING_GLOBAL){ 2822 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2823 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2824 } 2825 CHKMEMQ; 2826 if (coloring->vscaleforrow) { 2827 vscaleforrow = coloring->vscaleforrow; 2828 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2829 2830 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2831 /* 2832 Loop over each color 2833 */ 2834 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2835 for (k=0; k<coloring->ncolors; k++) { 2836 coloring->currentcolor = k; 2837 for (i=0; i<bs; i++) { 2838 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2839 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2840 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2841 /* 2842 Loop over each column associated with color 2843 adding the perturbation to the vector w3. 2844 */ 2845 for (l=0; l<coloring->ncolumns[k]; l++) { 2846 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2847 if (coloring->htype[0] == 'w') { 2848 dx = 1.0 + unorm; 2849 } else { 2850 dx = xx[col]; 2851 } 2852 if (dx == (PetscScalar)0.0) dx = 1.0; 2853 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2854 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2855 dx *= epsilon; 2856 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2857 w3_array[col] += dx; 2858 } 2859 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2860 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2861 2862 /* 2863 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2864 w2 = F(x1 + dx) - F(x1) 2865 */ 2866 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2867 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2868 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2869 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2870 2871 /* 2872 Loop over rows of vector, putting results into Jacobian matrix 2873 */ 2874 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2875 for (l=0; l<coloring->nrows[k]; l++) { 2876 row = bs*coloring->rows[k][l]; /* local row index */ 2877 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2878 for (j=0; j<bs; j++) { 2879 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2880 srows[j] = row + start + j; 2881 } 2882 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2883 } 2884 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2885 } 2886 } /* endof for each color */ 2887 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2888 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2889 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2890 ierr = PetscFree(srows);CHKERRQ(ierr); 2891 2892 coloring->currentcolor = -1; 2893 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2894 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2895 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2896 PetscFunctionReturn(0); 2897 } 2898 2899 /* -------------------------------------------------------------------*/ 2900 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2901 MatGetRow_SeqBAIJ, 2902 MatRestoreRow_SeqBAIJ, 2903 MatMult_SeqBAIJ_N, 2904 /* 4*/ MatMultAdd_SeqBAIJ_N, 2905 MatMultTranspose_SeqBAIJ, 2906 MatMultTransposeAdd_SeqBAIJ, 2907 0, 2908 0, 2909 0, 2910 /*10*/ 0, 2911 MatLUFactor_SeqBAIJ, 2912 0, 2913 0, 2914 MatTranspose_SeqBAIJ, 2915 /*15*/ MatGetInfo_SeqBAIJ, 2916 MatEqual_SeqBAIJ, 2917 MatGetDiagonal_SeqBAIJ, 2918 MatDiagonalScale_SeqBAIJ, 2919 MatNorm_SeqBAIJ, 2920 /*20*/ 0, 2921 MatAssemblyEnd_SeqBAIJ, 2922 MatSetOption_SeqBAIJ, 2923 MatZeroEntries_SeqBAIJ, 2924 /*24*/ MatZeroRows_SeqBAIJ, 2925 0, 2926 0, 2927 0, 2928 0, 2929 /*29*/ MatSetUp_SeqBAIJ, 2930 0, 2931 0, 2932 0, 2933 0, 2934 /*34*/ MatDuplicate_SeqBAIJ, 2935 0, 2936 0, 2937 MatILUFactor_SeqBAIJ, 2938 0, 2939 /*39*/ MatAXPY_SeqBAIJ, 2940 MatGetSubMatrices_SeqBAIJ, 2941 MatIncreaseOverlap_SeqBAIJ, 2942 MatGetValues_SeqBAIJ, 2943 MatCopy_SeqBAIJ, 2944 /*44*/ 0, 2945 MatScale_SeqBAIJ, 2946 0, 2947 0, 2948 MatZeroRowsColumns_SeqBAIJ, 2949 /*49*/ 0, 2950 MatGetRowIJ_SeqBAIJ, 2951 MatRestoreRowIJ_SeqBAIJ, 2952 MatGetColumnIJ_SeqBAIJ, 2953 MatRestoreColumnIJ_SeqBAIJ, 2954 /*54*/ MatFDColoringCreate_SeqAIJ, 2955 0, 2956 0, 2957 0, 2958 MatSetValuesBlocked_SeqBAIJ, 2959 /*59*/ MatGetSubMatrix_SeqBAIJ, 2960 MatDestroy_SeqBAIJ, 2961 MatView_SeqBAIJ, 2962 0, 2963 0, 2964 /*64*/ 0, 2965 0, 2966 0, 2967 0, 2968 0, 2969 /*69*/ MatGetRowMaxAbs_SeqBAIJ, 2970 0, 2971 MatConvert_Basic, 2972 0, 2973 0, 2974 /*74*/ 0, 2975 MatFDColoringApply_BAIJ, 2976 0, 2977 0, 2978 0, 2979 /*79*/ 0, 2980 0, 2981 0, 2982 0, 2983 MatLoad_SeqBAIJ, 2984 /*84*/ 0, 2985 0, 2986 0, 2987 0, 2988 0, 2989 /*89*/ 0, 2990 0, 2991 0, 2992 0, 2993 0, 2994 /*94*/ 0, 2995 0, 2996 0, 2997 0, 2998 0, 2999 /*99*/0, 3000 0, 3001 0, 3002 0, 3003 0, 3004 /*104*/0, 3005 MatRealPart_SeqBAIJ, 3006 MatImaginaryPart_SeqBAIJ, 3007 0, 3008 0, 3009 /*109*/0, 3010 0, 3011 0, 3012 0, 3013 MatMissingDiagonal_SeqBAIJ, 3014 /*114*/0, 3015 0, 3016 0, 3017 0, 3018 0, 3019 /*119*/0, 3020 0, 3021 MatMultHermitianTranspose_SeqBAIJ, 3022 MatMultHermitianTransposeAdd_SeqBAIJ, 3023 0, 3024 /*124*/0, 3025 0, 3026 MatInvertBlockDiagonal_SeqBAIJ 3027 }; 3028 3029 EXTERN_C_BEGIN 3030 #undef __FUNCT__ 3031 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 3032 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3033 { 3034 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3035 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 3036 PetscErrorCode ierr; 3037 3038 PetscFunctionBegin; 3039 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3040 3041 /* allocate space for values if not already there */ 3042 if (!aij->saved_values) { 3043 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 3044 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 3045 } 3046 3047 /* copy values over */ 3048 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3049 PetscFunctionReturn(0); 3050 } 3051 EXTERN_C_END 3052 3053 EXTERN_C_BEGIN 3054 #undef __FUNCT__ 3055 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 3056 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3057 { 3058 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3059 PetscErrorCode ierr; 3060 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 3061 3062 PetscFunctionBegin; 3063 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3064 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 3065 3066 /* copy values over */ 3067 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3068 PetscFunctionReturn(0); 3069 } 3070 EXTERN_C_END 3071 3072 EXTERN_C_BEGIN 3073 extern PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 3074 extern PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 3075 EXTERN_C_END 3076 3077 EXTERN_C_BEGIN 3078 #undef __FUNCT__ 3079 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 3080 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 3081 { 3082 Mat_SeqBAIJ *b; 3083 PetscErrorCode ierr; 3084 PetscInt i,mbs,nbs,bs2; 3085 PetscBool flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 3086 3087 PetscFunctionBegin; 3088 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 3089 if (nz == MAT_SKIP_ALLOCATION) { 3090 skipallocation = PETSC_TRUE; 3091 nz = 0; 3092 } 3093 3094 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3095 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3096 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3097 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3098 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3099 3100 B->preallocated = PETSC_TRUE; 3101 3102 mbs = B->rmap->n/bs; 3103 nbs = B->cmap->n/bs; 3104 bs2 = bs*bs; 3105 3106 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 3107 3108 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3109 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 3110 if (nnz) { 3111 for (i=0; i<mbs; i++) { 3112 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 3113 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 3114 } 3115 } 3116 3117 b = (Mat_SeqBAIJ*)B->data; 3118 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 3119 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3120 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3121 3122 if (!flg) { 3123 switch (bs) { 3124 case 1: 3125 B->ops->mult = MatMult_SeqBAIJ_1; 3126 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3127 B->ops->sor = MatSOR_SeqBAIJ_1; 3128 break; 3129 case 2: 3130 B->ops->mult = MatMult_SeqBAIJ_2; 3131 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3132 B->ops->sor = MatSOR_SeqBAIJ_2; 3133 break; 3134 case 3: 3135 B->ops->mult = MatMult_SeqBAIJ_3; 3136 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3137 B->ops->sor = MatSOR_SeqBAIJ_3; 3138 break; 3139 case 4: 3140 B->ops->mult = MatMult_SeqBAIJ_4; 3141 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3142 B->ops->sor = MatSOR_SeqBAIJ_4; 3143 break; 3144 case 5: 3145 B->ops->mult = MatMult_SeqBAIJ_5; 3146 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3147 B->ops->sor = MatSOR_SeqBAIJ_5; 3148 break; 3149 case 6: 3150 B->ops->mult = MatMult_SeqBAIJ_6; 3151 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3152 B->ops->sor = MatSOR_SeqBAIJ_6; 3153 break; 3154 case 7: 3155 B->ops->mult = MatMult_SeqBAIJ_7; 3156 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3157 B->ops->sor = MatSOR_SeqBAIJ_7; 3158 break; 3159 case 15: 3160 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3161 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3162 B->ops->sor = MatSOR_SeqBAIJ_N; 3163 break; 3164 default: 3165 B->ops->mult = MatMult_SeqBAIJ_N; 3166 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3167 B->ops->sor = MatSOR_SeqBAIJ_N; 3168 break; 3169 } 3170 } 3171 b->mbs = mbs; 3172 b->nbs = nbs; 3173 if (!skipallocation) { 3174 if (!b->imax) { 3175 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 3176 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3177 b->free_imax_ilen = PETSC_TRUE; 3178 } 3179 /* b->ilen will count nonzeros in each block row so far. */ 3180 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 3181 if (!nnz) { 3182 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3183 else if (nz < 0) nz = 1; 3184 for (i=0; i<mbs; i++) b->imax[i] = nz; 3185 nz = nz*mbs; 3186 } else { 3187 nz = 0; 3188 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 3189 } 3190 3191 /* allocate the matrix space */ 3192 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 3193 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 3194 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 3195 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 3196 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3197 b->singlemalloc = PETSC_TRUE; 3198 b->i[0] = 0; 3199 for (i=1; i<mbs+1; i++) { 3200 b->i[i] = b->i[i-1] + b->imax[i-1]; 3201 } 3202 b->free_a = PETSC_TRUE; 3203 b->free_ij = PETSC_TRUE; 3204 } else { 3205 b->free_a = PETSC_FALSE; 3206 b->free_ij = PETSC_FALSE; 3207 } 3208 3209 b->bs2 = bs2; 3210 b->mbs = mbs; 3211 b->nz = 0; 3212 b->maxnz = nz; 3213 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3214 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 3215 PetscFunctionReturn(0); 3216 } 3217 EXTERN_C_END 3218 3219 EXTERN_C_BEGIN 3220 #undef __FUNCT__ 3221 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3222 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3223 { 3224 PetscInt i,m,nz,nz_max=0,*nnz; 3225 PetscScalar *values=0; 3226 PetscErrorCode ierr; 3227 3228 PetscFunctionBegin; 3229 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3230 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3231 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3232 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3233 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3234 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3235 m = B->rmap->n/bs; 3236 3237 if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 3238 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3239 for (i=0; i<m; i++) { 3240 nz = ii[i+1]- ii[i]; 3241 if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 3242 nz_max = PetscMax(nz_max, nz); 3243 nnz[i] = nz; 3244 } 3245 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3246 ierr = PetscFree(nnz);CHKERRQ(ierr); 3247 3248 values = (PetscScalar*)V; 3249 if (!values) { 3250 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3251 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3252 } 3253 for (i=0; i<m; i++) { 3254 PetscInt ncols = ii[i+1] - ii[i]; 3255 const PetscInt *icols = jj + ii[i]; 3256 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3257 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3258 } 3259 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3260 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3261 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3262 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3263 PetscFunctionReturn(0); 3264 } 3265 EXTERN_C_END 3266 3267 3268 EXTERN_C_BEGIN 3269 extern PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3270 extern PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*); 3271 #if defined(PETSC_HAVE_MUMPS) 3272 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3273 #endif 3274 extern PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 3275 EXTERN_C_END 3276 3277 /*MC 3278 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3279 block sparse compressed row format. 3280 3281 Options Database Keys: 3282 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3283 3284 Level: beginner 3285 3286 .seealso: MatCreateSeqBAIJ() 3287 M*/ 3288 3289 EXTERN_C_BEGIN 3290 extern PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3291 EXTERN_C_END 3292 3293 EXTERN_C_BEGIN 3294 #undef __FUNCT__ 3295 #define __FUNCT__ "MatCreate_SeqBAIJ" 3296 PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3297 { 3298 PetscErrorCode ierr; 3299 PetscMPIInt size; 3300 Mat_SeqBAIJ *b; 3301 3302 PetscFunctionBegin; 3303 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 3304 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3305 3306 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3307 B->data = (void*)b; 3308 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3309 b->row = 0; 3310 b->col = 0; 3311 b->icol = 0; 3312 b->reallocs = 0; 3313 b->saved_values = 0; 3314 3315 b->roworiented = PETSC_TRUE; 3316 b->nonew = 0; 3317 b->diag = 0; 3318 b->solve_work = 0; 3319 b->mult_work = 0; 3320 B->spptr = 0; 3321 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3322 b->keepnonzeropattern = PETSC_FALSE; 3323 b->xtoy = 0; 3324 b->XtoY = 0; 3325 B->same_nonzero = PETSC_FALSE; 3326 3327 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C", 3328 "MatGetFactorAvailable_seqbaij_petsc", 3329 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3330 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C", 3331 "MatGetFactor_seqbaij_petsc", 3332 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3333 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_bstrm_C", 3334 "MatGetFactor_seqbaij_bstrm", 3335 MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr); 3336 #if defined(PETSC_HAVE_MUMPS) 3337 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr); 3338 #endif 3339 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatInvertBlockDiagonal_C", 3340 "MatInvertBlockDiagonal_SeqBAIJ", 3341 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3342 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3343 "MatStoreValues_SeqBAIJ", 3344 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3345 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3346 "MatRetrieveValues_SeqBAIJ", 3347 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3348 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 3349 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 3350 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3351 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 3352 "MatConvert_SeqBAIJ_SeqAIJ", 3353 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3354 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 3355 "MatConvert_SeqBAIJ_SeqSBAIJ", 3356 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3357 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 3358 "MatSeqBAIJSetPreallocation_SeqBAIJ", 3359 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3360 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 3361 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 3362 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3363 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C", 3364 "MatConvert_SeqBAIJ_SeqBSTRM", 3365 MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3366 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3367 "MatIsTranspose_SeqBAIJ", 3368 MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3369 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3370 PetscFunctionReturn(0); 3371 } 3372 EXTERN_C_END 3373 3374 #undef __FUNCT__ 3375 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3376 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3377 { 3378 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3379 PetscErrorCode ierr; 3380 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3381 3382 PetscFunctionBegin; 3383 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3384 3385 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3386 c->imax = a->imax; 3387 c->ilen = a->ilen; 3388 c->free_imax_ilen = PETSC_FALSE; 3389 } else { 3390 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3391 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3392 for (i=0; i<mbs; i++) { 3393 c->imax[i] = a->imax[i]; 3394 c->ilen[i] = a->ilen[i]; 3395 } 3396 c->free_imax_ilen = PETSC_TRUE; 3397 } 3398 3399 /* allocate the matrix space */ 3400 if (mallocmatspace){ 3401 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3402 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3403 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3404 ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr); 3405 c->i = a->i; 3406 c->j = a->j; 3407 c->singlemalloc = PETSC_FALSE; 3408 c->free_a = PETSC_TRUE; 3409 c->free_ij = PETSC_FALSE; 3410 c->parent = A; 3411 C->preallocated = PETSC_TRUE; 3412 C->assembled = PETSC_TRUE; 3413 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3414 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3415 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3416 } else { 3417 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3418 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3419 c->singlemalloc = PETSC_TRUE; 3420 c->free_a = PETSC_TRUE; 3421 c->free_ij = PETSC_TRUE; 3422 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3423 if (mbs > 0) { 3424 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3425 if (cpvalues == MAT_COPY_VALUES) { 3426 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3427 } else { 3428 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3429 } 3430 } 3431 C->preallocated = PETSC_TRUE; 3432 C->assembled = PETSC_TRUE; 3433 } 3434 } 3435 3436 c->roworiented = a->roworiented; 3437 c->nonew = a->nonew; 3438 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3439 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3440 c->bs2 = a->bs2; 3441 c->mbs = a->mbs; 3442 c->nbs = a->nbs; 3443 3444 if (a->diag) { 3445 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3446 c->diag = a->diag; 3447 c->free_diag = PETSC_FALSE; 3448 } else { 3449 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3450 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3451 for (i=0; i<mbs; i++) { 3452 c->diag[i] = a->diag[i]; 3453 } 3454 c->free_diag = PETSC_TRUE; 3455 } 3456 } else c->diag = 0; 3457 c->nz = a->nz; 3458 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3459 c->solve_work = 0; 3460 c->mult_work = 0; 3461 3462 c->compressedrow.use = a->compressedrow.use; 3463 c->compressedrow.nrows = a->compressedrow.nrows; 3464 c->compressedrow.check = a->compressedrow.check; 3465 if (a->compressedrow.use){ 3466 i = a->compressedrow.nrows; 3467 ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 3468 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3469 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3470 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3471 } else { 3472 c->compressedrow.use = PETSC_FALSE; 3473 c->compressedrow.i = PETSC_NULL; 3474 c->compressedrow.rindex = PETSC_NULL; 3475 } 3476 C->same_nonzero = A->same_nonzero; 3477 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3478 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3479 PetscFunctionReturn(0); 3480 } 3481 3482 #undef __FUNCT__ 3483 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3484 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3485 { 3486 PetscErrorCode ierr; 3487 3488 PetscFunctionBegin; 3489 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 3490 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3491 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3492 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3493 PetscFunctionReturn(0); 3494 } 3495 3496 #undef __FUNCT__ 3497 #define __FUNCT__ "MatLoad_SeqBAIJ" 3498 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3499 { 3500 Mat_SeqBAIJ *a; 3501 PetscErrorCode ierr; 3502 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3503 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3504 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3505 PetscInt *masked,nmask,tmp,bs2,ishift; 3506 PetscMPIInt size; 3507 int fd; 3508 PetscScalar *aa; 3509 MPI_Comm comm = ((PetscObject)viewer)->comm; 3510 3511 PetscFunctionBegin; 3512 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3513 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3514 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3515 bs2 = bs*bs; 3516 3517 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3518 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3519 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3520 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3521 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3522 M = header[1]; N = header[2]; nz = header[3]; 3523 3524 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3525 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3526 3527 /* 3528 This code adds extra rows to make sure the number of rows is 3529 divisible by the blocksize 3530 */ 3531 mbs = M/bs; 3532 extra_rows = bs - M + bs*(mbs); 3533 if (extra_rows == bs) extra_rows = 0; 3534 else mbs++; 3535 if (extra_rows) { 3536 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3537 } 3538 3539 /* Set global sizes if not already set */ 3540 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3541 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3542 } else { /* Check if the matrix global sizes are correct */ 3543 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3544 if (rows < 0 && cols < 0){ /* user might provide local size instead of global size */ 3545 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3546 } 3547 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3548 } 3549 3550 /* read in row lengths */ 3551 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3552 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3553 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3554 3555 /* read in column indices */ 3556 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3557 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3558 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3559 3560 /* loop over row lengths determining block row lengths */ 3561 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3562 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3563 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3564 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3565 rowcount = 0; 3566 nzcount = 0; 3567 for (i=0; i<mbs; i++) { 3568 nmask = 0; 3569 for (j=0; j<bs; j++) { 3570 kmax = rowlengths[rowcount]; 3571 for (k=0; k<kmax; k++) { 3572 tmp = jj[nzcount++]/bs; 3573 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3574 } 3575 rowcount++; 3576 } 3577 browlengths[i] += nmask; 3578 /* zero out the mask elements we set */ 3579 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3580 } 3581 3582 /* Do preallocation */ 3583 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3584 a = (Mat_SeqBAIJ*)newmat->data; 3585 3586 /* set matrix "i" values */ 3587 a->i[0] = 0; 3588 for (i=1; i<= mbs; i++) { 3589 a->i[i] = a->i[i-1] + browlengths[i-1]; 3590 a->ilen[i-1] = browlengths[i-1]; 3591 } 3592 a->nz = 0; 3593 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3594 3595 /* read in nonzero values */ 3596 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3597 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3598 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3599 3600 /* set "a" and "j" values into matrix */ 3601 nzcount = 0; jcount = 0; 3602 for (i=0; i<mbs; i++) { 3603 nzcountb = nzcount; 3604 nmask = 0; 3605 for (j=0; j<bs; j++) { 3606 kmax = rowlengths[i*bs+j]; 3607 for (k=0; k<kmax; k++) { 3608 tmp = jj[nzcount++]/bs; 3609 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3610 } 3611 } 3612 /* sort the masked values */ 3613 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3614 3615 /* set "j" values into matrix */ 3616 maskcount = 1; 3617 for (j=0; j<nmask; j++) { 3618 a->j[jcount++] = masked[j]; 3619 mask[masked[j]] = maskcount++; 3620 } 3621 /* set "a" values into matrix */ 3622 ishift = bs2*a->i[i]; 3623 for (j=0; j<bs; j++) { 3624 kmax = rowlengths[i*bs+j]; 3625 for (k=0; k<kmax; k++) { 3626 tmp = jj[nzcountb]/bs ; 3627 block = mask[tmp] - 1; 3628 point = jj[nzcountb] - bs*tmp; 3629 idx = ishift + bs2*block + j + bs*point; 3630 a->a[idx] = (MatScalar)aa[nzcountb++]; 3631 } 3632 } 3633 /* zero out the mask elements we set */ 3634 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3635 } 3636 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3637 3638 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3639 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3640 ierr = PetscFree(aa);CHKERRQ(ierr); 3641 ierr = PetscFree(jj);CHKERRQ(ierr); 3642 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3643 3644 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3645 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3646 PetscFunctionReturn(0); 3647 } 3648 3649 #undef __FUNCT__ 3650 #define __FUNCT__ "MatCreateSeqBAIJ" 3651 /*@C 3652 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3653 compressed row) format. For good matrix assembly performance the 3654 user should preallocate the matrix storage by setting the parameter nz 3655 (or the array nnz). By setting these parameters accurately, performance 3656 during matrix assembly can be increased by more than a factor of 50. 3657 3658 Collective on MPI_Comm 3659 3660 Input Parameters: 3661 + comm - MPI communicator, set to PETSC_COMM_SELF 3662 . bs - size of block 3663 . m - number of rows 3664 . n - number of columns 3665 . nz - number of nonzero blocks per block row (same for all rows) 3666 - nnz - array containing the number of nonzero blocks in the various block rows 3667 (possibly different for each block row) or PETSC_NULL 3668 3669 Output Parameter: 3670 . A - the matrix 3671 3672 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3673 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3674 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3675 3676 Options Database Keys: 3677 . -mat_no_unroll - uses code that does not unroll the loops in the 3678 block calculations (much slower) 3679 . -mat_block_size - size of the blocks to use 3680 3681 Level: intermediate 3682 3683 Notes: 3684 The number of rows and columns must be divisible by blocksize. 3685 3686 If the nnz parameter is given then the nz parameter is ignored 3687 3688 A nonzero block is any block that as 1 or more nonzeros in it 3689 3690 The block AIJ format is fully compatible with standard Fortran 77 3691 storage. That is, the stored row and column indices can begin at 3692 either one (as in Fortran) or zero. See the users' manual for details. 3693 3694 Specify the preallocated storage with either nz or nnz (not both). 3695 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3696 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3697 matrices. 3698 3699 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3700 @*/ 3701 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3702 { 3703 PetscErrorCode ierr; 3704 3705 PetscFunctionBegin; 3706 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3707 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3708 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3709 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3710 PetscFunctionReturn(0); 3711 } 3712 3713 #undef __FUNCT__ 3714 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3715 /*@C 3716 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3717 per row in the matrix. For good matrix assembly performance the 3718 user should preallocate the matrix storage by setting the parameter nz 3719 (or the array nnz). By setting these parameters accurately, performance 3720 during matrix assembly can be increased by more than a factor of 50. 3721 3722 Collective on MPI_Comm 3723 3724 Input Parameters: 3725 + A - the matrix 3726 . bs - size of block 3727 . nz - number of block nonzeros per block row (same for all rows) 3728 - nnz - array containing the number of block nonzeros in the various block rows 3729 (possibly different for each block row) or PETSC_NULL 3730 3731 Options Database Keys: 3732 . -mat_no_unroll - uses code that does not unroll the loops in the 3733 block calculations (much slower) 3734 . -mat_block_size - size of the blocks to use 3735 3736 Level: intermediate 3737 3738 Notes: 3739 If the nnz parameter is given then the nz parameter is ignored 3740 3741 You can call MatGetInfo() to get information on how effective the preallocation was; 3742 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3743 You can also run with the option -info and look for messages with the string 3744 malloc in them to see if additional memory allocation was needed. 3745 3746 The block AIJ format is fully compatible with standard Fortran 77 3747 storage. That is, the stored row and column indices can begin at 3748 either one (as in Fortran) or zero. See the users' manual for details. 3749 3750 Specify the preallocated storage with either nz or nnz (not both). 3751 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3752 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3753 3754 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3755 @*/ 3756 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3757 { 3758 PetscErrorCode ierr; 3759 3760 PetscFunctionBegin; 3761 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3762 PetscValidType(B,1); 3763 PetscValidLogicalCollectiveInt(B,bs,2); 3764 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3765 PetscFunctionReturn(0); 3766 } 3767 3768 #undef __FUNCT__ 3769 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3770 /*@C 3771 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3772 (the default sequential PETSc format). 3773 3774 Collective on MPI_Comm 3775 3776 Input Parameters: 3777 + A - the matrix 3778 . i - the indices into j for the start of each local row (starts with zero) 3779 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3780 - v - optional values in the matrix 3781 3782 Level: developer 3783 3784 .keywords: matrix, aij, compressed row, sparse 3785 3786 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3787 @*/ 3788 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3789 { 3790 PetscErrorCode ierr; 3791 3792 PetscFunctionBegin; 3793 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3794 PetscValidType(B,1); 3795 PetscValidLogicalCollectiveInt(B,bs,2); 3796 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3797 PetscFunctionReturn(0); 3798 } 3799 3800 3801 #undef __FUNCT__ 3802 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3803 /*@ 3804 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3805 3806 Collective on MPI_Comm 3807 3808 Input Parameters: 3809 + comm - must be an MPI communicator of size 1 3810 . bs - size of block 3811 . m - number of rows 3812 . n - number of columns 3813 . i - row indices 3814 . j - column indices 3815 - a - matrix values 3816 3817 Output Parameter: 3818 . mat - the matrix 3819 3820 Level: advanced 3821 3822 Notes: 3823 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3824 once the matrix is destroyed 3825 3826 You cannot set new nonzero locations into this matrix, that will generate an error. 3827 3828 The i and j indices are 0 based 3829 3830 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3831 3832 3833 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3834 3835 @*/ 3836 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3837 { 3838 PetscErrorCode ierr; 3839 PetscInt ii; 3840 Mat_SeqBAIJ *baij; 3841 3842 PetscFunctionBegin; 3843 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3844 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3845 3846 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3847 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3848 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3849 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3850 baij = (Mat_SeqBAIJ*)(*mat)->data; 3851 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3852 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3853 3854 baij->i = i; 3855 baij->j = j; 3856 baij->a = a; 3857 baij->singlemalloc = PETSC_FALSE; 3858 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3859 baij->free_a = PETSC_FALSE; 3860 baij->free_ij = PETSC_FALSE; 3861 3862 for (ii=0; ii<m; ii++) { 3863 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3864 #if defined(PETSC_USE_DEBUG) 3865 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3866 #endif 3867 } 3868 #if defined(PETSC_USE_DEBUG) 3869 for (ii=0; ii<baij->i[m]; ii++) { 3870 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3871 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3872 } 3873 #endif 3874 3875 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3876 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3877 PetscFunctionReturn(0); 3878 } 3879