1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <../src/mat/blockinvert.h> 9 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 19 PetscReal shift = 0.0; 20 21 PetscFunctionBegin; 22 if (a->idiagvalid) { 23 if (values)*values = a->idiag; 24 PetscFunctionReturn(0); 25 } 26 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 27 diag_offset = a->diag; 28 if (!a->idiag) { 29 ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 30 ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 31 } 32 diag = a->idiag; 33 mdiag = a->idiag+bs2*mbs; 34 if (values) *values = a->idiag; 35 /* factor and invert each block */ 36 switch (bs){ 37 case 1: 38 for (i=0; i<mbs; i++) { 39 odiag = v + 1*diag_offset[i]; 40 diag[0] = odiag[0]; 41 mdiag[0] = odiag[0]; 42 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 43 diag += 1; 44 mdiag += 1; 45 } 46 break; 47 case 2: 48 for (i=0; i<mbs; i++) { 49 odiag = v + 4*diag_offset[i]; 50 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 51 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 52 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 53 diag += 4; 54 mdiag += 4; 55 } 56 break; 57 case 3: 58 for (i=0; i<mbs; i++) { 59 odiag = v + 9*diag_offset[i]; 60 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 61 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 62 diag[8] = odiag[8]; 63 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 64 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 65 mdiag[8] = odiag[8]; 66 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 67 diag += 9; 68 mdiag += 9; 69 } 70 break; 71 case 4: 72 for (i=0; i<mbs; i++) { 73 odiag = v + 16*diag_offset[i]; 74 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 75 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 76 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 77 diag += 16; 78 mdiag += 16; 79 } 80 break; 81 case 5: 82 for (i=0; i<mbs; i++) { 83 odiag = v + 25*diag_offset[i]; 84 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 85 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 86 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 87 diag += 25; 88 mdiag += 25; 89 } 90 break; 91 case 6: 92 for (i=0; i<mbs; i++) { 93 odiag = v + 36*diag_offset[i]; 94 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 95 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 97 diag += 36; 98 mdiag += 36; 99 } 100 break; 101 case 7: 102 for (i=0; i<mbs; i++) { 103 odiag = v + 49*diag_offset[i]; 104 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 105 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 107 diag += 49; 108 mdiag += 49; 109 } 110 break; 111 default: 112 ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr); 113 for (i=0; i<mbs; i++) { 114 odiag = v + bs2*diag_offset[i]; 115 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 117 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 118 diag += bs2; 119 mdiag += bs2; 120 } 121 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 122 } 123 a->idiagvalid = PETSC_TRUE; 124 PetscFunctionReturn(0); 125 } 126 127 #undef __FUNCT__ 128 #define __FUNCT__ "MatSOR_SeqBAIJ_1" 129 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 130 { 131 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 132 PetscScalar *x,x1,s1; 133 const PetscScalar *b; 134 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 135 PetscErrorCode ierr; 136 PetscInt m = a->mbs,i,i2,nz,j; 137 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 138 139 PetscFunctionBegin; 140 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 141 its = its*lits; 142 if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 143 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 144 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 145 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 146 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 147 148 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 149 150 diag = a->diag; 151 idiag = a->idiag; 152 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 153 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 154 155 if (flag & SOR_ZERO_INITIAL_GUESS) { 156 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 157 x[0] = b[0]*idiag[0]; 158 i2 = 1; 159 idiag += 1; 160 for (i=1; i<m; i++) { 161 v = aa + ai[i]; 162 vi = aj + ai[i]; 163 nz = diag[i] - ai[i]; 164 s1 = b[i2]; 165 for (j=0; j<nz; j++) { 166 s1 -= v[j]*x[vi[j]]; 167 } 168 x[i2] = idiag[0]*s1; 169 idiag += 1; 170 i2 += 1; 171 } 172 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 173 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 174 } 175 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 176 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 177 i2 = 0; 178 mdiag = a->idiag+a->mbs; 179 for (i=0; i<m; i++) { 180 x1 = x[i2]; 181 x[i2] = mdiag[0]*x1; 182 mdiag += 1; 183 i2 += 1; 184 } 185 ierr = PetscLogFlops(m);CHKERRQ(ierr); 186 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 187 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 188 } 189 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 190 idiag = a->idiag+a->mbs - 1; 191 i2 = m - 1; 192 x1 = x[i2]; 193 x[i2] = idiag[0]*x1; 194 idiag -= 1; 195 i2 -= 1; 196 for (i=m-2; i>=0; i--) { 197 v = aa + (diag[i]+1); 198 vi = aj + diag[i] + 1; 199 nz = ai[i+1] - diag[i] - 1; 200 s1 = x[i2]; 201 for (j=0; j<nz; j++) { 202 s1 -= v[j]*x[vi[j]]; 203 } 204 x[i2] = idiag[0]*s1; 205 idiag -= 1; 206 i2 -= 1; 207 } 208 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 209 } 210 } else { 211 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 212 } 213 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 214 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 215 PetscFunctionReturn(0); 216 } 217 218 #undef __FUNCT__ 219 #define __FUNCT__ "MatSOR_SeqBAIJ_2" 220 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 221 { 222 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 223 PetscScalar *x,x1,x2,s1,s2; 224 const PetscScalar *b; 225 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 226 PetscErrorCode ierr; 227 PetscInt m = a->mbs,i,i2,nz,idx,j,it; 228 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 229 230 PetscFunctionBegin; 231 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 232 its = its*lits; 233 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 234 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 235 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 236 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 237 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 238 239 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 240 241 diag = a->diag; 242 idiag = a->idiag; 243 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 244 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 245 246 if (flag & SOR_ZERO_INITIAL_GUESS) { 247 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 248 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 249 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 250 i2 = 2; 251 idiag += 4; 252 for (i=1; i<m; i++) { 253 v = aa + 4*ai[i]; 254 vi = aj + ai[i]; 255 nz = diag[i] - ai[i]; 256 s1 = b[i2]; s2 = b[i2+1]; 257 for (j=0; j<nz; j++) { 258 idx = 2*vi[j]; 259 it = 4*j; 260 x1 = x[idx]; x2 = x[1+idx]; 261 s1 -= v[it]*x1 + v[it+2]*x2; 262 s2 -= v[it+1]*x1 + v[it+3]*x2; 263 } 264 x[i2] = idiag[0]*s1 + idiag[2]*s2; 265 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 266 idiag += 4; 267 i2 += 2; 268 } 269 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 270 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 271 } 272 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 273 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 274 i2 = 0; 275 mdiag = a->idiag+4*a->mbs; 276 for (i=0; i<m; i++) { 277 x1 = x[i2]; x2 = x[i2+1]; 278 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 279 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 280 mdiag += 4; 281 i2 += 2; 282 } 283 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 284 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 285 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 286 } 287 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 288 idiag = a->idiag+4*a->mbs - 4; 289 i2 = 2*m - 2; 290 x1 = x[i2]; x2 = x[i2+1]; 291 x[i2] = idiag[0]*x1 + idiag[2]*x2; 292 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 293 idiag -= 4; 294 i2 -= 2; 295 for (i=m-2; i>=0; i--) { 296 v = aa + 4*(diag[i]+1); 297 vi = aj + diag[i] + 1; 298 nz = ai[i+1] - diag[i] - 1; 299 s1 = x[i2]; s2 = x[i2+1]; 300 for (j=0; j<nz; j++) { 301 idx = 2*vi[j]; 302 it = 4*j; 303 x1 = x[idx]; x2 = x[1+idx]; 304 s1 -= v[it]*x1 + v[it+2]*x2; 305 s2 -= v[it+1]*x1 + v[it+3]*x2; 306 } 307 x[i2] = idiag[0]*s1 + idiag[2]*s2; 308 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 309 idiag -= 4; 310 i2 -= 2; 311 } 312 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 313 } 314 } else { 315 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 316 } 317 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 318 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 319 PetscFunctionReturn(0); 320 } 321 322 #undef __FUNCT__ 323 #define __FUNCT__ "MatSOR_SeqBAIJ_3" 324 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 325 { 326 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 327 PetscScalar *x,x1,x2,x3,s1,s2,s3; 328 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 329 const PetscScalar *b; 330 PetscErrorCode ierr; 331 PetscInt m = a->mbs,i,i2,nz,idx; 332 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 333 334 PetscFunctionBegin; 335 its = its*lits; 336 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 337 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 338 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 339 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 340 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 341 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 342 343 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 344 345 diag = a->diag; 346 idiag = a->idiag; 347 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 348 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 349 350 if (flag & SOR_ZERO_INITIAL_GUESS) { 351 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 352 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 353 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 354 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 355 i2 = 3; 356 idiag += 9; 357 for (i=1; i<m; i++) { 358 v = aa + 9*ai[i]; 359 vi = aj + ai[i]; 360 nz = diag[i] - ai[i]; 361 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 362 while (nz--) { 363 idx = 3*(*vi++); 364 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 365 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 366 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 367 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 368 v += 9; 369 } 370 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 371 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 372 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 373 idiag += 9; 374 i2 += 3; 375 } 376 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 377 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 378 } 379 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 380 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 381 i2 = 0; 382 mdiag = a->idiag+9*a->mbs; 383 for (i=0; i<m; i++) { 384 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 385 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 386 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 387 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 388 mdiag += 9; 389 i2 += 3; 390 } 391 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 392 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 393 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 394 } 395 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 396 idiag = a->idiag+9*a->mbs - 9; 397 i2 = 3*m - 3; 398 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 399 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 400 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 401 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 402 idiag -= 9; 403 i2 -= 3; 404 for (i=m-2; i>=0; i--) { 405 v = aa + 9*(diag[i]+1); 406 vi = aj + diag[i] + 1; 407 nz = ai[i+1] - diag[i] - 1; 408 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 409 while (nz--) { 410 idx = 3*(*vi++); 411 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 412 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 413 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 414 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 415 v += 9; 416 } 417 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 418 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 419 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 420 idiag -= 9; 421 i2 -= 3; 422 } 423 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 424 } 425 } else { 426 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 427 } 428 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 429 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 430 PetscFunctionReturn(0); 431 } 432 433 #undef __FUNCT__ 434 #define __FUNCT__ "MatSOR_SeqBAIJ_4" 435 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 436 { 437 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 438 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 439 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 440 const PetscScalar *b; 441 PetscErrorCode ierr; 442 PetscInt m = a->mbs,i,i2,nz,idx; 443 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 444 445 PetscFunctionBegin; 446 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 447 its = its*lits; 448 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 449 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 450 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 451 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 452 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 453 454 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 455 456 diag = a->diag; 457 idiag = a->idiag; 458 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 459 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 460 461 if (flag & SOR_ZERO_INITIAL_GUESS) { 462 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 463 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 464 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 465 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 466 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 467 i2 = 4; 468 idiag += 16; 469 for (i=1; i<m; i++) { 470 v = aa + 16*ai[i]; 471 vi = aj + ai[i]; 472 nz = diag[i] - ai[i]; 473 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 474 while (nz--) { 475 idx = 4*(*vi++); 476 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 477 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 478 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 479 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 480 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 481 v += 16; 482 } 483 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 484 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 485 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 486 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 487 idiag += 16; 488 i2 += 4; 489 } 490 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 491 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 492 } 493 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 494 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 495 i2 = 0; 496 mdiag = a->idiag+16*a->mbs; 497 for (i=0; i<m; i++) { 498 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 499 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 500 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 501 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 502 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 503 mdiag += 16; 504 i2 += 4; 505 } 506 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 507 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 508 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 509 } 510 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 511 idiag = a->idiag+16*a->mbs - 16; 512 i2 = 4*m - 4; 513 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 514 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 515 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 516 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 517 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 518 idiag -= 16; 519 i2 -= 4; 520 for (i=m-2; i>=0; i--) { 521 v = aa + 16*(diag[i]+1); 522 vi = aj + diag[i] + 1; 523 nz = ai[i+1] - diag[i] - 1; 524 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 525 while (nz--) { 526 idx = 4*(*vi++); 527 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 528 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 529 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 530 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 531 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 532 v += 16; 533 } 534 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 535 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 536 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 537 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 538 idiag -= 16; 539 i2 -= 4; 540 } 541 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 542 } 543 } else { 544 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 545 } 546 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 547 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 548 PetscFunctionReturn(0); 549 } 550 551 #undef __FUNCT__ 552 #define __FUNCT__ "MatSOR_SeqBAIJ_5" 553 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 554 { 555 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 556 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 557 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 558 const PetscScalar *b; 559 PetscErrorCode ierr; 560 PetscInt m = a->mbs,i,i2,nz,idx; 561 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 562 563 PetscFunctionBegin; 564 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 565 its = its*lits; 566 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 567 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 568 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 569 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 570 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 571 572 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 573 574 diag = a->diag; 575 idiag = a->idiag; 576 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 577 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 578 579 if (flag & SOR_ZERO_INITIAL_GUESS) { 580 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 581 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 582 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 583 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 584 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 585 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 586 i2 = 5; 587 idiag += 25; 588 for (i=1; i<m; i++) { 589 v = aa + 25*ai[i]; 590 vi = aj + ai[i]; 591 nz = diag[i] - ai[i]; 592 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 593 while (nz--) { 594 idx = 5*(*vi++); 595 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 596 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 597 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 598 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 599 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 600 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 601 v += 25; 602 } 603 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 604 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 605 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 606 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 607 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 608 idiag += 25; 609 i2 += 5; 610 } 611 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 612 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 613 } 614 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 615 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 616 i2 = 0; 617 mdiag = a->idiag+25*a->mbs; 618 for (i=0; i<m; i++) { 619 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 620 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 621 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 622 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 623 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 624 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 625 mdiag += 25; 626 i2 += 5; 627 } 628 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 629 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 630 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 631 } 632 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 633 idiag = a->idiag+25*a->mbs - 25; 634 i2 = 5*m - 5; 635 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 636 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 637 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 638 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 639 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 640 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 641 idiag -= 25; 642 i2 -= 5; 643 for (i=m-2; i>=0; i--) { 644 v = aa + 25*(diag[i]+1); 645 vi = aj + diag[i] + 1; 646 nz = ai[i+1] - diag[i] - 1; 647 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 648 while (nz--) { 649 idx = 5*(*vi++); 650 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 651 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 652 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 653 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 654 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 655 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 656 v += 25; 657 } 658 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 659 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 660 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 661 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 662 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 663 idiag -= 25; 664 i2 -= 5; 665 } 666 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 667 } 668 } else { 669 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 670 } 671 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 672 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 673 PetscFunctionReturn(0); 674 } 675 676 #undef __FUNCT__ 677 #define __FUNCT__ "MatSOR_SeqBAIJ_6" 678 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 679 { 680 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 681 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 682 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 683 const PetscScalar *b; 684 PetscErrorCode ierr; 685 PetscInt m = a->mbs,i,i2,nz,idx; 686 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 687 688 PetscFunctionBegin; 689 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 690 its = its*lits; 691 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 692 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 693 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 694 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 695 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 696 697 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 698 699 diag = a->diag; 700 idiag = a->idiag; 701 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 702 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 703 704 if (flag & SOR_ZERO_INITIAL_GUESS) { 705 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 706 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 707 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 708 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 709 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 710 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 711 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 712 i2 = 6; 713 idiag += 36; 714 for (i=1; i<m; i++) { 715 v = aa + 36*ai[i]; 716 vi = aj + ai[i]; 717 nz = diag[i] - ai[i]; 718 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 719 while (nz--) { 720 idx = 6*(*vi++); 721 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 722 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 723 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 724 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 725 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 726 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 727 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 728 v += 36; 729 } 730 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 731 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 732 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 733 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 734 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 735 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 736 idiag += 36; 737 i2 += 6; 738 } 739 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 740 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 741 } 742 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 743 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 744 i2 = 0; 745 mdiag = a->idiag+36*a->mbs; 746 for (i=0; i<m; i++) { 747 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 748 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 749 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 750 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 751 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 752 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 753 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 754 mdiag += 36; 755 i2 += 6; 756 } 757 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 758 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 759 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 760 } 761 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 762 idiag = a->idiag+36*a->mbs - 36; 763 i2 = 6*m - 6; 764 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 765 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 766 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 767 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 768 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 769 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 770 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 771 idiag -= 36; 772 i2 -= 6; 773 for (i=m-2; i>=0; i--) { 774 v = aa + 36*(diag[i]+1); 775 vi = aj + diag[i] + 1; 776 nz = ai[i+1] - diag[i] - 1; 777 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 778 while (nz--) { 779 idx = 6*(*vi++); 780 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 781 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 782 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 783 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 784 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 785 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 786 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 787 v += 36; 788 } 789 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 790 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 791 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 792 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 793 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 794 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 795 idiag -= 36; 796 i2 -= 6; 797 } 798 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 799 } 800 } else { 801 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 802 } 803 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 804 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 805 PetscFunctionReturn(0); 806 } 807 808 #undef __FUNCT__ 809 #define __FUNCT__ "MatSOR_SeqBAIJ_7" 810 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 811 { 812 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 813 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 814 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 815 const PetscScalar *b; 816 PetscErrorCode ierr; 817 PetscInt m = a->mbs,i,i2,nz,idx; 818 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 819 820 PetscFunctionBegin; 821 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 822 its = its*lits; 823 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 824 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 825 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 826 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 827 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 828 829 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 830 831 diag = a->diag; 832 idiag = a->idiag; 833 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 834 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 835 836 if (flag & SOR_ZERO_INITIAL_GUESS) { 837 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 838 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 839 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 840 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 841 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 842 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 843 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 844 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 845 i2 = 7; 846 idiag += 49; 847 for (i=1; i<m; i++) { 848 v = aa + 49*ai[i]; 849 vi = aj + ai[i]; 850 nz = diag[i] - ai[i]; 851 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 852 while (nz--) { 853 idx = 7*(*vi++); 854 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 855 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 856 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 857 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 858 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 859 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 860 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 861 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 862 v += 49; 863 } 864 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 865 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 866 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 867 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 868 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 869 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 870 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 871 idiag += 49; 872 i2 += 7; 873 } 874 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 875 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 876 } 877 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 878 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 879 i2 = 0; 880 mdiag = a->idiag+49*a->mbs; 881 for (i=0; i<m; i++) { 882 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 883 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7; 884 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7; 885 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7; 886 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7; 887 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7; 888 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7; 889 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7; 890 mdiag += 49; 891 i2 += 7; 892 } 893 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 894 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 895 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 896 } 897 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 898 idiag = a->idiag+49*a->mbs - 49; 899 i2 = 7*m - 7; 900 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 901 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 902 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 903 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 904 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 905 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 906 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 907 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 908 idiag -= 49; 909 i2 -= 7; 910 for (i=m-2; i>=0; i--) { 911 v = aa + 49*(diag[i]+1); 912 vi = aj + diag[i] + 1; 913 nz = ai[i+1] - diag[i] - 1; 914 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 915 while (nz--) { 916 idx = 7*(*vi++); 917 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 918 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 919 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 920 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 921 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 922 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 923 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 924 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 925 v += 49; 926 } 927 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 928 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 929 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 930 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 931 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 932 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 933 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 934 idiag -= 49; 935 i2 -= 7; 936 } 937 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 938 } 939 } else { 940 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 941 } 942 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 943 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 944 PetscFunctionReturn(0); 945 } 946 947 #undef __FUNCT__ 948 #define __FUNCT__ "MatSOR_SeqBAIJ_N" 949 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 950 { 951 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 952 PetscScalar *x,*work,*w,*workt; 953 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 954 const PetscScalar *b; 955 PetscErrorCode ierr; 956 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j; 957 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 958 959 PetscFunctionBegin; 960 its = its*lits; 961 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 962 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 963 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 964 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 965 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 966 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 967 968 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 969 970 diag = a->diag; 971 idiag = a->idiag; 972 if (!a->mult_work) { 973 k = PetscMax(A->rmap->n,A->cmap->n); 974 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 975 } 976 work = a->mult_work; 977 if (!a->sor_work) { 978 ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr); 979 } 980 w = a->sor_work; 981 982 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 983 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 984 985 if (flag & SOR_ZERO_INITIAL_GUESS) { 986 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 987 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 988 /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 989 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 990 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/ 991 i2 = bs; 992 idiag += bs2; 993 for (i=1; i<m; i++) { 994 v = aa + bs2*ai[i]; 995 vi = aj + ai[i]; 996 nz = diag[i] - ai[i]; 997 998 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 999 /* copy all rows of x that are needed into contiguous space */ 1000 workt = work; 1001 for (j=0; j<nz; j++) { 1002 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1003 workt += bs; 1004 } 1005 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1006 /*s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 1007 while (nz--) { 1008 idx = N*(*vi++); 1009 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 1010 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1011 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1012 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1013 v += N2; 1014 } */ 1015 1016 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1017 /* x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1018 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1019 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/ 1020 1021 idiag += bs2; 1022 i2 += bs; 1023 } 1024 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 1025 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1026 } 1027 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 1028 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1029 i2 = 0; 1030 mdiag = a->idiag+bs2*a->mbs; 1031 ierr = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr); 1032 for (i=0; i<m; i++) { 1033 PetscKernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2); 1034 /* x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1035 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 1036 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 1037 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */ 1038 1039 mdiag += bs2; 1040 i2 += bs; 1041 } 1042 ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr); 1043 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1044 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 1045 } 1046 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1047 idiag = a->idiag+bs2*a->mbs - bs2; 1048 i2 = bs*m - bs; 1049 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1050 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1051 /*x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1052 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 1053 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 1054 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/ 1055 idiag -= bs2; 1056 i2 -= bs; 1057 for (i=m-2; i>=0; i--) { 1058 v = aa + bs2*(diag[i]+1); 1059 vi = aj + diag[i] + 1; 1060 nz = ai[i+1] - diag[i] - 1; 1061 1062 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1063 /* copy all rows of x that are needed into contiguous space */ 1064 workt = work; 1065 for (j=0; j<nz; j++) { 1066 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1067 workt += bs; 1068 } 1069 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1070 /* s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 1071 while (nz--) { 1072 idx = N*(*vi++); 1073 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 1074 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1075 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1076 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1077 v += N2; 1078 } */ 1079 1080 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1081 /*x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1082 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1083 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */ 1084 idiag -= bs2; 1085 i2 -= bs; 1086 } 1087 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1088 } 1089 } else { 1090 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 1091 } 1092 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1093 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1094 PetscFunctionReturn(0); 1095 } 1096 1097 /* 1098 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1099 */ 1100 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1101 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1102 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1103 #define matsetvaluesblocked4_ matsetvaluesblocked4 1104 #endif 1105 1106 EXTERN_C_BEGIN 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "matsetvaluesblocked4_" 1109 void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 1110 { 1111 Mat A = *AA; 1112 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1113 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 1114 PetscInt *ai=a->i,*ailen=a->ilen; 1115 PetscInt *aj=a->j,stepval,lastcol = -1; 1116 const PetscScalar *value = v; 1117 MatScalar *ap,*aa = a->a,*bap; 1118 1119 PetscFunctionBegin; 1120 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 1121 stepval = (n-1)*4; 1122 for (k=0; k<m; k++) { /* loop over added rows */ 1123 row = im[k]; 1124 rp = aj + ai[row]; 1125 ap = aa + 16*ai[row]; 1126 nrow = ailen[row]; 1127 low = 0; 1128 high = nrow; 1129 for (l=0; l<n; l++) { /* loop over added columns */ 1130 col = in[l]; 1131 if (col <= lastcol) low = 0; else high = nrow; 1132 lastcol = col; 1133 value = v + k*(stepval+4 + l)*4; 1134 while (high-low > 7) { 1135 t = (low+high)/2; 1136 if (rp[t] > col) high = t; 1137 else low = t; 1138 } 1139 for (i=low; i<high; i++) { 1140 if (rp[i] > col) break; 1141 if (rp[i] == col) { 1142 bap = ap + 16*i; 1143 for (ii=0; ii<4; ii++,value+=stepval) { 1144 for (jj=ii; jj<16; jj+=4) { 1145 bap[jj] += *value++; 1146 } 1147 } 1148 goto noinsert2; 1149 } 1150 } 1151 N = nrow++ - 1; 1152 high++; /* added new column index thus must search to one higher than before */ 1153 /* shift up all the later entries in this row */ 1154 for (ii=N; ii>=i; ii--) { 1155 rp[ii+1] = rp[ii]; 1156 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1157 } 1158 if (N >= i) { 1159 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1160 } 1161 rp[i] = col; 1162 bap = ap + 16*i; 1163 for (ii=0; ii<4; ii++,value+=stepval) { 1164 for (jj=ii; jj<16; jj+=4) { 1165 bap[jj] = *value++; 1166 } 1167 } 1168 noinsert2:; 1169 low = i; 1170 } 1171 ailen[row] = nrow; 1172 } 1173 PetscFunctionReturnVoid(); 1174 } 1175 EXTERN_C_END 1176 1177 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1178 #define matsetvalues4_ MATSETVALUES4 1179 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1180 #define matsetvalues4_ matsetvalues4 1181 #endif 1182 1183 EXTERN_C_BEGIN 1184 #undef __FUNCT__ 1185 #define __FUNCT__ "MatSetValues4_" 1186 void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1187 { 1188 Mat A = *AA; 1189 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1190 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1191 PetscInt *ai=a->i,*ailen=a->ilen; 1192 PetscInt *aj=a->j,brow,bcol; 1193 PetscInt ridx,cidx,lastcol = -1; 1194 MatScalar *ap,value,*aa=a->a,*bap; 1195 1196 PetscFunctionBegin; 1197 for (k=0; k<m; k++) { /* loop over added rows */ 1198 row = im[k]; brow = row/4; 1199 rp = aj + ai[brow]; 1200 ap = aa + 16*ai[brow]; 1201 nrow = ailen[brow]; 1202 low = 0; 1203 high = nrow; 1204 for (l=0; l<n; l++) { /* loop over added columns */ 1205 col = in[l]; bcol = col/4; 1206 ridx = row % 4; cidx = col % 4; 1207 value = v[l + k*n]; 1208 if (col <= lastcol) low = 0; else high = nrow; 1209 lastcol = col; 1210 while (high-low > 7) { 1211 t = (low+high)/2; 1212 if (rp[t] > bcol) high = t; 1213 else low = t; 1214 } 1215 for (i=low; i<high; i++) { 1216 if (rp[i] > bcol) break; 1217 if (rp[i] == bcol) { 1218 bap = ap + 16*i + 4*cidx + ridx; 1219 *bap += value; 1220 goto noinsert1; 1221 } 1222 } 1223 N = nrow++ - 1; 1224 high++; /* added new column thus must search to one higher than before */ 1225 /* shift up all the later entries in this row */ 1226 for (ii=N; ii>=i; ii--) { 1227 rp[ii+1] = rp[ii]; 1228 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1229 } 1230 if (N>=i) { 1231 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1232 } 1233 rp[i] = bcol; 1234 ap[16*i + 4*cidx + ridx] = value; 1235 noinsert1:; 1236 low = i; 1237 } 1238 ailen[brow] = nrow; 1239 } 1240 PetscFunctionReturnVoid(); 1241 } 1242 EXTERN_C_END 1243 1244 /* 1245 Checks for missing diagonals 1246 */ 1247 #undef __FUNCT__ 1248 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1249 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1250 { 1251 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1252 PetscErrorCode ierr; 1253 PetscInt *diag,*jj = a->j,i; 1254 1255 PetscFunctionBegin; 1256 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1257 *missing = PETSC_FALSE; 1258 if (A->rmap->n > 0 && !jj) { 1259 *missing = PETSC_TRUE; 1260 if (d) *d = 0; 1261 PetscInfo(A,"Matrix has no entries therefore is missing diagonal"); 1262 } else { 1263 diag = a->diag; 1264 for (i=0; i<a->mbs; i++) { 1265 if (jj[diag[i]] != i) { 1266 *missing = PETSC_TRUE; 1267 if (d) *d = i; 1268 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1269 break; 1270 } 1271 } 1272 } 1273 PetscFunctionReturn(0); 1274 } 1275 1276 #undef __FUNCT__ 1277 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1278 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1279 { 1280 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1281 PetscErrorCode ierr; 1282 PetscInt i,j,m = a->mbs; 1283 1284 PetscFunctionBegin; 1285 if (!a->diag) { 1286 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1287 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1288 a->free_diag = PETSC_TRUE; 1289 } 1290 for (i=0; i<m; i++) { 1291 a->diag[i] = a->i[i+1]; 1292 for (j=a->i[i]; j<a->i[i+1]; j++) { 1293 if (a->j[j] == i) { 1294 a->diag[i] = j; 1295 break; 1296 } 1297 } 1298 } 1299 PetscFunctionReturn(0); 1300 } 1301 1302 1303 #undef __FUNCT__ 1304 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1305 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1306 { 1307 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1308 PetscErrorCode ierr; 1309 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1310 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1311 1312 PetscFunctionBegin; 1313 *nn = n; 1314 if (!ia) PetscFunctionReturn(0); 1315 if (symmetric) { 1316 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1317 nz = tia[n]; 1318 } else { 1319 tia = a->i; tja = a->j; 1320 } 1321 1322 if (!blockcompressed && bs > 1) { 1323 (*nn) *= bs; 1324 /* malloc & create the natural set of indices */ 1325 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1326 if (n) { 1327 (*ia)[0] = 0; 1328 for (j=1; j<bs; j++) { 1329 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1330 } 1331 } 1332 1333 for (i=1; i<n; i++) { 1334 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1335 for (j=1; j<bs; j++) { 1336 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1337 } 1338 } 1339 if (n) { 1340 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1341 } 1342 1343 if (inja) { 1344 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1345 cnt = 0; 1346 for (i=0; i<n; i++) { 1347 for (j=0; j<bs; j++) { 1348 for (k=tia[i]; k<tia[i+1]; k++) { 1349 for (l=0; l<bs; l++) { 1350 (*ja)[cnt++] = bs*tja[k] + l; 1351 } 1352 } 1353 } 1354 } 1355 } 1356 1357 n *= bs; 1358 nz *= bs*bs; 1359 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1360 ierr = PetscFree(tia);CHKERRQ(ierr); 1361 ierr = PetscFree(tja);CHKERRQ(ierr); 1362 } 1363 } else if (oshift == 1) { 1364 if (symmetric) { 1365 PetscInt nz = tia[A->rmap->n/bs]; 1366 /* add 1 to i and j indices */ 1367 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1368 *ia = tia; 1369 if (ja) { 1370 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1371 *ja = tja; 1372 } 1373 } else { 1374 PetscInt nz = a->i[A->rmap->n/bs]; 1375 /* malloc space and add 1 to i and j indices */ 1376 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1377 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1378 if (ja) { 1379 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1380 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1381 } 1382 } 1383 } else { 1384 *ia = tia; 1385 if (ja) *ja = tja; 1386 } 1387 PetscFunctionReturn(0); 1388 } 1389 1390 #undef __FUNCT__ 1391 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1392 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1393 { 1394 PetscErrorCode ierr; 1395 1396 PetscFunctionBegin; 1397 if (!ia) PetscFunctionReturn(0); 1398 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1399 ierr = PetscFree(*ia);CHKERRQ(ierr); 1400 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1401 } 1402 PetscFunctionReturn(0); 1403 } 1404 1405 #undef __FUNCT__ 1406 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1407 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1408 { 1409 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1410 PetscErrorCode ierr; 1411 1412 PetscFunctionBegin; 1413 #if defined(PETSC_USE_LOG) 1414 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1415 #endif 1416 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1417 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1418 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1419 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1420 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1421 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1422 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1423 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1424 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1425 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1426 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1427 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1428 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1429 1430 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1431 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1432 ierr = PetscFree(A->data);CHKERRQ(ierr); 1433 1434 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1435 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1436 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1437 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1438 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1439 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1440 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1441 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1442 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1443 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",PETSC_NULL);CHKERRQ(ierr); 1444 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1445 PetscFunctionReturn(0); 1446 } 1447 1448 #undef __FUNCT__ 1449 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1450 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1451 { 1452 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1453 PetscErrorCode ierr; 1454 1455 PetscFunctionBegin; 1456 switch (op) { 1457 case MAT_ROW_ORIENTED: 1458 a->roworiented = flg; 1459 break; 1460 case MAT_KEEP_NONZERO_PATTERN: 1461 a->keepnonzeropattern = flg; 1462 break; 1463 case MAT_NEW_NONZERO_LOCATIONS: 1464 a->nonew = (flg ? 0 : 1); 1465 break; 1466 case MAT_NEW_NONZERO_LOCATION_ERR: 1467 a->nonew = (flg ? -1 : 0); 1468 break; 1469 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1470 a->nonew = (flg ? -2 : 0); 1471 break; 1472 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1473 a->nounused = (flg ? -1 : 0); 1474 break; 1475 case MAT_CHECK_COMPRESSED_ROW: 1476 a->compressedrow.check = flg; 1477 break; 1478 case MAT_NEW_DIAGONALS: 1479 case MAT_IGNORE_OFF_PROC_ENTRIES: 1480 case MAT_USE_HASH_TABLE: 1481 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1482 break; 1483 case MAT_SPD: 1484 case MAT_SYMMETRIC: 1485 case MAT_STRUCTURALLY_SYMMETRIC: 1486 case MAT_HERMITIAN: 1487 case MAT_SYMMETRY_ETERNAL: 1488 /* These options are handled directly by MatSetOption() */ 1489 break; 1490 default: 1491 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1492 } 1493 PetscFunctionReturn(0); 1494 } 1495 1496 #undef __FUNCT__ 1497 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1498 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1499 { 1500 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1501 PetscErrorCode ierr; 1502 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1503 MatScalar *aa,*aa_i; 1504 PetscScalar *v_i; 1505 1506 PetscFunctionBegin; 1507 bs = A->rmap->bs; 1508 ai = a->i; 1509 aj = a->j; 1510 aa = a->a; 1511 bs2 = a->bs2; 1512 1513 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1514 1515 bn = row/bs; /* Block number */ 1516 bp = row % bs; /* Block Position */ 1517 M = ai[bn+1] - ai[bn]; 1518 *nz = bs*M; 1519 1520 if (v) { 1521 *v = 0; 1522 if (*nz) { 1523 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1524 for (i=0; i<M; i++) { /* for each block in the block row */ 1525 v_i = *v + i*bs; 1526 aa_i = aa + bs2*(ai[bn] + i); 1527 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1528 } 1529 } 1530 } 1531 1532 if (idx) { 1533 *idx = 0; 1534 if (*nz) { 1535 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1536 for (i=0; i<M; i++) { /* for each block in the block row */ 1537 idx_i = *idx + i*bs; 1538 itmp = bs*aj[ai[bn] + i]; 1539 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1540 } 1541 } 1542 } 1543 PetscFunctionReturn(0); 1544 } 1545 1546 #undef __FUNCT__ 1547 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1548 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1549 { 1550 PetscErrorCode ierr; 1551 1552 PetscFunctionBegin; 1553 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1554 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1555 PetscFunctionReturn(0); 1556 } 1557 1558 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1559 1560 #undef __FUNCT__ 1561 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1562 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1563 { 1564 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1565 Mat C; 1566 PetscErrorCode ierr; 1567 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1568 PetscInt *rows,*cols,bs2=a->bs2; 1569 MatScalar *array; 1570 1571 PetscFunctionBegin; 1572 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1573 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1574 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1575 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1576 1577 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1578 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1579 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1580 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1581 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr); 1582 ierr = PetscFree(col);CHKERRQ(ierr); 1583 } else { 1584 C = *B; 1585 } 1586 1587 array = a->a; 1588 ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 1589 for (i=0; i<mbs; i++) { 1590 cols[0] = i*bs; 1591 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1592 len = ai[i+1] - ai[i]; 1593 for (j=0; j<len; j++) { 1594 rows[0] = (*aj++)*bs; 1595 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1596 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1597 array += bs2; 1598 } 1599 } 1600 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1601 1602 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1603 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1604 1605 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1606 *B = C; 1607 } else { 1608 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1609 } 1610 PetscFunctionReturn(0); 1611 } 1612 1613 EXTERN_C_BEGIN 1614 #undef __FUNCT__ 1615 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1616 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1617 { 1618 PetscErrorCode ierr; 1619 Mat Btrans; 1620 1621 PetscFunctionBegin; 1622 *f = PETSC_FALSE; 1623 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1624 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1625 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1626 PetscFunctionReturn(0); 1627 } 1628 EXTERN_C_END 1629 1630 #undef __FUNCT__ 1631 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1632 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1633 { 1634 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1635 PetscErrorCode ierr; 1636 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1637 int fd; 1638 PetscScalar *aa; 1639 FILE *file; 1640 1641 PetscFunctionBegin; 1642 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1643 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1644 col_lens[0] = MAT_FILE_CLASSID; 1645 1646 col_lens[1] = A->rmap->N; 1647 col_lens[2] = A->cmap->n; 1648 col_lens[3] = a->nz*bs2; 1649 1650 /* store lengths of each row and write (including header) to file */ 1651 count = 0; 1652 for (i=0; i<a->mbs; i++) { 1653 for (j=0; j<bs; j++) { 1654 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1655 } 1656 } 1657 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1658 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1659 1660 /* store column indices (zero start index) */ 1661 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1662 count = 0; 1663 for (i=0; i<a->mbs; i++) { 1664 for (j=0; j<bs; j++) { 1665 for (k=a->i[i]; k<a->i[i+1]; k++) { 1666 for (l=0; l<bs; l++) { 1667 jj[count++] = bs*a->j[k] + l; 1668 } 1669 } 1670 } 1671 } 1672 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1673 ierr = PetscFree(jj);CHKERRQ(ierr); 1674 1675 /* store nonzero values */ 1676 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1677 count = 0; 1678 for (i=0; i<a->mbs; i++) { 1679 for (j=0; j<bs; j++) { 1680 for (k=a->i[i]; k<a->i[i+1]; k++) { 1681 for (l=0; l<bs; l++) { 1682 aa[count++] = a->a[bs2*k + l*bs + j]; 1683 } 1684 } 1685 } 1686 } 1687 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1688 ierr = PetscFree(aa);CHKERRQ(ierr); 1689 1690 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1691 if (file) { 1692 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1693 } 1694 PetscFunctionReturn(0); 1695 } 1696 1697 #undef __FUNCT__ 1698 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1699 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1700 { 1701 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1702 PetscErrorCode ierr; 1703 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1704 PetscViewerFormat format; 1705 1706 PetscFunctionBegin; 1707 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1708 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1709 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1710 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1711 Mat aij; 1712 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1713 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1714 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1715 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1716 PetscFunctionReturn(0); 1717 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1718 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1719 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1720 for (i=0; i<a->mbs; i++) { 1721 for (j=0; j<bs; j++) { 1722 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1723 for (k=a->i[i]; k<a->i[i+1]; k++) { 1724 for (l=0; l<bs; l++) { 1725 #if defined(PETSC_USE_COMPLEX) 1726 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1727 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1728 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1729 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1730 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1731 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1732 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1733 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1734 } 1735 #else 1736 if (a->a[bs2*k + l*bs + j] != 0.0) { 1737 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1738 } 1739 #endif 1740 } 1741 } 1742 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1743 } 1744 } 1745 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1746 } else { 1747 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1748 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1749 for (i=0; i<a->mbs; i++) { 1750 for (j=0; j<bs; j++) { 1751 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1752 for (k=a->i[i]; k<a->i[i+1]; k++) { 1753 for (l=0; l<bs; l++) { 1754 #if defined(PETSC_USE_COMPLEX) 1755 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1756 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1757 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1758 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1759 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1760 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1761 } else { 1762 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1763 } 1764 #else 1765 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1766 #endif 1767 } 1768 } 1769 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1770 } 1771 } 1772 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1773 } 1774 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1775 PetscFunctionReturn(0); 1776 } 1777 1778 #undef __FUNCT__ 1779 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1780 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1781 { 1782 Mat A = (Mat) Aa; 1783 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1784 PetscErrorCode ierr; 1785 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1786 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1787 MatScalar *aa; 1788 PetscViewer viewer; 1789 PetscViewerFormat format; 1790 1791 PetscFunctionBegin; 1792 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1793 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1794 1795 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1796 1797 /* loop over matrix elements drawing boxes */ 1798 1799 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1800 color = PETSC_DRAW_BLUE; 1801 for (i=0,row=0; i<mbs; i++,row+=bs) { 1802 for (j=a->i[i]; j<a->i[i+1]; j++) { 1803 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1804 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1805 aa = a->a + j*bs2; 1806 for (k=0; k<bs; k++) { 1807 for (l=0; l<bs; l++) { 1808 if (PetscRealPart(*aa++) >= 0.) continue; 1809 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1810 } 1811 } 1812 } 1813 } 1814 color = PETSC_DRAW_CYAN; 1815 for (i=0,row=0; i<mbs; i++,row+=bs) { 1816 for (j=a->i[i]; j<a->i[i+1]; j++) { 1817 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1818 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1819 aa = a->a + j*bs2; 1820 for (k=0; k<bs; k++) { 1821 for (l=0; l<bs; l++) { 1822 if (PetscRealPart(*aa++) != 0.) continue; 1823 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1824 } 1825 } 1826 } 1827 } 1828 color = PETSC_DRAW_RED; 1829 for (i=0,row=0; i<mbs; i++,row+=bs) { 1830 for (j=a->i[i]; j<a->i[i+1]; j++) { 1831 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1832 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1833 aa = a->a + j*bs2; 1834 for (k=0; k<bs; k++) { 1835 for (l=0; l<bs; l++) { 1836 if (PetscRealPart(*aa++) <= 0.) continue; 1837 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1838 } 1839 } 1840 } 1841 } 1842 } else { 1843 /* use contour shading to indicate magnitude of values */ 1844 /* first determine max of all nonzero values */ 1845 PetscDraw popup; 1846 PetscReal scale,maxv = 0.0; 1847 1848 for (i=0; i<a->nz*a->bs2; i++) { 1849 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1850 } 1851 scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv; 1852 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1853 if (popup) {ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);} 1854 for (i=0,row=0; i<mbs; i++,row+=bs) { 1855 for (j=a->i[i]; j<a->i[i+1]; j++) { 1856 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1857 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1858 aa = a->a + j*bs2; 1859 for (k=0; k<bs; k++) { 1860 for (l=0; l<bs; l++) { 1861 color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++)); 1862 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1863 } 1864 } 1865 } 1866 } 1867 } 1868 PetscFunctionReturn(0); 1869 } 1870 1871 #undef __FUNCT__ 1872 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1873 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1874 { 1875 PetscErrorCode ierr; 1876 PetscReal xl,yl,xr,yr,w,h; 1877 PetscDraw draw; 1878 PetscBool isnull; 1879 1880 PetscFunctionBegin; 1881 1882 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1883 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1884 1885 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1886 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1887 xr += w; yr += h; xl = -w; yl = -h; 1888 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1889 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1890 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1891 PetscFunctionReturn(0); 1892 } 1893 1894 #undef __FUNCT__ 1895 #define __FUNCT__ "MatView_SeqBAIJ" 1896 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1897 { 1898 PetscErrorCode ierr; 1899 PetscBool iascii,isbinary,isdraw; 1900 1901 PetscFunctionBegin; 1902 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1903 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1904 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1905 if (iascii){ 1906 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1907 } else if (isbinary) { 1908 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1909 } else if (isdraw) { 1910 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1911 } else { 1912 Mat B; 1913 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1914 ierr = MatView(B,viewer);CHKERRQ(ierr); 1915 ierr = MatDestroy(&B);CHKERRQ(ierr); 1916 } 1917 PetscFunctionReturn(0); 1918 } 1919 1920 1921 #undef __FUNCT__ 1922 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1923 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1924 { 1925 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1926 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1927 PetscInt *ai = a->i,*ailen = a->ilen; 1928 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1929 MatScalar *ap,*aa = a->a; 1930 1931 PetscFunctionBegin; 1932 for (k=0; k<m; k++) { /* loop over rows */ 1933 row = im[k]; brow = row/bs; 1934 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1935 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1936 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1937 nrow = ailen[brow]; 1938 for (l=0; l<n; l++) { /* loop over columns */ 1939 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1940 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1941 col = in[l] ; 1942 bcol = col/bs; 1943 cidx = col%bs; 1944 ridx = row%bs; 1945 high = nrow; 1946 low = 0; /* assume unsorted */ 1947 while (high-low > 5) { 1948 t = (low+high)/2; 1949 if (rp[t] > bcol) high = t; 1950 else low = t; 1951 } 1952 for (i=low; i<high; i++) { 1953 if (rp[i] > bcol) break; 1954 if (rp[i] == bcol) { 1955 *v++ = ap[bs2*i+bs*cidx+ridx]; 1956 goto finished; 1957 } 1958 } 1959 *v++ = 0.0; 1960 finished:; 1961 } 1962 } 1963 PetscFunctionReturn(0); 1964 } 1965 1966 #undef __FUNCT__ 1967 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1968 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1969 { 1970 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1971 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1972 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1973 PetscErrorCode ierr; 1974 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1975 PetscBool roworiented=a->roworiented; 1976 const PetscScalar *value = v; 1977 MatScalar *ap,*aa = a->a,*bap; 1978 1979 PetscFunctionBegin; 1980 if (roworiented) { 1981 stepval = (n-1)*bs; 1982 } else { 1983 stepval = (m-1)*bs; 1984 } 1985 for (k=0; k<m; k++) { /* loop over added rows */ 1986 row = im[k]; 1987 if (row < 0) continue; 1988 #if defined(PETSC_USE_DEBUG) 1989 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1990 #endif 1991 rp = aj + ai[row]; 1992 ap = aa + bs2*ai[row]; 1993 rmax = imax[row]; 1994 nrow = ailen[row]; 1995 low = 0; 1996 high = nrow; 1997 for (l=0; l<n; l++) { /* loop over added columns */ 1998 if (in[l] < 0) continue; 1999 #if defined(PETSC_USE_DEBUG) 2000 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 2001 #endif 2002 col = in[l]; 2003 if (roworiented) { 2004 value = v + (k*(stepval+bs) + l)*bs; 2005 } else { 2006 value = v + (l*(stepval+bs) + k)*bs; 2007 } 2008 if (col <= lastcol) low = 0; else high = nrow; 2009 lastcol = col; 2010 while (high-low > 7) { 2011 t = (low+high)/2; 2012 if (rp[t] > col) high = t; 2013 else low = t; 2014 } 2015 for (i=low; i<high; i++) { 2016 if (rp[i] > col) break; 2017 if (rp[i] == col) { 2018 bap = ap + bs2*i; 2019 if (roworiented) { 2020 if (is == ADD_VALUES) { 2021 for (ii=0; ii<bs; ii++,value+=stepval) { 2022 for (jj=ii; jj<bs2; jj+=bs) { 2023 bap[jj] += *value++; 2024 } 2025 } 2026 } else { 2027 for (ii=0; ii<bs; ii++,value+=stepval) { 2028 for (jj=ii; jj<bs2; jj+=bs) { 2029 bap[jj] = *value++; 2030 } 2031 } 2032 } 2033 } else { 2034 if (is == ADD_VALUES) { 2035 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2036 for (jj=0; jj<bs; jj++) { 2037 bap[jj] += value[jj]; 2038 } 2039 bap += bs; 2040 } 2041 } else { 2042 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2043 for (jj=0; jj<bs; jj++) { 2044 bap[jj] = value[jj]; 2045 } 2046 bap += bs; 2047 } 2048 } 2049 } 2050 goto noinsert2; 2051 } 2052 } 2053 if (nonew == 1) goto noinsert2; 2054 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2055 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2056 N = nrow++ - 1; high++; 2057 /* shift up all the later entries in this row */ 2058 for (ii=N; ii>=i; ii--) { 2059 rp[ii+1] = rp[ii]; 2060 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2061 } 2062 if (N >= i) { 2063 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2064 } 2065 rp[i] = col; 2066 bap = ap + bs2*i; 2067 if (roworiented) { 2068 for (ii=0; ii<bs; ii++,value+=stepval) { 2069 for (jj=ii; jj<bs2; jj+=bs) { 2070 bap[jj] = *value++; 2071 } 2072 } 2073 } else { 2074 for (ii=0; ii<bs; ii++,value+=stepval) { 2075 for (jj=0; jj<bs; jj++) { 2076 *bap++ = *value++; 2077 } 2078 } 2079 } 2080 noinsert2:; 2081 low = i; 2082 } 2083 ailen[row] = nrow; 2084 } 2085 PetscFunctionReturn(0); 2086 } 2087 2088 #undef __FUNCT__ 2089 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 2090 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 2091 { 2092 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2093 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 2094 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 2095 PetscErrorCode ierr; 2096 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 2097 MatScalar *aa = a->a,*ap; 2098 PetscReal ratio=0.6; 2099 2100 PetscFunctionBegin; 2101 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 2102 2103 if (m) rmax = ailen[0]; 2104 for (i=1; i<mbs; i++) { 2105 /* move each row back by the amount of empty slots (fshift) before it*/ 2106 fshift += imax[i-1] - ailen[i-1]; 2107 rmax = PetscMax(rmax,ailen[i]); 2108 if (fshift) { 2109 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 2110 N = ailen[i]; 2111 for (j=0; j<N; j++) { 2112 ip[j-fshift] = ip[j]; 2113 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2114 } 2115 } 2116 ai[i] = ai[i-1] + ailen[i-1]; 2117 } 2118 if (mbs) { 2119 fshift += imax[mbs-1] - ailen[mbs-1]; 2120 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 2121 } 2122 /* reset ilen and imax for each row */ 2123 for (i=0; i<mbs; i++) { 2124 ailen[i] = imax[i] = ai[i+1] - ai[i]; 2125 } 2126 a->nz = ai[mbs]; 2127 2128 /* diagonals may have moved, so kill the diagonal pointers */ 2129 a->idiagvalid = PETSC_FALSE; 2130 if (fshift && a->diag) { 2131 ierr = PetscFree(a->diag);CHKERRQ(ierr); 2132 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2133 a->diag = 0; 2134 } 2135 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 2136 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 2137 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 2138 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 2139 A->info.mallocs += a->reallocs; 2140 a->reallocs = 0; 2141 A->info.nz_unneeded = (PetscReal)fshift*bs2; 2142 2143 ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 2144 A->same_nonzero = PETSC_TRUE; 2145 PetscFunctionReturn(0); 2146 } 2147 2148 /* 2149 This function returns an array of flags which indicate the locations of contiguous 2150 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2151 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2152 Assume: sizes should be long enough to hold all the values. 2153 */ 2154 #undef __FUNCT__ 2155 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 2156 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2157 { 2158 PetscInt i,j,k,row; 2159 PetscBool flg; 2160 2161 PetscFunctionBegin; 2162 for (i=0,j=0; i<n; j++) { 2163 row = idx[i]; 2164 if (row%bs!=0) { /* Not the begining of a block */ 2165 sizes[j] = 1; 2166 i++; 2167 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2168 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 2169 i++; 2170 } else { /* Begining of the block, so check if the complete block exists */ 2171 flg = PETSC_TRUE; 2172 for (k=1; k<bs; k++) { 2173 if (row+k != idx[i+k]) { /* break in the block */ 2174 flg = PETSC_FALSE; 2175 break; 2176 } 2177 } 2178 if (flg) { /* No break in the bs */ 2179 sizes[j] = bs; 2180 i+= bs; 2181 } else { 2182 sizes[j] = 1; 2183 i++; 2184 } 2185 } 2186 } 2187 *bs_max = j; 2188 PetscFunctionReturn(0); 2189 } 2190 2191 #undef __FUNCT__ 2192 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2193 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2194 { 2195 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2196 PetscErrorCode ierr; 2197 PetscInt i,j,k,count,*rows; 2198 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2199 PetscScalar zero = 0.0; 2200 MatScalar *aa; 2201 const PetscScalar *xx; 2202 PetscScalar *bb; 2203 2204 PetscFunctionBegin; 2205 /* fix right hand side if needed */ 2206 if (x && b) { 2207 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2208 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2209 for (i=0; i<is_n; i++) { 2210 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2211 } 2212 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2213 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2214 } 2215 2216 /* Make a copy of the IS and sort it */ 2217 /* allocate memory for rows,sizes */ 2218 ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2219 2220 /* copy IS values to rows, and sort them */ 2221 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2222 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2223 2224 if (baij->keepnonzeropattern) { 2225 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2226 bs_max = is_n; 2227 A->same_nonzero = PETSC_TRUE; 2228 } else { 2229 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2230 A->same_nonzero = PETSC_FALSE; 2231 } 2232 2233 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2234 row = rows[j]; 2235 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2236 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2237 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2238 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2239 if (diag != (PetscScalar)0.0) { 2240 if (baij->ilen[row/bs] > 0) { 2241 baij->ilen[row/bs] = 1; 2242 baij->j[baij->i[row/bs]] = row/bs; 2243 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2244 } 2245 /* Now insert all the diagonal values for this bs */ 2246 for (k=0; k<bs; k++) { 2247 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2248 } 2249 } else { /* (diag == 0.0) */ 2250 baij->ilen[row/bs] = 0; 2251 } /* end (diag == 0.0) */ 2252 } else { /* (sizes[i] != bs) */ 2253 #if defined (PETSC_USE_DEBUG) 2254 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2255 #endif 2256 for (k=0; k<count; k++) { 2257 aa[0] = zero; 2258 aa += bs; 2259 } 2260 if (diag != (PetscScalar)0.0) { 2261 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2262 } 2263 } 2264 } 2265 2266 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2267 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2268 PetscFunctionReturn(0); 2269 } 2270 2271 #undef __FUNCT__ 2272 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2273 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2274 { 2275 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2276 PetscErrorCode ierr; 2277 PetscInt i,j,k,count; 2278 PetscInt bs=A->rmap->bs,bs2=baij->bs2,row,col; 2279 PetscScalar zero = 0.0; 2280 MatScalar *aa; 2281 const PetscScalar *xx; 2282 PetscScalar *bb; 2283 PetscBool *zeroed,vecs = PETSC_FALSE; 2284 2285 PetscFunctionBegin; 2286 /* fix right hand side if needed */ 2287 if (x && b) { 2288 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2289 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2290 vecs = PETSC_TRUE; 2291 } 2292 A->same_nonzero = PETSC_TRUE; 2293 2294 /* zero the columns */ 2295 ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr); 2296 ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 2297 for (i=0; i<is_n; i++) { 2298 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2299 zeroed[is_idx[i]] = PETSC_TRUE; 2300 } 2301 for (i=0; i<A->rmap->N; i++) { 2302 if (!zeroed[i]) { 2303 row = i/bs; 2304 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2305 for (k=0; k<bs; k++) { 2306 col = bs*baij->j[j] + k; 2307 if (zeroed[col]) { 2308 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2309 if (vecs) bb[i] -= aa[0]*xx[col]; 2310 aa[0] = 0.0; 2311 } 2312 } 2313 } 2314 } else if (vecs) bb[i] = diag*xx[i]; 2315 } 2316 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2317 if (vecs) { 2318 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2319 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2320 } 2321 2322 /* zero the rows */ 2323 for (i=0; i<is_n; i++) { 2324 row = is_idx[i]; 2325 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2326 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2327 for (k=0; k<count; k++) { 2328 aa[0] = zero; 2329 aa += bs; 2330 } 2331 if (diag != (PetscScalar)0.0) { 2332 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2333 } 2334 } 2335 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2336 PetscFunctionReturn(0); 2337 } 2338 2339 #undef __FUNCT__ 2340 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2341 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2342 { 2343 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2344 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2345 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2346 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2347 PetscErrorCode ierr; 2348 PetscInt ridx,cidx,bs2=a->bs2; 2349 PetscBool roworiented=a->roworiented; 2350 MatScalar *ap,value,*aa=a->a,*bap; 2351 2352 PetscFunctionBegin; 2353 if (v) PetscValidScalarPointer(v,6); 2354 for (k=0; k<m; k++) { /* loop over added rows */ 2355 row = im[k]; 2356 brow = row/bs; 2357 if (row < 0) continue; 2358 #if defined(PETSC_USE_DEBUG) 2359 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2360 #endif 2361 rp = aj + ai[brow]; 2362 ap = aa + bs2*ai[brow]; 2363 rmax = imax[brow]; 2364 nrow = ailen[brow]; 2365 low = 0; 2366 high = nrow; 2367 for (l=0; l<n; l++) { /* loop over added columns */ 2368 if (in[l] < 0) continue; 2369 #if defined(PETSC_USE_DEBUG) 2370 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2371 #endif 2372 col = in[l]; bcol = col/bs; 2373 ridx = row % bs; cidx = col % bs; 2374 if (roworiented) { 2375 value = v[l + k*n]; 2376 } else { 2377 value = v[k + l*m]; 2378 } 2379 if (col <= lastcol) low = 0; else high = nrow; 2380 lastcol = col; 2381 while (high-low > 7) { 2382 t = (low+high)/2; 2383 if (rp[t] > bcol) high = t; 2384 else low = t; 2385 } 2386 for (i=low; i<high; i++) { 2387 if (rp[i] > bcol) break; 2388 if (rp[i] == bcol) { 2389 bap = ap + bs2*i + bs*cidx + ridx; 2390 if (is == ADD_VALUES) *bap += value; 2391 else *bap = value; 2392 goto noinsert1; 2393 } 2394 } 2395 if (nonew == 1) goto noinsert1; 2396 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2397 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2398 N = nrow++ - 1; high++; 2399 /* shift up all the later entries in this row */ 2400 for (ii=N; ii>=i; ii--) { 2401 rp[ii+1] = rp[ii]; 2402 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2403 } 2404 if (N>=i) { 2405 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2406 } 2407 rp[i] = bcol; 2408 ap[bs2*i + bs*cidx + ridx] = value; 2409 a->nz++; 2410 noinsert1:; 2411 low = i; 2412 } 2413 ailen[brow] = nrow; 2414 } 2415 A->same_nonzero = PETSC_FALSE; 2416 PetscFunctionReturn(0); 2417 } 2418 2419 #undef __FUNCT__ 2420 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2421 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2422 { 2423 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2424 Mat outA; 2425 PetscErrorCode ierr; 2426 PetscBool row_identity,col_identity; 2427 2428 PetscFunctionBegin; 2429 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2430 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2431 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2432 if (!row_identity || !col_identity) { 2433 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2434 } 2435 2436 outA = inA; 2437 inA->factortype = MAT_FACTOR_LU; 2438 2439 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2440 2441 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2442 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2443 a->row = row; 2444 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2445 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2446 a->col = col; 2447 2448 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2449 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2450 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2451 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2452 2453 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2454 if (!a->solve_work) { 2455 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2456 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2457 } 2458 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2459 2460 PetscFunctionReturn(0); 2461 } 2462 2463 EXTERN_C_BEGIN 2464 #undef __FUNCT__ 2465 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2466 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2467 { 2468 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2469 PetscInt i,nz,mbs; 2470 2471 PetscFunctionBegin; 2472 nz = baij->maxnz; 2473 mbs = baij->mbs; 2474 for (i=0; i<nz; i++) { 2475 baij->j[i] = indices[i]; 2476 } 2477 baij->nz = nz; 2478 for (i=0; i<mbs; i++) { 2479 baij->ilen[i] = baij->imax[i]; 2480 } 2481 PetscFunctionReturn(0); 2482 } 2483 EXTERN_C_END 2484 2485 #undef __FUNCT__ 2486 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2487 /*@ 2488 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2489 in the matrix. 2490 2491 Input Parameters: 2492 + mat - the SeqBAIJ matrix 2493 - indices - the column indices 2494 2495 Level: advanced 2496 2497 Notes: 2498 This can be called if you have precomputed the nonzero structure of the 2499 matrix and want to provide it to the matrix object to improve the performance 2500 of the MatSetValues() operation. 2501 2502 You MUST have set the correct numbers of nonzeros per row in the call to 2503 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2504 2505 MUST be called before any calls to MatSetValues(); 2506 2507 @*/ 2508 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2509 { 2510 PetscErrorCode ierr; 2511 2512 PetscFunctionBegin; 2513 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2514 PetscValidPointer(indices,2); 2515 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr); 2516 PetscFunctionReturn(0); 2517 } 2518 2519 #undef __FUNCT__ 2520 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2521 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2522 { 2523 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2524 PetscErrorCode ierr; 2525 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2526 PetscReal atmp; 2527 PetscScalar *x,zero = 0.0; 2528 MatScalar *aa; 2529 PetscInt ncols,brow,krow,kcol; 2530 2531 PetscFunctionBegin; 2532 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2533 bs = A->rmap->bs; 2534 aa = a->a; 2535 ai = a->i; 2536 aj = a->j; 2537 mbs = a->mbs; 2538 2539 ierr = VecSet(v,zero);CHKERRQ(ierr); 2540 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2541 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2542 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2543 for (i=0; i<mbs; i++) { 2544 ncols = ai[1] - ai[0]; ai++; 2545 brow = bs*i; 2546 for (j=0; j<ncols; j++){ 2547 for (kcol=0; kcol<bs; kcol++){ 2548 for (krow=0; krow<bs; krow++){ 2549 atmp = PetscAbsScalar(*aa);aa++; 2550 row = brow + krow; /* row index */ 2551 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2552 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2553 } 2554 } 2555 aj++; 2556 } 2557 } 2558 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2559 PetscFunctionReturn(0); 2560 } 2561 2562 #undef __FUNCT__ 2563 #define __FUNCT__ "MatCopy_SeqBAIJ" 2564 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2565 { 2566 PetscErrorCode ierr; 2567 2568 PetscFunctionBegin; 2569 /* If the two matrices have the same copy implementation, use fast copy. */ 2570 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2571 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2572 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2573 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2574 2575 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2576 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2577 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2578 } else { 2579 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2580 } 2581 PetscFunctionReturn(0); 2582 } 2583 2584 #undef __FUNCT__ 2585 #define __FUNCT__ "MatSetUp_SeqBAIJ" 2586 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2587 { 2588 PetscErrorCode ierr; 2589 2590 PetscFunctionBegin; 2591 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2592 PetscFunctionReturn(0); 2593 } 2594 2595 #undef __FUNCT__ 2596 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ" 2597 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2598 { 2599 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2600 PetscFunctionBegin; 2601 *array = a->a; 2602 PetscFunctionReturn(0); 2603 } 2604 2605 #undef __FUNCT__ 2606 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ" 2607 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2608 { 2609 PetscFunctionBegin; 2610 PetscFunctionReturn(0); 2611 } 2612 2613 #undef __FUNCT__ 2614 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2615 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2616 { 2617 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2618 PetscErrorCode ierr; 2619 PetscInt i,bs=Y->rmap->bs,j,bs2=bs*bs; 2620 PetscBLASInt one=1; 2621 2622 PetscFunctionBegin; 2623 if (str == SAME_NONZERO_PATTERN) { 2624 PetscScalar alpha = a; 2625 PetscBLASInt bnz = PetscBLASIntCast(x->nz*bs2); 2626 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2627 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2628 if (y->xtoy && y->XtoY != X) { 2629 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2630 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2631 } 2632 if (!y->xtoy) { /* get xtoy */ 2633 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2634 y->XtoY = X; 2635 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2636 } 2637 for (i=0; i<x->nz; i++) { 2638 j = 0; 2639 while (j < bs2){ 2640 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2641 j++; 2642 } 2643 } 2644 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2645 } else { 2646 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2647 } 2648 PetscFunctionReturn(0); 2649 } 2650 2651 #undef __FUNCT__ 2652 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2653 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2654 { 2655 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2656 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2657 MatScalar *aa = a->a; 2658 2659 PetscFunctionBegin; 2660 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2661 PetscFunctionReturn(0); 2662 } 2663 2664 #undef __FUNCT__ 2665 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2666 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2667 { 2668 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2669 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2670 MatScalar *aa = a->a; 2671 2672 PetscFunctionBegin; 2673 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2674 PetscFunctionReturn(0); 2675 } 2676 2677 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2678 2679 #undef __FUNCT__ 2680 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2681 /* 2682 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2683 */ 2684 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2685 { 2686 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2687 PetscErrorCode ierr; 2688 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2689 PetscInt nz = a->i[m],row,*jj,mr,col; 2690 2691 PetscFunctionBegin; 2692 *nn = n; 2693 if (!ia) PetscFunctionReturn(0); 2694 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2695 else { 2696 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2697 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2698 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2699 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2700 jj = a->j; 2701 for (i=0; i<nz; i++) { 2702 collengths[jj[i]]++; 2703 } 2704 cia[0] = oshift; 2705 for (i=0; i<n; i++) { 2706 cia[i+1] = cia[i] + collengths[i]; 2707 } 2708 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2709 jj = a->j; 2710 for (row=0; row<m; row++) { 2711 mr = a->i[row+1] - a->i[row]; 2712 for (i=0; i<mr; i++) { 2713 col = *jj++; 2714 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2715 } 2716 } 2717 ierr = PetscFree(collengths);CHKERRQ(ierr); 2718 *ia = cia; *ja = cja; 2719 } 2720 PetscFunctionReturn(0); 2721 } 2722 2723 #undef __FUNCT__ 2724 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2725 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2726 { 2727 PetscErrorCode ierr; 2728 2729 PetscFunctionBegin; 2730 if (!ia) PetscFunctionReturn(0); 2731 ierr = PetscFree(*ia);CHKERRQ(ierr); 2732 ierr = PetscFree(*ja);CHKERRQ(ierr); 2733 PetscFunctionReturn(0); 2734 } 2735 2736 #undef __FUNCT__ 2737 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2738 PetscErrorCode MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2739 { 2740 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f; 2741 PetscErrorCode ierr; 2742 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow; 2743 PetscScalar dx,*y,*xx,*w3_array; 2744 PetscScalar *vscale_array; 2745 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2746 Vec w1=coloring->w1,w2=coloring->w2,w3; 2747 void *fctx = coloring->fctx; 2748 PetscBool flg = PETSC_FALSE; 2749 PetscInt ctype=coloring->ctype,N,col_start=0,col_end=0; 2750 Vec x1_tmp; 2751 2752 PetscFunctionBegin; 2753 PetscValidHeaderSpecific(J,MAT_CLASSID,1); 2754 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2); 2755 PetscValidHeaderSpecific(x1,VEC_CLASSID,3); 2756 if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2757 2758 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2759 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2760 ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr); 2761 if (flg) { 2762 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2763 } else { 2764 PetscBool assembled; 2765 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2766 if (assembled) { 2767 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2768 } 2769 } 2770 2771 x1_tmp = x1; 2772 if (!coloring->vscale){ 2773 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2774 } 2775 2776 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2777 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2778 } 2779 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2780 2781 /* Set w1 = F(x1) */ 2782 if (!coloring->fset) { 2783 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2784 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2785 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2786 } else { 2787 coloring->fset = PETSC_FALSE; 2788 } 2789 2790 if (!coloring->w3) { 2791 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2792 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2793 } 2794 w3 = coloring->w3; 2795 2796 CHKMEMQ; 2797 /* Compute all the local scale factors, including ghost points */ 2798 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2799 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2800 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2801 if (ctype == IS_COLORING_GHOSTED){ 2802 col_start = 0; col_end = N; 2803 } else if (ctype == IS_COLORING_GLOBAL){ 2804 xx = xx - start; 2805 vscale_array = vscale_array - start; 2806 col_start = start; col_end = N + start; 2807 } CHKMEMQ; 2808 for (col=col_start; col<col_end; col++){ 2809 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2810 if (coloring->htype[0] == 'w') { 2811 dx = 1.0 + unorm; 2812 } else { 2813 dx = xx[col]; 2814 } 2815 if (dx == (PetscScalar)0.0) dx = 1.0; 2816 #if !defined(PETSC_USE_COMPLEX) 2817 if (dx < umin && dx >= 0.0) dx = umin; 2818 else if (dx < 0.0 && dx > -umin) dx = -umin; 2819 #else 2820 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2821 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2822 #endif 2823 dx *= epsilon; 2824 vscale_array[col] = (PetscScalar)1.0/dx; 2825 } CHKMEMQ; 2826 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2827 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2828 if (ctype == IS_COLORING_GLOBAL){ 2829 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2830 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2831 } 2832 CHKMEMQ; 2833 if (coloring->vscaleforrow) { 2834 vscaleforrow = coloring->vscaleforrow; 2835 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2836 2837 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2838 /* 2839 Loop over each color 2840 */ 2841 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2842 for (k=0; k<coloring->ncolors; k++) { 2843 coloring->currentcolor = k; 2844 for (i=0; i<bs; i++) { 2845 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2846 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2847 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2848 /* 2849 Loop over each column associated with color 2850 adding the perturbation to the vector w3. 2851 */ 2852 for (l=0; l<coloring->ncolumns[k]; l++) { 2853 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2854 if (coloring->htype[0] == 'w') { 2855 dx = 1.0 + unorm; 2856 } else { 2857 dx = xx[col]; 2858 } 2859 if (dx == (PetscScalar)0.0) dx = 1.0; 2860 #if !defined(PETSC_USE_COMPLEX) 2861 if (dx < umin && dx >= 0.0) dx = umin; 2862 else if (dx < 0.0 && dx > -umin) dx = -umin; 2863 #else 2864 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2865 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2866 #endif 2867 dx *= epsilon; 2868 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2869 w3_array[col] += dx; 2870 } 2871 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2872 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2873 2874 /* 2875 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2876 w2 = F(x1 + dx) - F(x1) 2877 */ 2878 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2879 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2880 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2881 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2882 2883 /* 2884 Loop over rows of vector, putting results into Jacobian matrix 2885 */ 2886 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2887 for (l=0; l<coloring->nrows[k]; l++) { 2888 row = bs*coloring->rows[k][l]; /* local row index */ 2889 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2890 for (j=0; j<bs; j++) { 2891 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2892 srows[j] = row + start + j; 2893 } 2894 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2895 } 2896 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2897 } 2898 } /* endof for each color */ 2899 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2900 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2901 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2902 ierr = PetscFree(srows);CHKERRQ(ierr); 2903 2904 coloring->currentcolor = -1; 2905 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2906 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2907 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2908 PetscFunctionReturn(0); 2909 } 2910 2911 /* -------------------------------------------------------------------*/ 2912 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2913 MatGetRow_SeqBAIJ, 2914 MatRestoreRow_SeqBAIJ, 2915 MatMult_SeqBAIJ_N, 2916 /* 4*/ MatMultAdd_SeqBAIJ_N, 2917 MatMultTranspose_SeqBAIJ, 2918 MatMultTransposeAdd_SeqBAIJ, 2919 0, 2920 0, 2921 0, 2922 /*10*/ 0, 2923 MatLUFactor_SeqBAIJ, 2924 0, 2925 0, 2926 MatTranspose_SeqBAIJ, 2927 /*15*/ MatGetInfo_SeqBAIJ, 2928 MatEqual_SeqBAIJ, 2929 MatGetDiagonal_SeqBAIJ, 2930 MatDiagonalScale_SeqBAIJ, 2931 MatNorm_SeqBAIJ, 2932 /*20*/ 0, 2933 MatAssemblyEnd_SeqBAIJ, 2934 MatSetOption_SeqBAIJ, 2935 MatZeroEntries_SeqBAIJ, 2936 /*24*/ MatZeroRows_SeqBAIJ, 2937 0, 2938 0, 2939 0, 2940 0, 2941 /*29*/ MatSetUp_SeqBAIJ, 2942 0, 2943 0, 2944 0, 2945 0, 2946 /*34*/ MatDuplicate_SeqBAIJ, 2947 0, 2948 0, 2949 MatILUFactor_SeqBAIJ, 2950 0, 2951 /*39*/ MatAXPY_SeqBAIJ, 2952 MatGetSubMatrices_SeqBAIJ, 2953 MatIncreaseOverlap_SeqBAIJ, 2954 MatGetValues_SeqBAIJ, 2955 MatCopy_SeqBAIJ, 2956 /*44*/ 0, 2957 MatScale_SeqBAIJ, 2958 0, 2959 0, 2960 MatZeroRowsColumns_SeqBAIJ, 2961 /*49*/ 0, 2962 MatGetRowIJ_SeqBAIJ, 2963 MatRestoreRowIJ_SeqBAIJ, 2964 MatGetColumnIJ_SeqBAIJ, 2965 MatRestoreColumnIJ_SeqBAIJ, 2966 /*54*/ MatFDColoringCreate_SeqAIJ, 2967 0, 2968 0, 2969 0, 2970 MatSetValuesBlocked_SeqBAIJ, 2971 /*59*/ MatGetSubMatrix_SeqBAIJ, 2972 MatDestroy_SeqBAIJ, 2973 MatView_SeqBAIJ, 2974 0, 2975 0, 2976 /*64*/ 0, 2977 0, 2978 0, 2979 0, 2980 0, 2981 /*69*/ MatGetRowMaxAbs_SeqBAIJ, 2982 0, 2983 MatConvert_Basic, 2984 0, 2985 0, 2986 /*74*/ 0, 2987 MatFDColoringApply_BAIJ, 2988 0, 2989 0, 2990 0, 2991 /*79*/ 0, 2992 0, 2993 0, 2994 0, 2995 MatLoad_SeqBAIJ, 2996 /*84*/ 0, 2997 0, 2998 0, 2999 0, 3000 0, 3001 /*89*/ 0, 3002 0, 3003 0, 3004 0, 3005 0, 3006 /*94*/ 0, 3007 0, 3008 0, 3009 0, 3010 0, 3011 /*99*/0, 3012 0, 3013 0, 3014 0, 3015 0, 3016 /*104*/0, 3017 MatRealPart_SeqBAIJ, 3018 MatImaginaryPart_SeqBAIJ, 3019 0, 3020 0, 3021 /*109*/0, 3022 0, 3023 0, 3024 0, 3025 MatMissingDiagonal_SeqBAIJ, 3026 /*114*/0, 3027 0, 3028 0, 3029 0, 3030 0, 3031 /*119*/0, 3032 0, 3033 MatMultHermitianTranspose_SeqBAIJ, 3034 MatMultHermitianTransposeAdd_SeqBAIJ, 3035 0, 3036 /*124*/0, 3037 0, 3038 MatInvertBlockDiagonal_SeqBAIJ 3039 }; 3040 3041 EXTERN_C_BEGIN 3042 #undef __FUNCT__ 3043 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 3044 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3045 { 3046 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3047 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 3048 PetscErrorCode ierr; 3049 3050 PetscFunctionBegin; 3051 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3052 3053 /* allocate space for values if not already there */ 3054 if (!aij->saved_values) { 3055 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 3056 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 3057 } 3058 3059 /* copy values over */ 3060 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3061 PetscFunctionReturn(0); 3062 } 3063 EXTERN_C_END 3064 3065 EXTERN_C_BEGIN 3066 #undef __FUNCT__ 3067 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 3068 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3069 { 3070 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3071 PetscErrorCode ierr; 3072 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 3073 3074 PetscFunctionBegin; 3075 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3076 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 3077 3078 /* copy values over */ 3079 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3080 PetscFunctionReturn(0); 3081 } 3082 EXTERN_C_END 3083 3084 EXTERN_C_BEGIN 3085 extern PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 3086 extern PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 3087 EXTERN_C_END 3088 3089 EXTERN_C_BEGIN 3090 #undef __FUNCT__ 3091 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 3092 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 3093 { 3094 Mat_SeqBAIJ *b; 3095 PetscErrorCode ierr; 3096 PetscInt i,mbs,nbs,bs2; 3097 PetscBool flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 3098 3099 PetscFunctionBegin; 3100 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 3101 if (nz == MAT_SKIP_ALLOCATION) { 3102 skipallocation = PETSC_TRUE; 3103 nz = 0; 3104 } 3105 3106 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3107 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3108 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3109 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3110 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3111 3112 B->preallocated = PETSC_TRUE; 3113 3114 mbs = B->rmap->n/bs; 3115 nbs = B->cmap->n/bs; 3116 bs2 = bs*bs; 3117 3118 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 3119 3120 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3121 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 3122 if (nnz) { 3123 for (i=0; i<mbs; i++) { 3124 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 3125 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 3126 } 3127 } 3128 3129 b = (Mat_SeqBAIJ*)B->data; 3130 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 3131 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3132 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3133 3134 if (!flg) { 3135 switch (bs) { 3136 case 1: 3137 B->ops->mult = MatMult_SeqBAIJ_1; 3138 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3139 B->ops->sor = MatSOR_SeqBAIJ_1; 3140 break; 3141 case 2: 3142 B->ops->mult = MatMult_SeqBAIJ_2; 3143 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3144 B->ops->sor = MatSOR_SeqBAIJ_2; 3145 break; 3146 case 3: 3147 B->ops->mult = MatMult_SeqBAIJ_3; 3148 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3149 B->ops->sor = MatSOR_SeqBAIJ_3; 3150 break; 3151 case 4: 3152 B->ops->mult = MatMult_SeqBAIJ_4; 3153 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3154 B->ops->sor = MatSOR_SeqBAIJ_4; 3155 break; 3156 case 5: 3157 B->ops->mult = MatMult_SeqBAIJ_5; 3158 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3159 B->ops->sor = MatSOR_SeqBAIJ_5; 3160 break; 3161 case 6: 3162 B->ops->mult = MatMult_SeqBAIJ_6; 3163 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3164 B->ops->sor = MatSOR_SeqBAIJ_6; 3165 break; 3166 case 7: 3167 B->ops->mult = MatMult_SeqBAIJ_7; 3168 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3169 B->ops->sor = MatSOR_SeqBAIJ_7; 3170 break; 3171 case 15: 3172 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3173 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3174 B->ops->sor = MatSOR_SeqBAIJ_N; 3175 break; 3176 default: 3177 B->ops->mult = MatMult_SeqBAIJ_N; 3178 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3179 B->ops->sor = MatSOR_SeqBAIJ_N; 3180 break; 3181 } 3182 } 3183 b->mbs = mbs; 3184 b->nbs = nbs; 3185 if (!skipallocation) { 3186 if (!b->imax) { 3187 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 3188 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt)); 3189 b->free_imax_ilen = PETSC_TRUE; 3190 } 3191 /* b->ilen will count nonzeros in each block row so far. */ 3192 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 3193 if (!nnz) { 3194 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3195 else if (nz < 0) nz = 1; 3196 for (i=0; i<mbs; i++) b->imax[i] = nz; 3197 nz = nz*mbs; 3198 } else { 3199 nz = 0; 3200 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 3201 } 3202 3203 /* allocate the matrix space */ 3204 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 3205 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 3206 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 3207 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 3208 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3209 b->singlemalloc = PETSC_TRUE; 3210 b->i[0] = 0; 3211 for (i=1; i<mbs+1; i++) { 3212 b->i[i] = b->i[i-1] + b->imax[i-1]; 3213 } 3214 b->free_a = PETSC_TRUE; 3215 b->free_ij = PETSC_TRUE; 3216 } else { 3217 b->free_a = PETSC_FALSE; 3218 b->free_ij = PETSC_FALSE; 3219 } 3220 3221 b->bs2 = bs2; 3222 b->mbs = mbs; 3223 b->nz = 0; 3224 b->maxnz = nz; 3225 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3226 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 3227 PetscFunctionReturn(0); 3228 } 3229 EXTERN_C_END 3230 3231 EXTERN_C_BEGIN 3232 #undef __FUNCT__ 3233 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3234 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3235 { 3236 PetscInt i,m,nz,nz_max=0,*nnz; 3237 PetscScalar *values=0; 3238 PetscErrorCode ierr; 3239 3240 PetscFunctionBegin; 3241 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3242 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3243 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3244 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3245 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3246 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3247 m = B->rmap->n/bs; 3248 3249 if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 3250 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3251 for (i=0; i<m; i++) { 3252 nz = ii[i+1]- ii[i]; 3253 if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 3254 nz_max = PetscMax(nz_max, nz); 3255 nnz[i] = nz; 3256 } 3257 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3258 ierr = PetscFree(nnz);CHKERRQ(ierr); 3259 3260 values = (PetscScalar*)V; 3261 if (!values) { 3262 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3263 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3264 } 3265 for (i=0; i<m; i++) { 3266 PetscInt ncols = ii[i+1] - ii[i]; 3267 const PetscInt *icols = jj + ii[i]; 3268 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3269 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3270 } 3271 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3272 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3273 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3274 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3275 PetscFunctionReturn(0); 3276 } 3277 EXTERN_C_END 3278 3279 3280 EXTERN_C_BEGIN 3281 extern PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3282 extern PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*); 3283 #if defined(PETSC_HAVE_MUMPS) 3284 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3285 #endif 3286 extern PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 3287 EXTERN_C_END 3288 3289 /*MC 3290 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3291 block sparse compressed row format. 3292 3293 Options Database Keys: 3294 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3295 3296 Level: beginner 3297 3298 .seealso: MatCreateSeqBAIJ() 3299 M*/ 3300 3301 EXTERN_C_BEGIN 3302 extern PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3303 EXTERN_C_END 3304 3305 EXTERN_C_BEGIN 3306 #undef __FUNCT__ 3307 #define __FUNCT__ "MatCreate_SeqBAIJ" 3308 PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3309 { 3310 PetscErrorCode ierr; 3311 PetscMPIInt size; 3312 Mat_SeqBAIJ *b; 3313 3314 PetscFunctionBegin; 3315 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 3316 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3317 3318 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3319 B->data = (void*)b; 3320 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3321 b->row = 0; 3322 b->col = 0; 3323 b->icol = 0; 3324 b->reallocs = 0; 3325 b->saved_values = 0; 3326 3327 b->roworiented = PETSC_TRUE; 3328 b->nonew = 0; 3329 b->diag = 0; 3330 b->solve_work = 0; 3331 b->mult_work = 0; 3332 B->spptr = 0; 3333 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3334 b->keepnonzeropattern = PETSC_FALSE; 3335 b->xtoy = 0; 3336 b->XtoY = 0; 3337 B->same_nonzero = PETSC_FALSE; 3338 3339 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C", 3340 "MatGetFactorAvailable_seqbaij_petsc", 3341 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3342 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C", 3343 "MatGetFactor_seqbaij_petsc", 3344 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3345 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_bstrm_C", 3346 "MatGetFactor_seqbaij_bstrm", 3347 MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr); 3348 #if defined(PETSC_HAVE_MUMPS) 3349 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr); 3350 #endif 3351 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatInvertBlockDiagonal_C", 3352 "MatInvertBlockDiagonal_SeqBAIJ", 3353 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3354 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3355 "MatStoreValues_SeqBAIJ", 3356 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3357 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3358 "MatRetrieveValues_SeqBAIJ", 3359 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3360 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 3361 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 3362 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3363 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 3364 "MatConvert_SeqBAIJ_SeqAIJ", 3365 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3366 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 3367 "MatConvert_SeqBAIJ_SeqSBAIJ", 3368 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3369 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 3370 "MatSeqBAIJSetPreallocation_SeqBAIJ", 3371 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3372 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 3373 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 3374 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3375 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C", 3376 "MatConvert_SeqBAIJ_SeqBSTRM", 3377 MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3378 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3379 "MatIsTranspose_SeqBAIJ", 3380 MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3381 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3382 PetscFunctionReturn(0); 3383 } 3384 EXTERN_C_END 3385 3386 #undef __FUNCT__ 3387 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3388 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3389 { 3390 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3391 PetscErrorCode ierr; 3392 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3393 3394 PetscFunctionBegin; 3395 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3396 3397 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3398 c->imax = a->imax; 3399 c->ilen = a->ilen; 3400 c->free_imax_ilen = PETSC_FALSE; 3401 } else { 3402 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3403 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3404 for (i=0; i<mbs; i++) { 3405 c->imax[i] = a->imax[i]; 3406 c->ilen[i] = a->ilen[i]; 3407 } 3408 c->free_imax_ilen = PETSC_TRUE; 3409 } 3410 3411 /* allocate the matrix space */ 3412 if (mallocmatspace){ 3413 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3414 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3415 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3416 ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr); 3417 c->i = a->i; 3418 c->j = a->j; 3419 c->singlemalloc = PETSC_FALSE; 3420 c->free_a = PETSC_TRUE; 3421 c->free_ij = PETSC_FALSE; 3422 c->parent = A; 3423 C->preallocated = PETSC_TRUE; 3424 C->assembled = PETSC_TRUE; 3425 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3426 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3427 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3428 } else { 3429 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3430 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3431 c->singlemalloc = PETSC_TRUE; 3432 c->free_a = PETSC_TRUE; 3433 c->free_ij = PETSC_TRUE; 3434 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3435 if (mbs > 0) { 3436 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3437 if (cpvalues == MAT_COPY_VALUES) { 3438 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3439 } else { 3440 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3441 } 3442 } 3443 C->preallocated = PETSC_TRUE; 3444 C->assembled = PETSC_TRUE; 3445 } 3446 } 3447 3448 c->roworiented = a->roworiented; 3449 c->nonew = a->nonew; 3450 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3451 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3452 c->bs2 = a->bs2; 3453 c->mbs = a->mbs; 3454 c->nbs = a->nbs; 3455 3456 if (a->diag) { 3457 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3458 c->diag = a->diag; 3459 c->free_diag = PETSC_FALSE; 3460 } else { 3461 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3462 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3463 for (i=0; i<mbs; i++) { 3464 c->diag[i] = a->diag[i]; 3465 } 3466 c->free_diag = PETSC_TRUE; 3467 } 3468 } else c->diag = 0; 3469 c->nz = a->nz; 3470 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3471 c->solve_work = 0; 3472 c->mult_work = 0; 3473 3474 c->compressedrow.use = a->compressedrow.use; 3475 c->compressedrow.nrows = a->compressedrow.nrows; 3476 c->compressedrow.check = a->compressedrow.check; 3477 if (a->compressedrow.use){ 3478 i = a->compressedrow.nrows; 3479 ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 3480 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3481 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3482 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3483 } else { 3484 c->compressedrow.use = PETSC_FALSE; 3485 c->compressedrow.i = PETSC_NULL; 3486 c->compressedrow.rindex = PETSC_NULL; 3487 } 3488 C->same_nonzero = A->same_nonzero; 3489 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3490 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3491 PetscFunctionReturn(0); 3492 } 3493 3494 #undef __FUNCT__ 3495 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3496 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3497 { 3498 PetscErrorCode ierr; 3499 3500 PetscFunctionBegin; 3501 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 3502 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3503 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3504 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3505 PetscFunctionReturn(0); 3506 } 3507 3508 #undef __FUNCT__ 3509 #define __FUNCT__ "MatLoad_SeqBAIJ" 3510 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3511 { 3512 Mat_SeqBAIJ *a; 3513 PetscErrorCode ierr; 3514 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3515 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3516 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3517 PetscInt *masked,nmask,tmp,bs2,ishift; 3518 PetscMPIInt size; 3519 int fd; 3520 PetscScalar *aa; 3521 MPI_Comm comm = ((PetscObject)viewer)->comm; 3522 3523 PetscFunctionBegin; 3524 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3525 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3526 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3527 bs2 = bs*bs; 3528 3529 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3530 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3531 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3532 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3533 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3534 M = header[1]; N = header[2]; nz = header[3]; 3535 3536 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3537 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3538 3539 /* 3540 This code adds extra rows to make sure the number of rows is 3541 divisible by the blocksize 3542 */ 3543 mbs = M/bs; 3544 extra_rows = bs - M + bs*(mbs); 3545 if (extra_rows == bs) extra_rows = 0; 3546 else mbs++; 3547 if (extra_rows) { 3548 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3549 } 3550 3551 /* Set global sizes if not already set */ 3552 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3553 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3554 } else { /* Check if the matrix global sizes are correct */ 3555 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3556 if (rows < 0 && cols < 0){ /* user might provide local size instead of global size */ 3557 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3558 } 3559 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3560 } 3561 3562 /* read in row lengths */ 3563 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3564 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3565 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3566 3567 /* read in column indices */ 3568 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3569 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3570 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3571 3572 /* loop over row lengths determining block row lengths */ 3573 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3574 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3575 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3576 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3577 rowcount = 0; 3578 nzcount = 0; 3579 for (i=0; i<mbs; i++) { 3580 nmask = 0; 3581 for (j=0; j<bs; j++) { 3582 kmax = rowlengths[rowcount]; 3583 for (k=0; k<kmax; k++) { 3584 tmp = jj[nzcount++]/bs; 3585 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3586 } 3587 rowcount++; 3588 } 3589 browlengths[i] += nmask; 3590 /* zero out the mask elements we set */ 3591 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3592 } 3593 3594 /* Do preallocation */ 3595 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3596 a = (Mat_SeqBAIJ*)newmat->data; 3597 3598 /* set matrix "i" values */ 3599 a->i[0] = 0; 3600 for (i=1; i<= mbs; i++) { 3601 a->i[i] = a->i[i-1] + browlengths[i-1]; 3602 a->ilen[i-1] = browlengths[i-1]; 3603 } 3604 a->nz = 0; 3605 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3606 3607 /* read in nonzero values */ 3608 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3609 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3610 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3611 3612 /* set "a" and "j" values into matrix */ 3613 nzcount = 0; jcount = 0; 3614 for (i=0; i<mbs; i++) { 3615 nzcountb = nzcount; 3616 nmask = 0; 3617 for (j=0; j<bs; j++) { 3618 kmax = rowlengths[i*bs+j]; 3619 for (k=0; k<kmax; k++) { 3620 tmp = jj[nzcount++]/bs; 3621 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3622 } 3623 } 3624 /* sort the masked values */ 3625 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3626 3627 /* set "j" values into matrix */ 3628 maskcount = 1; 3629 for (j=0; j<nmask; j++) { 3630 a->j[jcount++] = masked[j]; 3631 mask[masked[j]] = maskcount++; 3632 } 3633 /* set "a" values into matrix */ 3634 ishift = bs2*a->i[i]; 3635 for (j=0; j<bs; j++) { 3636 kmax = rowlengths[i*bs+j]; 3637 for (k=0; k<kmax; k++) { 3638 tmp = jj[nzcountb]/bs ; 3639 block = mask[tmp] - 1; 3640 point = jj[nzcountb] - bs*tmp; 3641 idx = ishift + bs2*block + j + bs*point; 3642 a->a[idx] = (MatScalar)aa[nzcountb++]; 3643 } 3644 } 3645 /* zero out the mask elements we set */ 3646 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3647 } 3648 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3649 3650 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3651 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3652 ierr = PetscFree(aa);CHKERRQ(ierr); 3653 ierr = PetscFree(jj);CHKERRQ(ierr); 3654 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3655 3656 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3657 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3658 ierr = MatView_Private(newmat);CHKERRQ(ierr); 3659 PetscFunctionReturn(0); 3660 } 3661 3662 #undef __FUNCT__ 3663 #define __FUNCT__ "MatCreateSeqBAIJ" 3664 /*@C 3665 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3666 compressed row) format. For good matrix assembly performance the 3667 user should preallocate the matrix storage by setting the parameter nz 3668 (or the array nnz). By setting these parameters accurately, performance 3669 during matrix assembly can be increased by more than a factor of 50. 3670 3671 Collective on MPI_Comm 3672 3673 Input Parameters: 3674 + comm - MPI communicator, set to PETSC_COMM_SELF 3675 . bs - size of block 3676 . m - number of rows 3677 . n - number of columns 3678 . nz - number of nonzero blocks per block row (same for all rows) 3679 - nnz - array containing the number of nonzero blocks in the various block rows 3680 (possibly different for each block row) or PETSC_NULL 3681 3682 Output Parameter: 3683 . A - the matrix 3684 3685 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3686 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3687 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3688 3689 Options Database Keys: 3690 . -mat_no_unroll - uses code that does not unroll the loops in the 3691 block calculations (much slower) 3692 . -mat_block_size - size of the blocks to use 3693 3694 Level: intermediate 3695 3696 Notes: 3697 The number of rows and columns must be divisible by blocksize. 3698 3699 If the nnz parameter is given then the nz parameter is ignored 3700 3701 A nonzero block is any block that as 1 or more nonzeros in it 3702 3703 The block AIJ format is fully compatible with standard Fortran 77 3704 storage. That is, the stored row and column indices can begin at 3705 either one (as in Fortran) or zero. See the users' manual for details. 3706 3707 Specify the preallocated storage with either nz or nnz (not both). 3708 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3709 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3710 matrices. 3711 3712 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3713 @*/ 3714 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3715 { 3716 PetscErrorCode ierr; 3717 3718 PetscFunctionBegin; 3719 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3720 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3721 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3722 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3723 PetscFunctionReturn(0); 3724 } 3725 3726 #undef __FUNCT__ 3727 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3728 /*@C 3729 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3730 per row in the matrix. For good matrix assembly performance the 3731 user should preallocate the matrix storage by setting the parameter nz 3732 (or the array nnz). By setting these parameters accurately, performance 3733 during matrix assembly can be increased by more than a factor of 50. 3734 3735 Collective on MPI_Comm 3736 3737 Input Parameters: 3738 + A - the matrix 3739 . bs - size of block 3740 . nz - number of block nonzeros per block row (same for all rows) 3741 - nnz - array containing the number of block nonzeros in the various block rows 3742 (possibly different for each block row) or PETSC_NULL 3743 3744 Options Database Keys: 3745 . -mat_no_unroll - uses code that does not unroll the loops in the 3746 block calculations (much slower) 3747 . -mat_block_size - size of the blocks to use 3748 3749 Level: intermediate 3750 3751 Notes: 3752 If the nnz parameter is given then the nz parameter is ignored 3753 3754 You can call MatGetInfo() to get information on how effective the preallocation was; 3755 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3756 You can also run with the option -info and look for messages with the string 3757 malloc in them to see if additional memory allocation was needed. 3758 3759 The block AIJ format is fully compatible with standard Fortran 77 3760 storage. That is, the stored row and column indices can begin at 3761 either one (as in Fortran) or zero. See the users' manual for details. 3762 3763 Specify the preallocated storage with either nz or nnz (not both). 3764 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3765 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3766 3767 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3768 @*/ 3769 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3770 { 3771 PetscErrorCode ierr; 3772 3773 PetscFunctionBegin; 3774 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3775 PetscValidType(B,1); 3776 PetscValidLogicalCollectiveInt(B,bs,2); 3777 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3778 PetscFunctionReturn(0); 3779 } 3780 3781 #undef __FUNCT__ 3782 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3783 /*@C 3784 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3785 (the default sequential PETSc format). 3786 3787 Collective on MPI_Comm 3788 3789 Input Parameters: 3790 + A - the matrix 3791 . i - the indices into j for the start of each local row (starts with zero) 3792 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3793 - v - optional values in the matrix 3794 3795 Level: developer 3796 3797 .keywords: matrix, aij, compressed row, sparse 3798 3799 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3800 @*/ 3801 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3802 { 3803 PetscErrorCode ierr; 3804 3805 PetscFunctionBegin; 3806 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3807 PetscValidType(B,1); 3808 PetscValidLogicalCollectiveInt(B,bs,2); 3809 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3810 PetscFunctionReturn(0); 3811 } 3812 3813 3814 #undef __FUNCT__ 3815 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3816 /*@ 3817 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3818 3819 Collective on MPI_Comm 3820 3821 Input Parameters: 3822 + comm - must be an MPI communicator of size 1 3823 . bs - size of block 3824 . m - number of rows 3825 . n - number of columns 3826 . i - row indices 3827 . j - column indices 3828 - a - matrix values 3829 3830 Output Parameter: 3831 . mat - the matrix 3832 3833 Level: advanced 3834 3835 Notes: 3836 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3837 once the matrix is destroyed 3838 3839 You cannot set new nonzero locations into this matrix, that will generate an error. 3840 3841 The i and j indices are 0 based 3842 3843 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3844 3845 3846 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3847 3848 @*/ 3849 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3850 { 3851 PetscErrorCode ierr; 3852 PetscInt ii; 3853 Mat_SeqBAIJ *baij; 3854 3855 PetscFunctionBegin; 3856 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3857 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3858 3859 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3860 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3861 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3862 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3863 baij = (Mat_SeqBAIJ*)(*mat)->data; 3864 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3865 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3866 3867 baij->i = i; 3868 baij->j = j; 3869 baij->a = a; 3870 baij->singlemalloc = PETSC_FALSE; 3871 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3872 baij->free_a = PETSC_FALSE; 3873 baij->free_ij = PETSC_FALSE; 3874 3875 for (ii=0; ii<m; ii++) { 3876 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3877 #if defined(PETSC_USE_DEBUG) 3878 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3879 #endif 3880 } 3881 #if defined(PETSC_USE_DEBUG) 3882 for (ii=0; ii<baij->i[m]; ii++) { 3883 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3884 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3885 } 3886 #endif 3887 3888 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3889 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3890 PetscFunctionReturn(0); 3891 } 3892