1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <../src/mat/blockinvert.h> 9 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 19 PetscReal shift = 0.0; 20 21 PetscFunctionBegin; 22 if (a->idiagvalid) { 23 if (values)*values = a->idiag; 24 PetscFunctionReturn(0); 25 } 26 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 27 diag_offset = a->diag; 28 if (!a->idiag) { 29 ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 30 ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 31 } 32 diag = a->idiag; 33 mdiag = a->idiag+bs2*mbs; 34 if (values) *values = a->idiag; 35 /* factor and invert each block */ 36 switch (bs){ 37 case 1: 38 for (i=0; i<mbs; i++) { 39 odiag = v + 1*diag_offset[i]; 40 diag[0] = odiag[0]; 41 mdiag[0] = odiag[0]; 42 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 43 diag += 1; 44 mdiag += 1; 45 } 46 break; 47 case 2: 48 for (i=0; i<mbs; i++) { 49 odiag = v + 4*diag_offset[i]; 50 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 51 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 52 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 53 diag += 4; 54 mdiag += 4; 55 } 56 break; 57 case 3: 58 for (i=0; i<mbs; i++) { 59 odiag = v + 9*diag_offset[i]; 60 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 61 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 62 diag[8] = odiag[8]; 63 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 64 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 65 mdiag[8] = odiag[8]; 66 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 67 diag += 9; 68 mdiag += 9; 69 } 70 break; 71 case 4: 72 for (i=0; i<mbs; i++) { 73 odiag = v + 16*diag_offset[i]; 74 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 75 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 76 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 77 diag += 16; 78 mdiag += 16; 79 } 80 break; 81 case 5: 82 for (i=0; i<mbs; i++) { 83 odiag = v + 25*diag_offset[i]; 84 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 85 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 86 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 87 diag += 25; 88 mdiag += 25; 89 } 90 break; 91 case 6: 92 for (i=0; i<mbs; i++) { 93 odiag = v + 36*diag_offset[i]; 94 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 95 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 97 diag += 36; 98 mdiag += 36; 99 } 100 break; 101 case 7: 102 for (i=0; i<mbs; i++) { 103 odiag = v + 49*diag_offset[i]; 104 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 105 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 107 diag += 49; 108 mdiag += 49; 109 } 110 break; 111 default: 112 ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr); 113 for (i=0; i<mbs; i++) { 114 odiag = v + bs2*diag_offset[i]; 115 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 117 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 118 diag += bs2; 119 mdiag += bs2; 120 } 121 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 122 } 123 a->idiagvalid = PETSC_TRUE; 124 PetscFunctionReturn(0); 125 } 126 127 #undef __FUNCT__ 128 #define __FUNCT__ "MatSOR_SeqBAIJ_1" 129 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 130 { 131 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 132 PetscScalar *x,x1,s1; 133 const PetscScalar *b; 134 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 135 PetscErrorCode ierr; 136 PetscInt m = a->mbs,i,i2,nz,j; 137 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 138 139 PetscFunctionBegin; 140 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 141 its = its*lits; 142 if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 143 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 144 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 145 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 146 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 147 148 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 149 150 diag = a->diag; 151 idiag = a->idiag; 152 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 153 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 154 155 if (flag & SOR_ZERO_INITIAL_GUESS) { 156 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 157 x[0] = b[0]*idiag[0]; 158 i2 = 1; 159 idiag += 1; 160 for (i=1; i<m; i++) { 161 v = aa + ai[i]; 162 vi = aj + ai[i]; 163 nz = diag[i] - ai[i]; 164 s1 = b[i2]; 165 for (j=0; j<nz; j++) { 166 s1 -= v[j]*x[vi[j]]; 167 } 168 x[i2] = idiag[0]*s1; 169 idiag += 1; 170 i2 += 1; 171 } 172 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 173 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 174 } 175 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 176 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 177 i2 = 0; 178 mdiag = a->idiag+a->mbs; 179 for (i=0; i<m; i++) { 180 x1 = x[i2]; 181 x[i2] = mdiag[0]*x1; 182 mdiag += 1; 183 i2 += 1; 184 } 185 ierr = PetscLogFlops(m);CHKERRQ(ierr); 186 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 187 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 188 } 189 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 190 idiag = a->idiag+a->mbs - 1; 191 i2 = m - 1; 192 x1 = x[i2]; 193 x[i2] = idiag[0]*x1; 194 idiag -= 1; 195 i2 -= 1; 196 for (i=m-2; i>=0; i--) { 197 v = aa + (diag[i]+1); 198 vi = aj + diag[i] + 1; 199 nz = ai[i+1] - diag[i] - 1; 200 s1 = x[i2]; 201 for (j=0; j<nz; j++) { 202 s1 -= v[j]*x[vi[j]]; 203 } 204 x[i2] = idiag[0]*s1; 205 idiag -= 1; 206 i2 -= 1; 207 } 208 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 209 } 210 } else { 211 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 212 } 213 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 214 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 215 PetscFunctionReturn(0); 216 } 217 218 #undef __FUNCT__ 219 #define __FUNCT__ "MatSOR_SeqBAIJ_2" 220 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 221 { 222 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 223 PetscScalar *x,x1,x2,s1,s2; 224 const PetscScalar *b; 225 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 226 PetscErrorCode ierr; 227 PetscInt m = a->mbs,i,i2,nz,idx,j,it; 228 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 229 230 PetscFunctionBegin; 231 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 232 its = its*lits; 233 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 234 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 235 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 236 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 237 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 238 239 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 240 241 diag = a->diag; 242 idiag = a->idiag; 243 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 244 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 245 246 if (flag & SOR_ZERO_INITIAL_GUESS) { 247 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 248 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 249 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 250 i2 = 2; 251 idiag += 4; 252 for (i=1; i<m; i++) { 253 v = aa + 4*ai[i]; 254 vi = aj + ai[i]; 255 nz = diag[i] - ai[i]; 256 s1 = b[i2]; s2 = b[i2+1]; 257 for (j=0; j<nz; j++) { 258 idx = 2*vi[j]; 259 it = 4*j; 260 x1 = x[idx]; x2 = x[1+idx]; 261 s1 -= v[it]*x1 + v[it+2]*x2; 262 s2 -= v[it+1]*x1 + v[it+3]*x2; 263 } 264 x[i2] = idiag[0]*s1 + idiag[2]*s2; 265 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 266 idiag += 4; 267 i2 += 2; 268 } 269 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 270 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 271 } 272 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 273 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 274 i2 = 0; 275 mdiag = a->idiag+4*a->mbs; 276 for (i=0; i<m; i++) { 277 x1 = x[i2]; x2 = x[i2+1]; 278 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 279 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 280 mdiag += 4; 281 i2 += 2; 282 } 283 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 284 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 285 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 286 } 287 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 288 idiag = a->idiag+4*a->mbs - 4; 289 i2 = 2*m - 2; 290 x1 = x[i2]; x2 = x[i2+1]; 291 x[i2] = idiag[0]*x1 + idiag[2]*x2; 292 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 293 idiag -= 4; 294 i2 -= 2; 295 for (i=m-2; i>=0; i--) { 296 v = aa + 4*(diag[i]+1); 297 vi = aj + diag[i] + 1; 298 nz = ai[i+1] - diag[i] - 1; 299 s1 = x[i2]; s2 = x[i2+1]; 300 for (j=0; j<nz; j++) { 301 idx = 2*vi[j]; 302 it = 4*j; 303 x1 = x[idx]; x2 = x[1+idx]; 304 s1 -= v[it]*x1 + v[it+2]*x2; 305 s2 -= v[it+1]*x1 + v[it+3]*x2; 306 } 307 x[i2] = idiag[0]*s1 + idiag[2]*s2; 308 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 309 idiag -= 4; 310 i2 -= 2; 311 } 312 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 313 } 314 } else { 315 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 316 } 317 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 318 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 319 PetscFunctionReturn(0); 320 } 321 322 #undef __FUNCT__ 323 #define __FUNCT__ "MatSOR_SeqBAIJ_3" 324 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 325 { 326 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 327 PetscScalar *x,x1,x2,x3,s1,s2,s3; 328 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 329 const PetscScalar *b; 330 PetscErrorCode ierr; 331 PetscInt m = a->mbs,i,i2,nz,idx; 332 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 333 334 PetscFunctionBegin; 335 its = its*lits; 336 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 337 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 338 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 339 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 340 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 341 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 342 343 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 344 345 diag = a->diag; 346 idiag = a->idiag; 347 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 348 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 349 350 if (flag & SOR_ZERO_INITIAL_GUESS) { 351 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 352 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 353 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 354 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 355 i2 = 3; 356 idiag += 9; 357 for (i=1; i<m; i++) { 358 v = aa + 9*ai[i]; 359 vi = aj + ai[i]; 360 nz = diag[i] - ai[i]; 361 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 362 while (nz--) { 363 idx = 3*(*vi++); 364 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 365 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 366 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 367 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 368 v += 9; 369 } 370 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 371 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 372 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 373 idiag += 9; 374 i2 += 3; 375 } 376 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 377 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 378 } 379 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 380 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 381 i2 = 0; 382 mdiag = a->idiag+9*a->mbs; 383 for (i=0; i<m; i++) { 384 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 385 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 386 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 387 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 388 mdiag += 9; 389 i2 += 3; 390 } 391 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 392 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 393 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 394 } 395 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 396 idiag = a->idiag+9*a->mbs - 9; 397 i2 = 3*m - 3; 398 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 399 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 400 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 401 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 402 idiag -= 9; 403 i2 -= 3; 404 for (i=m-2; i>=0; i--) { 405 v = aa + 9*(diag[i]+1); 406 vi = aj + diag[i] + 1; 407 nz = ai[i+1] - diag[i] - 1; 408 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 409 while (nz--) { 410 idx = 3*(*vi++); 411 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 412 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 413 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 414 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 415 v += 9; 416 } 417 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 418 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 419 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 420 idiag -= 9; 421 i2 -= 3; 422 } 423 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 424 } 425 } else { 426 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 427 } 428 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 429 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 430 PetscFunctionReturn(0); 431 } 432 433 #undef __FUNCT__ 434 #define __FUNCT__ "MatSOR_SeqBAIJ_4" 435 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 436 { 437 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 438 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 439 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 440 const PetscScalar *b; 441 PetscErrorCode ierr; 442 PetscInt m = a->mbs,i,i2,nz,idx; 443 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 444 445 PetscFunctionBegin; 446 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 447 its = its*lits; 448 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 449 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 450 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 451 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 452 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 453 454 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 455 456 diag = a->diag; 457 idiag = a->idiag; 458 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 459 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 460 461 if (flag & SOR_ZERO_INITIAL_GUESS) { 462 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 463 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 464 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 465 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 466 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 467 i2 = 4; 468 idiag += 16; 469 for (i=1; i<m; i++) { 470 v = aa + 16*ai[i]; 471 vi = aj + ai[i]; 472 nz = diag[i] - ai[i]; 473 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 474 while (nz--) { 475 idx = 4*(*vi++); 476 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 477 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 478 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 479 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 480 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 481 v += 16; 482 } 483 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 484 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 485 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 486 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 487 idiag += 16; 488 i2 += 4; 489 } 490 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 491 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 492 } 493 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 494 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 495 i2 = 0; 496 mdiag = a->idiag+16*a->mbs; 497 for (i=0; i<m; i++) { 498 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 499 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 500 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 501 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 502 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 503 mdiag += 16; 504 i2 += 4; 505 } 506 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 507 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 508 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 509 } 510 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 511 idiag = a->idiag+16*a->mbs - 16; 512 i2 = 4*m - 4; 513 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 514 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 515 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 516 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 517 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 518 idiag -= 16; 519 i2 -= 4; 520 for (i=m-2; i>=0; i--) { 521 v = aa + 16*(diag[i]+1); 522 vi = aj + diag[i] + 1; 523 nz = ai[i+1] - diag[i] - 1; 524 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 525 while (nz--) { 526 idx = 4*(*vi++); 527 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 528 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 529 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 530 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 531 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 532 v += 16; 533 } 534 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 535 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 536 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 537 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 538 idiag -= 16; 539 i2 -= 4; 540 } 541 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 542 } 543 } else { 544 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 545 } 546 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 547 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 548 PetscFunctionReturn(0); 549 } 550 551 #undef __FUNCT__ 552 #define __FUNCT__ "MatSOR_SeqBAIJ_5" 553 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 554 { 555 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 556 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 557 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 558 const PetscScalar *b; 559 PetscErrorCode ierr; 560 PetscInt m = a->mbs,i,i2,nz,idx; 561 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 562 563 PetscFunctionBegin; 564 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 565 its = its*lits; 566 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 567 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 568 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 569 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 570 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 571 572 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 573 574 diag = a->diag; 575 idiag = a->idiag; 576 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 577 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 578 579 if (flag & SOR_ZERO_INITIAL_GUESS) { 580 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 581 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 582 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 583 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 584 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 585 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 586 i2 = 5; 587 idiag += 25; 588 for (i=1; i<m; i++) { 589 v = aa + 25*ai[i]; 590 vi = aj + ai[i]; 591 nz = diag[i] - ai[i]; 592 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 593 while (nz--) { 594 idx = 5*(*vi++); 595 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 596 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 597 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 598 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 599 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 600 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 601 v += 25; 602 } 603 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 604 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 605 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 606 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 607 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 608 idiag += 25; 609 i2 += 5; 610 } 611 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 612 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 613 } 614 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 615 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 616 i2 = 0; 617 mdiag = a->idiag+25*a->mbs; 618 for (i=0; i<m; i++) { 619 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 620 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 621 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 622 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 623 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 624 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 625 mdiag += 25; 626 i2 += 5; 627 } 628 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 629 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 630 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 631 } 632 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 633 idiag = a->idiag+25*a->mbs - 25; 634 i2 = 5*m - 5; 635 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 636 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 637 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 638 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 639 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 640 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 641 idiag -= 25; 642 i2 -= 5; 643 for (i=m-2; i>=0; i--) { 644 v = aa + 25*(diag[i]+1); 645 vi = aj + diag[i] + 1; 646 nz = ai[i+1] - diag[i] - 1; 647 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 648 while (nz--) { 649 idx = 5*(*vi++); 650 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 651 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 652 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 653 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 654 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 655 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 656 v += 25; 657 } 658 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 659 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 660 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 661 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 662 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 663 idiag -= 25; 664 i2 -= 5; 665 } 666 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 667 } 668 } else { 669 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 670 } 671 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 672 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 673 PetscFunctionReturn(0); 674 } 675 676 #undef __FUNCT__ 677 #define __FUNCT__ "MatSOR_SeqBAIJ_6" 678 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 679 { 680 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 681 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 682 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 683 const PetscScalar *b; 684 PetscErrorCode ierr; 685 PetscInt m = a->mbs,i,i2,nz,idx; 686 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 687 688 PetscFunctionBegin; 689 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 690 its = its*lits; 691 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 692 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 693 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 694 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 695 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 696 697 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 698 699 diag = a->diag; 700 idiag = a->idiag; 701 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 702 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 703 704 if (flag & SOR_ZERO_INITIAL_GUESS) { 705 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 706 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 707 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 708 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 709 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 710 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 711 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 712 i2 = 6; 713 idiag += 36; 714 for (i=1; i<m; i++) { 715 v = aa + 36*ai[i]; 716 vi = aj + ai[i]; 717 nz = diag[i] - ai[i]; 718 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 719 while (nz--) { 720 idx = 6*(*vi++); 721 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 722 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 723 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 724 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 725 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 726 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 727 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 728 v += 36; 729 } 730 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 731 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 732 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 733 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 734 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 735 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 736 idiag += 36; 737 i2 += 6; 738 } 739 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 740 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 741 } 742 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 743 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 744 i2 = 0; 745 mdiag = a->idiag+36*a->mbs; 746 for (i=0; i<m; i++) { 747 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 748 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 749 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 750 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 751 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 752 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 753 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 754 mdiag += 36; 755 i2 += 6; 756 } 757 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 758 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 759 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 760 } 761 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 762 idiag = a->idiag+36*a->mbs - 36; 763 i2 = 6*m - 6; 764 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 765 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 766 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 767 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 768 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 769 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 770 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 771 idiag -= 36; 772 i2 -= 6; 773 for (i=m-2; i>=0; i--) { 774 v = aa + 36*(diag[i]+1); 775 vi = aj + diag[i] + 1; 776 nz = ai[i+1] - diag[i] - 1; 777 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 778 while (nz--) { 779 idx = 6*(*vi++); 780 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 781 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 782 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 783 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 784 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 785 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 786 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 787 v += 36; 788 } 789 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 790 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 791 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 792 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 793 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 794 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 795 idiag -= 36; 796 i2 -= 6; 797 } 798 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 799 } 800 } else { 801 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 802 } 803 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 804 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 805 PetscFunctionReturn(0); 806 } 807 808 #undef __FUNCT__ 809 #define __FUNCT__ "MatSOR_SeqBAIJ_7" 810 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 811 { 812 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 813 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 814 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 815 const PetscScalar *b; 816 PetscErrorCode ierr; 817 PetscInt m = a->mbs,i,i2,nz,idx; 818 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 819 820 PetscFunctionBegin; 821 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 822 its = its*lits; 823 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 824 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 825 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 826 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 827 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 828 829 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 830 831 diag = a->diag; 832 idiag = a->idiag; 833 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 834 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 835 836 if (flag & SOR_ZERO_INITIAL_GUESS) { 837 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 838 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 839 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 840 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 841 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 842 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 843 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 844 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 845 i2 = 7; 846 idiag += 49; 847 for (i=1; i<m; i++) { 848 v = aa + 49*ai[i]; 849 vi = aj + ai[i]; 850 nz = diag[i] - ai[i]; 851 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 852 while (nz--) { 853 idx = 7*(*vi++); 854 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 855 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 856 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 857 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 858 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 859 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 860 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 861 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 862 v += 49; 863 } 864 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 865 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 866 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 867 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 868 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 869 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 870 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 871 idiag += 49; 872 i2 += 7; 873 } 874 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 875 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 876 } 877 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 878 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 879 i2 = 0; 880 mdiag = a->idiag+49*a->mbs; 881 for (i=0; i<m; i++) { 882 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 883 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7; 884 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7; 885 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7; 886 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7; 887 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7; 888 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7; 889 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7; 890 mdiag += 49; 891 i2 += 7; 892 } 893 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 894 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 895 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 896 } 897 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 898 idiag = a->idiag+49*a->mbs - 49; 899 i2 = 7*m - 7; 900 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 901 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 902 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 903 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 904 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 905 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 906 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 907 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 908 idiag -= 49; 909 i2 -= 7; 910 for (i=m-2; i>=0; i--) { 911 v = aa + 49*(diag[i]+1); 912 vi = aj + diag[i] + 1; 913 nz = ai[i+1] - diag[i] - 1; 914 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 915 while (nz--) { 916 idx = 7*(*vi++); 917 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 918 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 919 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 920 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 921 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 922 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 923 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 924 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 925 v += 49; 926 } 927 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 928 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 929 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 930 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 931 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 932 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 933 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 934 idiag -= 49; 935 i2 -= 7; 936 } 937 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 938 } 939 } else { 940 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 941 } 942 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 943 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 944 PetscFunctionReturn(0); 945 } 946 947 #undef __FUNCT__ 948 #define __FUNCT__ "MatSOR_SeqBAIJ_N" 949 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 950 { 951 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 952 PetscScalar *x,*work,*w,*workt; 953 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 954 const PetscScalar *b; 955 PetscErrorCode ierr; 956 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j; 957 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 958 959 PetscFunctionBegin; 960 its = its*lits; 961 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 962 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 963 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 964 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 965 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 966 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 967 968 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 969 970 diag = a->diag; 971 idiag = a->idiag; 972 if (!a->mult_work) { 973 k = PetscMax(A->rmap->n,A->cmap->n); 974 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 975 } 976 work = a->mult_work; 977 if (!a->sor_work) { 978 ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr); 979 } 980 w = a->sor_work; 981 982 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 983 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 984 985 if (flag & SOR_ZERO_INITIAL_GUESS) { 986 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 987 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 988 /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 989 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 990 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/ 991 i2 = bs; 992 idiag += bs2; 993 for (i=1; i<m; i++) { 994 v = aa + bs2*ai[i]; 995 vi = aj + ai[i]; 996 nz = diag[i] - ai[i]; 997 998 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 999 /* copy all rows of x that are needed into contiguous space */ 1000 workt = work; 1001 for (j=0; j<nz; j++) { 1002 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1003 workt += bs; 1004 } 1005 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1006 /*s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 1007 while (nz--) { 1008 idx = N*(*vi++); 1009 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 1010 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1011 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1012 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1013 v += N2; 1014 } */ 1015 1016 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1017 /* x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1018 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1019 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/ 1020 1021 idiag += bs2; 1022 i2 += bs; 1023 } 1024 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 1025 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1026 } 1027 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 1028 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1029 i2 = 0; 1030 mdiag = a->idiag+bs2*a->mbs; 1031 ierr = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr); 1032 for (i=0; i<m; i++) { 1033 PetscKernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2); 1034 /* x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1035 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 1036 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 1037 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */ 1038 1039 mdiag += bs2; 1040 i2 += bs; 1041 } 1042 ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr); 1043 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1044 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 1045 } 1046 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1047 idiag = a->idiag+bs2*a->mbs - bs2; 1048 i2 = bs*m - bs; 1049 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1050 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1051 /*x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1052 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 1053 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 1054 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/ 1055 idiag -= bs2; 1056 i2 -= bs; 1057 for (i=m-2; i>=0; i--) { 1058 v = aa + bs2*(diag[i]+1); 1059 vi = aj + diag[i] + 1; 1060 nz = ai[i+1] - diag[i] - 1; 1061 1062 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1063 /* copy all rows of x that are needed into contiguous space */ 1064 workt = work; 1065 for (j=0; j<nz; j++) { 1066 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1067 workt += bs; 1068 } 1069 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1070 /* s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 1071 while (nz--) { 1072 idx = N*(*vi++); 1073 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 1074 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1075 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1076 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1077 v += N2; 1078 } */ 1079 1080 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1081 /*x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1082 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1083 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */ 1084 idiag -= bs2; 1085 i2 -= bs; 1086 } 1087 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1088 } 1089 } else { 1090 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 1091 } 1092 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1093 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1094 PetscFunctionReturn(0); 1095 } 1096 1097 /* 1098 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1099 */ 1100 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1101 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1102 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1103 #define matsetvaluesblocked4_ matsetvaluesblocked4 1104 #endif 1105 1106 EXTERN_C_BEGIN 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "matsetvaluesblocked4_" 1109 void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 1110 { 1111 Mat A = *AA; 1112 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1113 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 1114 PetscInt *ai=a->i,*ailen=a->ilen; 1115 PetscInt *aj=a->j,stepval,lastcol = -1; 1116 const PetscScalar *value = v; 1117 MatScalar *ap,*aa = a->a,*bap; 1118 1119 PetscFunctionBegin; 1120 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 1121 stepval = (n-1)*4; 1122 for (k=0; k<m; k++) { /* loop over added rows */ 1123 row = im[k]; 1124 rp = aj + ai[row]; 1125 ap = aa + 16*ai[row]; 1126 nrow = ailen[row]; 1127 low = 0; 1128 high = nrow; 1129 for (l=0; l<n; l++) { /* loop over added columns */ 1130 col = in[l]; 1131 if (col <= lastcol) low = 0; else high = nrow; 1132 lastcol = col; 1133 value = v + k*(stepval+4 + l)*4; 1134 while (high-low > 7) { 1135 t = (low+high)/2; 1136 if (rp[t] > col) high = t; 1137 else low = t; 1138 } 1139 for (i=low; i<high; i++) { 1140 if (rp[i] > col) break; 1141 if (rp[i] == col) { 1142 bap = ap + 16*i; 1143 for (ii=0; ii<4; ii++,value+=stepval) { 1144 for (jj=ii; jj<16; jj+=4) { 1145 bap[jj] += *value++; 1146 } 1147 } 1148 goto noinsert2; 1149 } 1150 } 1151 N = nrow++ - 1; 1152 high++; /* added new column index thus must search to one higher than before */ 1153 /* shift up all the later entries in this row */ 1154 for (ii=N; ii>=i; ii--) { 1155 rp[ii+1] = rp[ii]; 1156 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1157 } 1158 if (N >= i) { 1159 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1160 } 1161 rp[i] = col; 1162 bap = ap + 16*i; 1163 for (ii=0; ii<4; ii++,value+=stepval) { 1164 for (jj=ii; jj<16; jj+=4) { 1165 bap[jj] = *value++; 1166 } 1167 } 1168 noinsert2:; 1169 low = i; 1170 } 1171 ailen[row] = nrow; 1172 } 1173 PetscFunctionReturnVoid(); 1174 } 1175 EXTERN_C_END 1176 1177 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1178 #define matsetvalues4_ MATSETVALUES4 1179 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1180 #define matsetvalues4_ matsetvalues4 1181 #endif 1182 1183 EXTERN_C_BEGIN 1184 #undef __FUNCT__ 1185 #define __FUNCT__ "MatSetValues4_" 1186 void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1187 { 1188 Mat A = *AA; 1189 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1190 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1191 PetscInt *ai=a->i,*ailen=a->ilen; 1192 PetscInt *aj=a->j,brow,bcol; 1193 PetscInt ridx,cidx,lastcol = -1; 1194 MatScalar *ap,value,*aa=a->a,*bap; 1195 1196 PetscFunctionBegin; 1197 for (k=0; k<m; k++) { /* loop over added rows */ 1198 row = im[k]; brow = row/4; 1199 rp = aj + ai[brow]; 1200 ap = aa + 16*ai[brow]; 1201 nrow = ailen[brow]; 1202 low = 0; 1203 high = nrow; 1204 for (l=0; l<n; l++) { /* loop over added columns */ 1205 col = in[l]; bcol = col/4; 1206 ridx = row % 4; cidx = col % 4; 1207 value = v[l + k*n]; 1208 if (col <= lastcol) low = 0; else high = nrow; 1209 lastcol = col; 1210 while (high-low > 7) { 1211 t = (low+high)/2; 1212 if (rp[t] > bcol) high = t; 1213 else low = t; 1214 } 1215 for (i=low; i<high; i++) { 1216 if (rp[i] > bcol) break; 1217 if (rp[i] == bcol) { 1218 bap = ap + 16*i + 4*cidx + ridx; 1219 *bap += value; 1220 goto noinsert1; 1221 } 1222 } 1223 N = nrow++ - 1; 1224 high++; /* added new column thus must search to one higher than before */ 1225 /* shift up all the later entries in this row */ 1226 for (ii=N; ii>=i; ii--) { 1227 rp[ii+1] = rp[ii]; 1228 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1229 } 1230 if (N>=i) { 1231 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1232 } 1233 rp[i] = bcol; 1234 ap[16*i + 4*cidx + ridx] = value; 1235 noinsert1:; 1236 low = i; 1237 } 1238 ailen[brow] = nrow; 1239 } 1240 PetscFunctionReturnVoid(); 1241 } 1242 EXTERN_C_END 1243 1244 /* 1245 Checks for missing diagonals 1246 */ 1247 #undef __FUNCT__ 1248 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1249 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1250 { 1251 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1252 PetscErrorCode ierr; 1253 PetscInt *diag,*jj = a->j,i; 1254 1255 PetscFunctionBegin; 1256 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1257 *missing = PETSC_FALSE; 1258 if (A->rmap->n > 0 && !jj) { 1259 *missing = PETSC_TRUE; 1260 if (d) *d = 0; 1261 PetscInfo(A,"Matrix has no entries therefore is missing diagonal"); 1262 } else { 1263 diag = a->diag; 1264 for (i=0; i<a->mbs; i++) { 1265 if (jj[diag[i]] != i) { 1266 *missing = PETSC_TRUE; 1267 if (d) *d = i; 1268 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1269 break; 1270 } 1271 } 1272 } 1273 PetscFunctionReturn(0); 1274 } 1275 1276 #undef __FUNCT__ 1277 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1278 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1279 { 1280 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1281 PetscErrorCode ierr; 1282 PetscInt i,j,m = a->mbs; 1283 1284 PetscFunctionBegin; 1285 if (!a->diag) { 1286 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1287 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1288 a->free_diag = PETSC_TRUE; 1289 } 1290 for (i=0; i<m; i++) { 1291 a->diag[i] = a->i[i+1]; 1292 for (j=a->i[i]; j<a->i[i+1]; j++) { 1293 if (a->j[j] == i) { 1294 a->diag[i] = j; 1295 break; 1296 } 1297 } 1298 } 1299 PetscFunctionReturn(0); 1300 } 1301 1302 1303 extern PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1304 1305 #undef __FUNCT__ 1306 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1307 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 1308 { 1309 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1310 PetscErrorCode ierr; 1311 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt; 1312 PetscInt *tia, *tja; 1313 1314 PetscFunctionBegin; 1315 *nn = n; 1316 if (!ia) PetscFunctionReturn(0); 1317 if (symmetric) { 1318 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1319 nz = tia[n]; 1320 } else { 1321 tia = a->i; tja = a->j; 1322 } 1323 1324 if (!blockcompressed && bs > 1) { 1325 (*nn) *= bs; 1326 /* malloc & create the natural set of indices */ 1327 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1328 if (n) { 1329 (*ia)[0] = 0; 1330 for (j=1; j<bs; j++) { 1331 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1332 } 1333 } 1334 1335 for (i=1; i<n; i++) { 1336 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1337 for (j=1; j<bs; j++) { 1338 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1339 } 1340 } 1341 if (n) { 1342 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1343 } 1344 1345 if (ja) { 1346 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1347 cnt = 0; 1348 for (i=0; i<n; i++) { 1349 for (j=0; j<bs; j++) { 1350 for (k=tia[i]; k<tia[i+1]; k++) { 1351 for (l=0; l<bs; l++) { 1352 (*ja)[cnt++] = bs*tja[k] + l; 1353 } 1354 } 1355 } 1356 } 1357 } 1358 1359 n *= bs; 1360 nz *= bs*bs; 1361 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1362 ierr = PetscFree(tia);CHKERRQ(ierr); 1363 ierr = PetscFree(tja);CHKERRQ(ierr); 1364 } 1365 } else if (oshift == 1) { 1366 if (symmetric) { 1367 PetscInt nz = tia[A->rmap->n/bs]; 1368 /* add 1 to i and j indices */ 1369 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1370 *ia = tia; 1371 if (ja) { 1372 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1373 *ja = tja; 1374 } 1375 } else { 1376 PetscInt nz = a->i[A->rmap->n/bs]; 1377 /* malloc space and add 1 to i and j indices */ 1378 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1379 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1380 if (ja) { 1381 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1382 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1383 } 1384 } 1385 } else { 1386 *ia = tia; 1387 if (ja) *ja = tja; 1388 } 1389 1390 PetscFunctionReturn(0); 1391 } 1392 1393 #undef __FUNCT__ 1394 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1395 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 1396 { 1397 PetscErrorCode ierr; 1398 1399 PetscFunctionBegin; 1400 if (!ia) PetscFunctionReturn(0); 1401 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1402 ierr = PetscFree(*ia);CHKERRQ(ierr); 1403 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1404 } 1405 PetscFunctionReturn(0); 1406 } 1407 1408 #undef __FUNCT__ 1409 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1410 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1411 { 1412 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1413 PetscErrorCode ierr; 1414 1415 PetscFunctionBegin; 1416 #if defined(PETSC_USE_LOG) 1417 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1418 #endif 1419 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1420 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1421 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1422 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1423 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1424 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1425 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1426 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1427 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1428 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1429 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1430 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1431 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1432 1433 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1434 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1435 ierr = PetscFree(A->data);CHKERRQ(ierr); 1436 1437 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1438 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1439 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1440 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1441 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1442 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1443 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1444 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1445 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1446 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",PETSC_NULL);CHKERRQ(ierr); 1447 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1448 PetscFunctionReturn(0); 1449 } 1450 1451 #undef __FUNCT__ 1452 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1453 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1454 { 1455 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1456 PetscErrorCode ierr; 1457 1458 PetscFunctionBegin; 1459 switch (op) { 1460 case MAT_ROW_ORIENTED: 1461 a->roworiented = flg; 1462 break; 1463 case MAT_KEEP_NONZERO_PATTERN: 1464 a->keepnonzeropattern = flg; 1465 break; 1466 case MAT_NEW_NONZERO_LOCATIONS: 1467 a->nonew = (flg ? 0 : 1); 1468 break; 1469 case MAT_NEW_NONZERO_LOCATION_ERR: 1470 a->nonew = (flg ? -1 : 0); 1471 break; 1472 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1473 a->nonew = (flg ? -2 : 0); 1474 break; 1475 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1476 a->nounused = (flg ? -1 : 0); 1477 break; 1478 case MAT_CHECK_COMPRESSED_ROW: 1479 a->compressedrow.check = flg; 1480 break; 1481 case MAT_NEW_DIAGONALS: 1482 case MAT_IGNORE_OFF_PROC_ENTRIES: 1483 case MAT_USE_HASH_TABLE: 1484 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1485 break; 1486 case MAT_SPD: 1487 case MAT_SYMMETRIC: 1488 case MAT_STRUCTURALLY_SYMMETRIC: 1489 case MAT_HERMITIAN: 1490 case MAT_SYMMETRY_ETERNAL: 1491 /* These options are handled directly by MatSetOption() */ 1492 break; 1493 default: 1494 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1495 } 1496 PetscFunctionReturn(0); 1497 } 1498 1499 #undef __FUNCT__ 1500 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1501 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1502 { 1503 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1504 PetscErrorCode ierr; 1505 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1506 MatScalar *aa,*aa_i; 1507 PetscScalar *v_i; 1508 1509 PetscFunctionBegin; 1510 bs = A->rmap->bs; 1511 ai = a->i; 1512 aj = a->j; 1513 aa = a->a; 1514 bs2 = a->bs2; 1515 1516 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1517 1518 bn = row/bs; /* Block number */ 1519 bp = row % bs; /* Block Position */ 1520 M = ai[bn+1] - ai[bn]; 1521 *nz = bs*M; 1522 1523 if (v) { 1524 *v = 0; 1525 if (*nz) { 1526 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1527 for (i=0; i<M; i++) { /* for each block in the block row */ 1528 v_i = *v + i*bs; 1529 aa_i = aa + bs2*(ai[bn] + i); 1530 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1531 } 1532 } 1533 } 1534 1535 if (idx) { 1536 *idx = 0; 1537 if (*nz) { 1538 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1539 for (i=0; i<M; i++) { /* for each block in the block row */ 1540 idx_i = *idx + i*bs; 1541 itmp = bs*aj[ai[bn] + i]; 1542 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1543 } 1544 } 1545 } 1546 PetscFunctionReturn(0); 1547 } 1548 1549 #undef __FUNCT__ 1550 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1551 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1552 { 1553 PetscErrorCode ierr; 1554 1555 PetscFunctionBegin; 1556 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1557 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1558 PetscFunctionReturn(0); 1559 } 1560 1561 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1562 1563 #undef __FUNCT__ 1564 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1565 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1566 { 1567 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1568 Mat C; 1569 PetscErrorCode ierr; 1570 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1571 PetscInt *rows,*cols,bs2=a->bs2; 1572 MatScalar *array; 1573 1574 PetscFunctionBegin; 1575 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1576 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1577 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1578 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1579 1580 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1581 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1582 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1583 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1584 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr); 1585 ierr = PetscFree(col);CHKERRQ(ierr); 1586 } else { 1587 C = *B; 1588 } 1589 1590 array = a->a; 1591 ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 1592 for (i=0; i<mbs; i++) { 1593 cols[0] = i*bs; 1594 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1595 len = ai[i+1] - ai[i]; 1596 for (j=0; j<len; j++) { 1597 rows[0] = (*aj++)*bs; 1598 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1599 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1600 array += bs2; 1601 } 1602 } 1603 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1604 1605 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1606 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1607 1608 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1609 *B = C; 1610 } else { 1611 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1612 } 1613 PetscFunctionReturn(0); 1614 } 1615 1616 EXTERN_C_BEGIN 1617 #undef __FUNCT__ 1618 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1619 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1620 { 1621 PetscErrorCode ierr; 1622 Mat Btrans; 1623 1624 PetscFunctionBegin; 1625 *f = PETSC_FALSE; 1626 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1627 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1628 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1629 PetscFunctionReturn(0); 1630 } 1631 EXTERN_C_END 1632 1633 #undef __FUNCT__ 1634 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1635 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1636 { 1637 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1638 PetscErrorCode ierr; 1639 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1640 int fd; 1641 PetscScalar *aa; 1642 FILE *file; 1643 1644 PetscFunctionBegin; 1645 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1646 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1647 col_lens[0] = MAT_FILE_CLASSID; 1648 1649 col_lens[1] = A->rmap->N; 1650 col_lens[2] = A->cmap->n; 1651 col_lens[3] = a->nz*bs2; 1652 1653 /* store lengths of each row and write (including header) to file */ 1654 count = 0; 1655 for (i=0; i<a->mbs; i++) { 1656 for (j=0; j<bs; j++) { 1657 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1658 } 1659 } 1660 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1661 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1662 1663 /* store column indices (zero start index) */ 1664 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1665 count = 0; 1666 for (i=0; i<a->mbs; i++) { 1667 for (j=0; j<bs; j++) { 1668 for (k=a->i[i]; k<a->i[i+1]; k++) { 1669 for (l=0; l<bs; l++) { 1670 jj[count++] = bs*a->j[k] + l; 1671 } 1672 } 1673 } 1674 } 1675 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1676 ierr = PetscFree(jj);CHKERRQ(ierr); 1677 1678 /* store nonzero values */ 1679 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1680 count = 0; 1681 for (i=0; i<a->mbs; i++) { 1682 for (j=0; j<bs; j++) { 1683 for (k=a->i[i]; k<a->i[i+1]; k++) { 1684 for (l=0; l<bs; l++) { 1685 aa[count++] = a->a[bs2*k + l*bs + j]; 1686 } 1687 } 1688 } 1689 } 1690 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1691 ierr = PetscFree(aa);CHKERRQ(ierr); 1692 1693 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1694 if (file) { 1695 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1696 } 1697 PetscFunctionReturn(0); 1698 } 1699 1700 #undef __FUNCT__ 1701 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1702 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1703 { 1704 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1705 PetscErrorCode ierr; 1706 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1707 PetscViewerFormat format; 1708 1709 PetscFunctionBegin; 1710 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1711 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1712 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1713 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1714 Mat aij; 1715 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1716 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1717 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1718 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1719 PetscFunctionReturn(0); 1720 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1721 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1722 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1723 for (i=0; i<a->mbs; i++) { 1724 for (j=0; j<bs; j++) { 1725 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1726 for (k=a->i[i]; k<a->i[i+1]; k++) { 1727 for (l=0; l<bs; l++) { 1728 #if defined(PETSC_USE_COMPLEX) 1729 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1730 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1731 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1732 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1733 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1734 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1735 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1736 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1737 } 1738 #else 1739 if (a->a[bs2*k + l*bs + j] != 0.0) { 1740 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1741 } 1742 #endif 1743 } 1744 } 1745 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1746 } 1747 } 1748 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1749 } else { 1750 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1751 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1752 for (i=0; i<a->mbs; i++) { 1753 for (j=0; j<bs; j++) { 1754 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1755 for (k=a->i[i]; k<a->i[i+1]; k++) { 1756 for (l=0; l<bs; l++) { 1757 #if defined(PETSC_USE_COMPLEX) 1758 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1759 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1760 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1761 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1762 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1763 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1764 } else { 1765 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1766 } 1767 #else 1768 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1769 #endif 1770 } 1771 } 1772 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1773 } 1774 } 1775 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1776 } 1777 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1778 PetscFunctionReturn(0); 1779 } 1780 1781 #undef __FUNCT__ 1782 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1783 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1784 { 1785 Mat A = (Mat) Aa; 1786 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1787 PetscErrorCode ierr; 1788 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1789 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1790 MatScalar *aa; 1791 PetscViewer viewer; 1792 PetscViewerFormat format; 1793 1794 PetscFunctionBegin; 1795 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1796 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1797 1798 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1799 1800 /* loop over matrix elements drawing boxes */ 1801 1802 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1803 color = PETSC_DRAW_BLUE; 1804 for (i=0,row=0; i<mbs; i++,row+=bs) { 1805 for (j=a->i[i]; j<a->i[i+1]; j++) { 1806 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1807 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1808 aa = a->a + j*bs2; 1809 for (k=0; k<bs; k++) { 1810 for (l=0; l<bs; l++) { 1811 if (PetscRealPart(*aa++) >= 0.) continue; 1812 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1813 } 1814 } 1815 } 1816 } 1817 color = PETSC_DRAW_CYAN; 1818 for (i=0,row=0; i<mbs; i++,row+=bs) { 1819 for (j=a->i[i]; j<a->i[i+1]; j++) { 1820 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1821 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1822 aa = a->a + j*bs2; 1823 for (k=0; k<bs; k++) { 1824 for (l=0; l<bs; l++) { 1825 if (PetscRealPart(*aa++) != 0.) continue; 1826 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1827 } 1828 } 1829 } 1830 } 1831 color = PETSC_DRAW_RED; 1832 for (i=0,row=0; i<mbs; i++,row+=bs) { 1833 for (j=a->i[i]; j<a->i[i+1]; j++) { 1834 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1835 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1836 aa = a->a + j*bs2; 1837 for (k=0; k<bs; k++) { 1838 for (l=0; l<bs; l++) { 1839 if (PetscRealPart(*aa++) <= 0.) continue; 1840 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1841 } 1842 } 1843 } 1844 } 1845 } else { 1846 /* use contour shading to indicate magnitude of values */ 1847 /* first determine max of all nonzero values */ 1848 PetscDraw popup; 1849 PetscReal scale,maxv = 0.0; 1850 1851 for (i=0; i<a->nz*a->bs2; i++) { 1852 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1853 } 1854 scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv; 1855 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1856 if (popup) {ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);} 1857 for (i=0,row=0; i<mbs; i++,row+=bs) { 1858 for (j=a->i[i]; j<a->i[i+1]; j++) { 1859 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1860 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1861 aa = a->a + j*bs2; 1862 for (k=0; k<bs; k++) { 1863 for (l=0; l<bs; l++) { 1864 color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++)); 1865 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1866 } 1867 } 1868 } 1869 } 1870 } 1871 PetscFunctionReturn(0); 1872 } 1873 1874 #undef __FUNCT__ 1875 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1876 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1877 { 1878 PetscErrorCode ierr; 1879 PetscReal xl,yl,xr,yr,w,h; 1880 PetscDraw draw; 1881 PetscBool isnull; 1882 1883 PetscFunctionBegin; 1884 1885 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1886 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1887 1888 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1889 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1890 xr += w; yr += h; xl = -w; yl = -h; 1891 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1892 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1893 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1894 PetscFunctionReturn(0); 1895 } 1896 1897 #undef __FUNCT__ 1898 #define __FUNCT__ "MatView_SeqBAIJ" 1899 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1900 { 1901 PetscErrorCode ierr; 1902 PetscBool iascii,isbinary,isdraw; 1903 1904 PetscFunctionBegin; 1905 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1906 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1907 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1908 if (iascii){ 1909 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1910 } else if (isbinary) { 1911 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1912 } else if (isdraw) { 1913 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1914 } else { 1915 Mat B; 1916 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1917 ierr = MatView(B,viewer);CHKERRQ(ierr); 1918 ierr = MatDestroy(&B);CHKERRQ(ierr); 1919 } 1920 PetscFunctionReturn(0); 1921 } 1922 1923 1924 #undef __FUNCT__ 1925 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1926 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1927 { 1928 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1929 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1930 PetscInt *ai = a->i,*ailen = a->ilen; 1931 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1932 MatScalar *ap,*aa = a->a; 1933 1934 PetscFunctionBegin; 1935 for (k=0; k<m; k++) { /* loop over rows */ 1936 row = im[k]; brow = row/bs; 1937 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1938 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1939 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1940 nrow = ailen[brow]; 1941 for (l=0; l<n; l++) { /* loop over columns */ 1942 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1943 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1944 col = in[l] ; 1945 bcol = col/bs; 1946 cidx = col%bs; 1947 ridx = row%bs; 1948 high = nrow; 1949 low = 0; /* assume unsorted */ 1950 while (high-low > 5) { 1951 t = (low+high)/2; 1952 if (rp[t] > bcol) high = t; 1953 else low = t; 1954 } 1955 for (i=low; i<high; i++) { 1956 if (rp[i] > bcol) break; 1957 if (rp[i] == bcol) { 1958 *v++ = ap[bs2*i+bs*cidx+ridx]; 1959 goto finished; 1960 } 1961 } 1962 *v++ = 0.0; 1963 finished:; 1964 } 1965 } 1966 PetscFunctionReturn(0); 1967 } 1968 1969 #undef __FUNCT__ 1970 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1971 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1972 { 1973 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1974 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1975 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1976 PetscErrorCode ierr; 1977 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1978 PetscBool roworiented=a->roworiented; 1979 const PetscScalar *value = v; 1980 MatScalar *ap,*aa = a->a,*bap; 1981 1982 PetscFunctionBegin; 1983 if (roworiented) { 1984 stepval = (n-1)*bs; 1985 } else { 1986 stepval = (m-1)*bs; 1987 } 1988 for (k=0; k<m; k++) { /* loop over added rows */ 1989 row = im[k]; 1990 if (row < 0) continue; 1991 #if defined(PETSC_USE_DEBUG) 1992 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1993 #endif 1994 rp = aj + ai[row]; 1995 ap = aa + bs2*ai[row]; 1996 rmax = imax[row]; 1997 nrow = ailen[row]; 1998 low = 0; 1999 high = nrow; 2000 for (l=0; l<n; l++) { /* loop over added columns */ 2001 if (in[l] < 0) continue; 2002 #if defined(PETSC_USE_DEBUG) 2003 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 2004 #endif 2005 col = in[l]; 2006 if (roworiented) { 2007 value = v + (k*(stepval+bs) + l)*bs; 2008 } else { 2009 value = v + (l*(stepval+bs) + k)*bs; 2010 } 2011 if (col <= lastcol) low = 0; else high = nrow; 2012 lastcol = col; 2013 while (high-low > 7) { 2014 t = (low+high)/2; 2015 if (rp[t] > col) high = t; 2016 else low = t; 2017 } 2018 for (i=low; i<high; i++) { 2019 if (rp[i] > col) break; 2020 if (rp[i] == col) { 2021 bap = ap + bs2*i; 2022 if (roworiented) { 2023 if (is == ADD_VALUES) { 2024 for (ii=0; ii<bs; ii++,value+=stepval) { 2025 for (jj=ii; jj<bs2; jj+=bs) { 2026 bap[jj] += *value++; 2027 } 2028 } 2029 } else { 2030 for (ii=0; ii<bs; ii++,value+=stepval) { 2031 for (jj=ii; jj<bs2; jj+=bs) { 2032 bap[jj] = *value++; 2033 } 2034 } 2035 } 2036 } else { 2037 if (is == ADD_VALUES) { 2038 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2039 for (jj=0; jj<bs; jj++) { 2040 bap[jj] += value[jj]; 2041 } 2042 bap += bs; 2043 } 2044 } else { 2045 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2046 for (jj=0; jj<bs; jj++) { 2047 bap[jj] = value[jj]; 2048 } 2049 bap += bs; 2050 } 2051 } 2052 } 2053 goto noinsert2; 2054 } 2055 } 2056 if (nonew == 1) goto noinsert2; 2057 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2058 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2059 N = nrow++ - 1; high++; 2060 /* shift up all the later entries in this row */ 2061 for (ii=N; ii>=i; ii--) { 2062 rp[ii+1] = rp[ii]; 2063 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2064 } 2065 if (N >= i) { 2066 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2067 } 2068 rp[i] = col; 2069 bap = ap + bs2*i; 2070 if (roworiented) { 2071 for (ii=0; ii<bs; ii++,value+=stepval) { 2072 for (jj=ii; jj<bs2; jj+=bs) { 2073 bap[jj] = *value++; 2074 } 2075 } 2076 } else { 2077 for (ii=0; ii<bs; ii++,value+=stepval) { 2078 for (jj=0; jj<bs; jj++) { 2079 *bap++ = *value++; 2080 } 2081 } 2082 } 2083 noinsert2:; 2084 low = i; 2085 } 2086 ailen[row] = nrow; 2087 } 2088 PetscFunctionReturn(0); 2089 } 2090 2091 #undef __FUNCT__ 2092 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 2093 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 2094 { 2095 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2096 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 2097 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 2098 PetscErrorCode ierr; 2099 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 2100 MatScalar *aa = a->a,*ap; 2101 PetscReal ratio=0.6; 2102 2103 PetscFunctionBegin; 2104 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 2105 2106 if (m) rmax = ailen[0]; 2107 for (i=1; i<mbs; i++) { 2108 /* move each row back by the amount of empty slots (fshift) before it*/ 2109 fshift += imax[i-1] - ailen[i-1]; 2110 rmax = PetscMax(rmax,ailen[i]); 2111 if (fshift) { 2112 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 2113 N = ailen[i]; 2114 for (j=0; j<N; j++) { 2115 ip[j-fshift] = ip[j]; 2116 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2117 } 2118 } 2119 ai[i] = ai[i-1] + ailen[i-1]; 2120 } 2121 if (mbs) { 2122 fshift += imax[mbs-1] - ailen[mbs-1]; 2123 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 2124 } 2125 /* reset ilen and imax for each row */ 2126 for (i=0; i<mbs; i++) { 2127 ailen[i] = imax[i] = ai[i+1] - ai[i]; 2128 } 2129 a->nz = ai[mbs]; 2130 2131 /* diagonals may have moved, so kill the diagonal pointers */ 2132 a->idiagvalid = PETSC_FALSE; 2133 if (fshift && a->diag) { 2134 ierr = PetscFree(a->diag);CHKERRQ(ierr); 2135 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2136 a->diag = 0; 2137 } 2138 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 2139 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 2140 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 2141 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 2142 A->info.mallocs += a->reallocs; 2143 a->reallocs = 0; 2144 A->info.nz_unneeded = (PetscReal)fshift*bs2; 2145 2146 ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 2147 A->same_nonzero = PETSC_TRUE; 2148 PetscFunctionReturn(0); 2149 } 2150 2151 /* 2152 This function returns an array of flags which indicate the locations of contiguous 2153 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2154 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2155 Assume: sizes should be long enough to hold all the values. 2156 */ 2157 #undef __FUNCT__ 2158 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 2159 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2160 { 2161 PetscInt i,j,k,row; 2162 PetscBool flg; 2163 2164 PetscFunctionBegin; 2165 for (i=0,j=0; i<n; j++) { 2166 row = idx[i]; 2167 if (row%bs!=0) { /* Not the begining of a block */ 2168 sizes[j] = 1; 2169 i++; 2170 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2171 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 2172 i++; 2173 } else { /* Begining of the block, so check if the complete block exists */ 2174 flg = PETSC_TRUE; 2175 for (k=1; k<bs; k++) { 2176 if (row+k != idx[i+k]) { /* break in the block */ 2177 flg = PETSC_FALSE; 2178 break; 2179 } 2180 } 2181 if (flg) { /* No break in the bs */ 2182 sizes[j] = bs; 2183 i+= bs; 2184 } else { 2185 sizes[j] = 1; 2186 i++; 2187 } 2188 } 2189 } 2190 *bs_max = j; 2191 PetscFunctionReturn(0); 2192 } 2193 2194 #undef __FUNCT__ 2195 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2196 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2197 { 2198 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2199 PetscErrorCode ierr; 2200 PetscInt i,j,k,count,*rows; 2201 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2202 PetscScalar zero = 0.0; 2203 MatScalar *aa; 2204 const PetscScalar *xx; 2205 PetscScalar *bb; 2206 2207 PetscFunctionBegin; 2208 /* fix right hand side if needed */ 2209 if (x && b) { 2210 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2211 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2212 for (i=0; i<is_n; i++) { 2213 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2214 } 2215 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2216 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2217 } 2218 2219 /* Make a copy of the IS and sort it */ 2220 /* allocate memory for rows,sizes */ 2221 ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2222 2223 /* copy IS values to rows, and sort them */ 2224 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2225 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2226 2227 if (baij->keepnonzeropattern) { 2228 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2229 bs_max = is_n; 2230 A->same_nonzero = PETSC_TRUE; 2231 } else { 2232 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2233 A->same_nonzero = PETSC_FALSE; 2234 } 2235 2236 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2237 row = rows[j]; 2238 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2239 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2240 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2241 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2242 if (diag != (PetscScalar)0.0) { 2243 if (baij->ilen[row/bs] > 0) { 2244 baij->ilen[row/bs] = 1; 2245 baij->j[baij->i[row/bs]] = row/bs; 2246 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2247 } 2248 /* Now insert all the diagonal values for this bs */ 2249 for (k=0; k<bs; k++) { 2250 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2251 } 2252 } else { /* (diag == 0.0) */ 2253 baij->ilen[row/bs] = 0; 2254 } /* end (diag == 0.0) */ 2255 } else { /* (sizes[i] != bs) */ 2256 #if defined (PETSC_USE_DEBUG) 2257 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2258 #endif 2259 for (k=0; k<count; k++) { 2260 aa[0] = zero; 2261 aa += bs; 2262 } 2263 if (diag != (PetscScalar)0.0) { 2264 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2265 } 2266 } 2267 } 2268 2269 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2270 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2271 PetscFunctionReturn(0); 2272 } 2273 2274 #undef __FUNCT__ 2275 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2276 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2277 { 2278 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2279 PetscErrorCode ierr; 2280 PetscInt i,j,k,count; 2281 PetscInt bs=A->rmap->bs,bs2=baij->bs2,row,col; 2282 PetscScalar zero = 0.0; 2283 MatScalar *aa; 2284 const PetscScalar *xx; 2285 PetscScalar *bb; 2286 PetscBool *zeroed,vecs = PETSC_FALSE; 2287 2288 PetscFunctionBegin; 2289 /* fix right hand side if needed */ 2290 if (x && b) { 2291 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2292 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2293 vecs = PETSC_TRUE; 2294 } 2295 A->same_nonzero = PETSC_TRUE; 2296 2297 /* zero the columns */ 2298 ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr); 2299 ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 2300 for (i=0; i<is_n; i++) { 2301 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2302 zeroed[is_idx[i]] = PETSC_TRUE; 2303 } 2304 for (i=0; i<A->rmap->N; i++) { 2305 if (!zeroed[i]) { 2306 row = i/bs; 2307 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2308 for (k=0; k<bs; k++) { 2309 col = bs*baij->j[j] + k; 2310 if (zeroed[col]) { 2311 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2312 if (vecs) bb[i] -= aa[0]*xx[col]; 2313 aa[0] = 0.0; 2314 } 2315 } 2316 } 2317 } else if (vecs) bb[i] = diag*xx[i]; 2318 } 2319 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2320 if (vecs) { 2321 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2322 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2323 } 2324 2325 /* zero the rows */ 2326 for (i=0; i<is_n; i++) { 2327 row = is_idx[i]; 2328 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2329 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2330 for (k=0; k<count; k++) { 2331 aa[0] = zero; 2332 aa += bs; 2333 } 2334 if (diag != (PetscScalar)0.0) { 2335 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2336 } 2337 } 2338 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2339 PetscFunctionReturn(0); 2340 } 2341 2342 #undef __FUNCT__ 2343 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2344 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2345 { 2346 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2347 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2348 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2349 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2350 PetscErrorCode ierr; 2351 PetscInt ridx,cidx,bs2=a->bs2; 2352 PetscBool roworiented=a->roworiented; 2353 MatScalar *ap,value,*aa=a->a,*bap; 2354 2355 PetscFunctionBegin; 2356 if (v) PetscValidScalarPointer(v,6); 2357 for (k=0; k<m; k++) { /* loop over added rows */ 2358 row = im[k]; 2359 brow = row/bs; 2360 if (row < 0) continue; 2361 #if defined(PETSC_USE_DEBUG) 2362 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2363 #endif 2364 rp = aj + ai[brow]; 2365 ap = aa + bs2*ai[brow]; 2366 rmax = imax[brow]; 2367 nrow = ailen[brow]; 2368 low = 0; 2369 high = nrow; 2370 for (l=0; l<n; l++) { /* loop over added columns */ 2371 if (in[l] < 0) continue; 2372 #if defined(PETSC_USE_DEBUG) 2373 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2374 #endif 2375 col = in[l]; bcol = col/bs; 2376 ridx = row % bs; cidx = col % bs; 2377 if (roworiented) { 2378 value = v[l + k*n]; 2379 } else { 2380 value = v[k + l*m]; 2381 } 2382 if (col <= lastcol) low = 0; else high = nrow; 2383 lastcol = col; 2384 while (high-low > 7) { 2385 t = (low+high)/2; 2386 if (rp[t] > bcol) high = t; 2387 else low = t; 2388 } 2389 for (i=low; i<high; i++) { 2390 if (rp[i] > bcol) break; 2391 if (rp[i] == bcol) { 2392 bap = ap + bs2*i + bs*cidx + ridx; 2393 if (is == ADD_VALUES) *bap += value; 2394 else *bap = value; 2395 goto noinsert1; 2396 } 2397 } 2398 if (nonew == 1) goto noinsert1; 2399 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2400 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2401 N = nrow++ - 1; high++; 2402 /* shift up all the later entries in this row */ 2403 for (ii=N; ii>=i; ii--) { 2404 rp[ii+1] = rp[ii]; 2405 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2406 } 2407 if (N>=i) { 2408 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2409 } 2410 rp[i] = bcol; 2411 ap[bs2*i + bs*cidx + ridx] = value; 2412 a->nz++; 2413 noinsert1:; 2414 low = i; 2415 } 2416 ailen[brow] = nrow; 2417 } 2418 A->same_nonzero = PETSC_FALSE; 2419 PetscFunctionReturn(0); 2420 } 2421 2422 #undef __FUNCT__ 2423 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2424 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2425 { 2426 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2427 Mat outA; 2428 PetscErrorCode ierr; 2429 PetscBool row_identity,col_identity; 2430 2431 PetscFunctionBegin; 2432 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2433 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2434 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2435 if (!row_identity || !col_identity) { 2436 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2437 } 2438 2439 outA = inA; 2440 inA->factortype = MAT_FACTOR_LU; 2441 2442 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2443 2444 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2445 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2446 a->row = row; 2447 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2448 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2449 a->col = col; 2450 2451 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2452 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2453 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2454 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2455 2456 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2457 if (!a->solve_work) { 2458 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2459 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2460 } 2461 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2462 2463 PetscFunctionReturn(0); 2464 } 2465 2466 EXTERN_C_BEGIN 2467 #undef __FUNCT__ 2468 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2469 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2470 { 2471 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2472 PetscInt i,nz,mbs; 2473 2474 PetscFunctionBegin; 2475 nz = baij->maxnz; 2476 mbs = baij->mbs; 2477 for (i=0; i<nz; i++) { 2478 baij->j[i] = indices[i]; 2479 } 2480 baij->nz = nz; 2481 for (i=0; i<mbs; i++) { 2482 baij->ilen[i] = baij->imax[i]; 2483 } 2484 PetscFunctionReturn(0); 2485 } 2486 EXTERN_C_END 2487 2488 #undef __FUNCT__ 2489 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2490 /*@ 2491 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2492 in the matrix. 2493 2494 Input Parameters: 2495 + mat - the SeqBAIJ matrix 2496 - indices - the column indices 2497 2498 Level: advanced 2499 2500 Notes: 2501 This can be called if you have precomputed the nonzero structure of the 2502 matrix and want to provide it to the matrix object to improve the performance 2503 of the MatSetValues() operation. 2504 2505 You MUST have set the correct numbers of nonzeros per row in the call to 2506 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2507 2508 MUST be called before any calls to MatSetValues(); 2509 2510 @*/ 2511 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2512 { 2513 PetscErrorCode ierr; 2514 2515 PetscFunctionBegin; 2516 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2517 PetscValidPointer(indices,2); 2518 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr); 2519 PetscFunctionReturn(0); 2520 } 2521 2522 #undef __FUNCT__ 2523 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2524 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2525 { 2526 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2527 PetscErrorCode ierr; 2528 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2529 PetscReal atmp; 2530 PetscScalar *x,zero = 0.0; 2531 MatScalar *aa; 2532 PetscInt ncols,brow,krow,kcol; 2533 2534 PetscFunctionBegin; 2535 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2536 bs = A->rmap->bs; 2537 aa = a->a; 2538 ai = a->i; 2539 aj = a->j; 2540 mbs = a->mbs; 2541 2542 ierr = VecSet(v,zero);CHKERRQ(ierr); 2543 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2544 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2545 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2546 for (i=0; i<mbs; i++) { 2547 ncols = ai[1] - ai[0]; ai++; 2548 brow = bs*i; 2549 for (j=0; j<ncols; j++){ 2550 for (kcol=0; kcol<bs; kcol++){ 2551 for (krow=0; krow<bs; krow++){ 2552 atmp = PetscAbsScalar(*aa);aa++; 2553 row = brow + krow; /* row index */ 2554 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2555 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2556 } 2557 } 2558 aj++; 2559 } 2560 } 2561 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2562 PetscFunctionReturn(0); 2563 } 2564 2565 #undef __FUNCT__ 2566 #define __FUNCT__ "MatCopy_SeqBAIJ" 2567 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2568 { 2569 PetscErrorCode ierr; 2570 2571 PetscFunctionBegin; 2572 /* If the two matrices have the same copy implementation, use fast copy. */ 2573 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2574 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2575 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2576 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2577 2578 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2579 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2580 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2581 } else { 2582 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2583 } 2584 PetscFunctionReturn(0); 2585 } 2586 2587 #undef __FUNCT__ 2588 #define __FUNCT__ "MatSetUp_SeqBAIJ" 2589 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2590 { 2591 PetscErrorCode ierr; 2592 2593 PetscFunctionBegin; 2594 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2595 PetscFunctionReturn(0); 2596 } 2597 2598 #undef __FUNCT__ 2599 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ" 2600 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2601 { 2602 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2603 PetscFunctionBegin; 2604 *array = a->a; 2605 PetscFunctionReturn(0); 2606 } 2607 2608 #undef __FUNCT__ 2609 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ" 2610 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2611 { 2612 PetscFunctionBegin; 2613 PetscFunctionReturn(0); 2614 } 2615 2616 #undef __FUNCT__ 2617 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2618 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2619 { 2620 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2621 PetscErrorCode ierr; 2622 PetscInt i,bs=Y->rmap->bs,j,bs2=bs*bs; 2623 PetscBLASInt one=1; 2624 2625 PetscFunctionBegin; 2626 if (str == SAME_NONZERO_PATTERN) { 2627 PetscScalar alpha = a; 2628 PetscBLASInt bnz = PetscBLASIntCast(x->nz*bs2); 2629 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2630 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2631 if (y->xtoy && y->XtoY != X) { 2632 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2633 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2634 } 2635 if (!y->xtoy) { /* get xtoy */ 2636 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2637 y->XtoY = X; 2638 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2639 } 2640 for (i=0; i<x->nz; i++) { 2641 j = 0; 2642 while (j < bs2){ 2643 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2644 j++; 2645 } 2646 } 2647 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2648 } else { 2649 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2650 } 2651 PetscFunctionReturn(0); 2652 } 2653 2654 #undef __FUNCT__ 2655 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2656 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2657 { 2658 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2659 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2660 MatScalar *aa = a->a; 2661 2662 PetscFunctionBegin; 2663 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2664 PetscFunctionReturn(0); 2665 } 2666 2667 #undef __FUNCT__ 2668 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2669 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2670 { 2671 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2672 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2673 MatScalar *aa = a->a; 2674 2675 PetscFunctionBegin; 2676 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2677 PetscFunctionReturn(0); 2678 } 2679 2680 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2681 2682 #undef __FUNCT__ 2683 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2684 /* 2685 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2686 */ 2687 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 2688 { 2689 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2690 PetscErrorCode ierr; 2691 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2692 PetscInt nz = a->i[m],row,*jj,mr,col; 2693 2694 PetscFunctionBegin; 2695 *nn = n; 2696 if (!ia) PetscFunctionReturn(0); 2697 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2698 else { 2699 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2700 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2701 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2702 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2703 jj = a->j; 2704 for (i=0; i<nz; i++) { 2705 collengths[jj[i]]++; 2706 } 2707 cia[0] = oshift; 2708 for (i=0; i<n; i++) { 2709 cia[i+1] = cia[i] + collengths[i]; 2710 } 2711 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2712 jj = a->j; 2713 for (row=0; row<m; row++) { 2714 mr = a->i[row+1] - a->i[row]; 2715 for (i=0; i<mr; i++) { 2716 col = *jj++; 2717 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2718 } 2719 } 2720 ierr = PetscFree(collengths);CHKERRQ(ierr); 2721 *ia = cia; *ja = cja; 2722 } 2723 PetscFunctionReturn(0); 2724 } 2725 2726 #undef __FUNCT__ 2727 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2728 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 2729 { 2730 PetscErrorCode ierr; 2731 2732 PetscFunctionBegin; 2733 if (!ia) PetscFunctionReturn(0); 2734 ierr = PetscFree(*ia);CHKERRQ(ierr); 2735 ierr = PetscFree(*ja);CHKERRQ(ierr); 2736 PetscFunctionReturn(0); 2737 } 2738 2739 #undef __FUNCT__ 2740 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2741 PetscErrorCode MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2742 { 2743 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f; 2744 PetscErrorCode ierr; 2745 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow; 2746 PetscScalar dx,*y,*xx,*w3_array; 2747 PetscScalar *vscale_array; 2748 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2749 Vec w1=coloring->w1,w2=coloring->w2,w3; 2750 void *fctx = coloring->fctx; 2751 PetscBool flg = PETSC_FALSE; 2752 PetscInt ctype=coloring->ctype,N,col_start=0,col_end=0; 2753 Vec x1_tmp; 2754 2755 PetscFunctionBegin; 2756 PetscValidHeaderSpecific(J,MAT_CLASSID,1); 2757 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2); 2758 PetscValidHeaderSpecific(x1,VEC_CLASSID,3); 2759 if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2760 2761 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2762 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2763 ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr); 2764 if (flg) { 2765 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2766 } else { 2767 PetscBool assembled; 2768 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2769 if (assembled) { 2770 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2771 } 2772 } 2773 2774 x1_tmp = x1; 2775 if (!coloring->vscale){ 2776 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2777 } 2778 2779 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2780 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2781 } 2782 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2783 2784 /* Set w1 = F(x1) */ 2785 if (!coloring->fset) { 2786 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2787 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2788 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2789 } else { 2790 coloring->fset = PETSC_FALSE; 2791 } 2792 2793 if (!coloring->w3) { 2794 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2795 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2796 } 2797 w3 = coloring->w3; 2798 2799 CHKMEMQ; 2800 /* Compute all the local scale factors, including ghost points */ 2801 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2802 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2803 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2804 if (ctype == IS_COLORING_GHOSTED){ 2805 col_start = 0; col_end = N; 2806 } else if (ctype == IS_COLORING_GLOBAL){ 2807 xx = xx - start; 2808 vscale_array = vscale_array - start; 2809 col_start = start; col_end = N + start; 2810 } CHKMEMQ; 2811 for (col=col_start; col<col_end; col++){ 2812 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2813 if (coloring->htype[0] == 'w') { 2814 dx = 1.0 + unorm; 2815 } else { 2816 dx = xx[col]; 2817 } 2818 if (dx == (PetscScalar)0.0) dx = 1.0; 2819 #if !defined(PETSC_USE_COMPLEX) 2820 if (dx < umin && dx >= 0.0) dx = umin; 2821 else if (dx < 0.0 && dx > -umin) dx = -umin; 2822 #else 2823 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2824 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2825 #endif 2826 dx *= epsilon; 2827 vscale_array[col] = (PetscScalar)1.0/dx; 2828 } CHKMEMQ; 2829 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2830 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2831 if (ctype == IS_COLORING_GLOBAL){ 2832 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2833 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2834 } 2835 CHKMEMQ; 2836 if (coloring->vscaleforrow) { 2837 vscaleforrow = coloring->vscaleforrow; 2838 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2839 2840 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2841 /* 2842 Loop over each color 2843 */ 2844 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2845 for (k=0; k<coloring->ncolors; k++) { 2846 coloring->currentcolor = k; 2847 for (i=0; i<bs; i++) { 2848 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2849 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2850 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2851 /* 2852 Loop over each column associated with color 2853 adding the perturbation to the vector w3. 2854 */ 2855 for (l=0; l<coloring->ncolumns[k]; l++) { 2856 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2857 if (coloring->htype[0] == 'w') { 2858 dx = 1.0 + unorm; 2859 } else { 2860 dx = xx[col]; 2861 } 2862 if (dx == (PetscScalar)0.0) dx = 1.0; 2863 #if !defined(PETSC_USE_COMPLEX) 2864 if (dx < umin && dx >= 0.0) dx = umin; 2865 else if (dx < 0.0 && dx > -umin) dx = -umin; 2866 #else 2867 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2868 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2869 #endif 2870 dx *= epsilon; 2871 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2872 w3_array[col] += dx; 2873 } 2874 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2875 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2876 2877 /* 2878 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2879 w2 = F(x1 + dx) - F(x1) 2880 */ 2881 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2882 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2883 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2884 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2885 2886 /* 2887 Loop over rows of vector, putting results into Jacobian matrix 2888 */ 2889 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2890 for (l=0; l<coloring->nrows[k]; l++) { 2891 row = bs*coloring->rows[k][l]; /* local row index */ 2892 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2893 for (j=0; j<bs; j++) { 2894 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2895 srows[j] = row + start + j; 2896 } 2897 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2898 } 2899 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2900 } 2901 } /* endof for each color */ 2902 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2903 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2904 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2905 ierr = PetscFree(srows);CHKERRQ(ierr); 2906 2907 coloring->currentcolor = -1; 2908 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2909 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2910 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2911 PetscFunctionReturn(0); 2912 } 2913 2914 /* -------------------------------------------------------------------*/ 2915 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2916 MatGetRow_SeqBAIJ, 2917 MatRestoreRow_SeqBAIJ, 2918 MatMult_SeqBAIJ_N, 2919 /* 4*/ MatMultAdd_SeqBAIJ_N, 2920 MatMultTranspose_SeqBAIJ, 2921 MatMultTransposeAdd_SeqBAIJ, 2922 0, 2923 0, 2924 0, 2925 /*10*/ 0, 2926 MatLUFactor_SeqBAIJ, 2927 0, 2928 0, 2929 MatTranspose_SeqBAIJ, 2930 /*15*/ MatGetInfo_SeqBAIJ, 2931 MatEqual_SeqBAIJ, 2932 MatGetDiagonal_SeqBAIJ, 2933 MatDiagonalScale_SeqBAIJ, 2934 MatNorm_SeqBAIJ, 2935 /*20*/ 0, 2936 MatAssemblyEnd_SeqBAIJ, 2937 MatSetOption_SeqBAIJ, 2938 MatZeroEntries_SeqBAIJ, 2939 /*24*/ MatZeroRows_SeqBAIJ, 2940 0, 2941 0, 2942 0, 2943 0, 2944 /*29*/ MatSetUp_SeqBAIJ, 2945 0, 2946 0, 2947 0, 2948 0, 2949 /*34*/ MatDuplicate_SeqBAIJ, 2950 0, 2951 0, 2952 MatILUFactor_SeqBAIJ, 2953 0, 2954 /*39*/ MatAXPY_SeqBAIJ, 2955 MatGetSubMatrices_SeqBAIJ, 2956 MatIncreaseOverlap_SeqBAIJ, 2957 MatGetValues_SeqBAIJ, 2958 MatCopy_SeqBAIJ, 2959 /*44*/ 0, 2960 MatScale_SeqBAIJ, 2961 0, 2962 0, 2963 MatZeroRowsColumns_SeqBAIJ, 2964 /*49*/ 0, 2965 MatGetRowIJ_SeqBAIJ, 2966 MatRestoreRowIJ_SeqBAIJ, 2967 MatGetColumnIJ_SeqBAIJ, 2968 MatRestoreColumnIJ_SeqBAIJ, 2969 /*54*/ MatFDColoringCreate_SeqAIJ, 2970 0, 2971 0, 2972 0, 2973 MatSetValuesBlocked_SeqBAIJ, 2974 /*59*/ MatGetSubMatrix_SeqBAIJ, 2975 MatDestroy_SeqBAIJ, 2976 MatView_SeqBAIJ, 2977 0, 2978 0, 2979 /*64*/ 0, 2980 0, 2981 0, 2982 0, 2983 0, 2984 /*69*/ MatGetRowMaxAbs_SeqBAIJ, 2985 0, 2986 MatConvert_Basic, 2987 0, 2988 0, 2989 /*74*/ 0, 2990 MatFDColoringApply_BAIJ, 2991 0, 2992 0, 2993 0, 2994 /*79*/ 0, 2995 0, 2996 0, 2997 0, 2998 MatLoad_SeqBAIJ, 2999 /*84*/ 0, 3000 0, 3001 0, 3002 0, 3003 0, 3004 /*89*/ 0, 3005 0, 3006 0, 3007 0, 3008 0, 3009 /*94*/ 0, 3010 0, 3011 0, 3012 0, 3013 0, 3014 /*99*/0, 3015 0, 3016 0, 3017 0, 3018 0, 3019 /*104*/0, 3020 MatRealPart_SeqBAIJ, 3021 MatImaginaryPart_SeqBAIJ, 3022 0, 3023 0, 3024 /*109*/0, 3025 0, 3026 0, 3027 0, 3028 MatMissingDiagonal_SeqBAIJ, 3029 /*114*/0, 3030 0, 3031 0, 3032 0, 3033 0, 3034 /*119*/0, 3035 0, 3036 MatMultHermitianTranspose_SeqBAIJ, 3037 MatMultHermitianTransposeAdd_SeqBAIJ, 3038 0, 3039 /*124*/0, 3040 0, 3041 MatInvertBlockDiagonal_SeqBAIJ 3042 }; 3043 3044 EXTERN_C_BEGIN 3045 #undef __FUNCT__ 3046 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 3047 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3048 { 3049 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3050 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 3051 PetscErrorCode ierr; 3052 3053 PetscFunctionBegin; 3054 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3055 3056 /* allocate space for values if not already there */ 3057 if (!aij->saved_values) { 3058 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 3059 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 3060 } 3061 3062 /* copy values over */ 3063 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3064 PetscFunctionReturn(0); 3065 } 3066 EXTERN_C_END 3067 3068 EXTERN_C_BEGIN 3069 #undef __FUNCT__ 3070 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 3071 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3072 { 3073 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3074 PetscErrorCode ierr; 3075 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 3076 3077 PetscFunctionBegin; 3078 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3079 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 3080 3081 /* copy values over */ 3082 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3083 PetscFunctionReturn(0); 3084 } 3085 EXTERN_C_END 3086 3087 EXTERN_C_BEGIN 3088 extern PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 3089 extern PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 3090 EXTERN_C_END 3091 3092 EXTERN_C_BEGIN 3093 #undef __FUNCT__ 3094 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 3095 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 3096 { 3097 Mat_SeqBAIJ *b; 3098 PetscErrorCode ierr; 3099 PetscInt i,mbs,nbs,bs2; 3100 PetscBool flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 3101 3102 PetscFunctionBegin; 3103 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 3104 if (nz == MAT_SKIP_ALLOCATION) { 3105 skipallocation = PETSC_TRUE; 3106 nz = 0; 3107 } 3108 3109 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3110 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3111 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3112 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3113 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3114 3115 B->preallocated = PETSC_TRUE; 3116 3117 mbs = B->rmap->n/bs; 3118 nbs = B->cmap->n/bs; 3119 bs2 = bs*bs; 3120 3121 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 3122 3123 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3124 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 3125 if (nnz) { 3126 for (i=0; i<mbs; i++) { 3127 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 3128 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 3129 } 3130 } 3131 3132 b = (Mat_SeqBAIJ*)B->data; 3133 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 3134 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3135 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3136 3137 if (!flg) { 3138 switch (bs) { 3139 case 1: 3140 B->ops->mult = MatMult_SeqBAIJ_1; 3141 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3142 B->ops->sor = MatSOR_SeqBAIJ_1; 3143 break; 3144 case 2: 3145 B->ops->mult = MatMult_SeqBAIJ_2; 3146 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3147 B->ops->sor = MatSOR_SeqBAIJ_2; 3148 break; 3149 case 3: 3150 B->ops->mult = MatMult_SeqBAIJ_3; 3151 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3152 B->ops->sor = MatSOR_SeqBAIJ_3; 3153 break; 3154 case 4: 3155 B->ops->mult = MatMult_SeqBAIJ_4; 3156 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3157 B->ops->sor = MatSOR_SeqBAIJ_4; 3158 break; 3159 case 5: 3160 B->ops->mult = MatMult_SeqBAIJ_5; 3161 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3162 B->ops->sor = MatSOR_SeqBAIJ_5; 3163 break; 3164 case 6: 3165 B->ops->mult = MatMult_SeqBAIJ_6; 3166 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3167 B->ops->sor = MatSOR_SeqBAIJ_6; 3168 break; 3169 case 7: 3170 B->ops->mult = MatMult_SeqBAIJ_7; 3171 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3172 B->ops->sor = MatSOR_SeqBAIJ_7; 3173 break; 3174 case 15: 3175 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3176 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3177 B->ops->sor = MatSOR_SeqBAIJ_N; 3178 break; 3179 default: 3180 B->ops->mult = MatMult_SeqBAIJ_N; 3181 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3182 B->ops->sor = MatSOR_SeqBAIJ_N; 3183 break; 3184 } 3185 } 3186 b->mbs = mbs; 3187 b->nbs = nbs; 3188 if (!skipallocation) { 3189 if (!b->imax) { 3190 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 3191 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt)); 3192 b->free_imax_ilen = PETSC_TRUE; 3193 } 3194 /* b->ilen will count nonzeros in each block row so far. */ 3195 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 3196 if (!nnz) { 3197 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3198 else if (nz < 0) nz = 1; 3199 for (i=0; i<mbs; i++) b->imax[i] = nz; 3200 nz = nz*mbs; 3201 } else { 3202 nz = 0; 3203 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 3204 } 3205 3206 /* allocate the matrix space */ 3207 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 3208 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 3209 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 3210 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 3211 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3212 b->singlemalloc = PETSC_TRUE; 3213 b->i[0] = 0; 3214 for (i=1; i<mbs+1; i++) { 3215 b->i[i] = b->i[i-1] + b->imax[i-1]; 3216 } 3217 b->free_a = PETSC_TRUE; 3218 b->free_ij = PETSC_TRUE; 3219 } else { 3220 b->free_a = PETSC_FALSE; 3221 b->free_ij = PETSC_FALSE; 3222 } 3223 3224 b->bs2 = bs2; 3225 b->mbs = mbs; 3226 b->nz = 0; 3227 b->maxnz = nz; 3228 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3229 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 3230 PetscFunctionReturn(0); 3231 } 3232 EXTERN_C_END 3233 3234 EXTERN_C_BEGIN 3235 #undef __FUNCT__ 3236 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3237 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3238 { 3239 PetscInt i,m,nz,nz_max=0,*nnz; 3240 PetscScalar *values=0; 3241 PetscErrorCode ierr; 3242 3243 PetscFunctionBegin; 3244 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3245 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3246 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3247 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3248 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3249 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3250 m = B->rmap->n/bs; 3251 3252 if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 3253 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3254 for (i=0; i<m; i++) { 3255 nz = ii[i+1]- ii[i]; 3256 if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 3257 nz_max = PetscMax(nz_max, nz); 3258 nnz[i] = nz; 3259 } 3260 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3261 ierr = PetscFree(nnz);CHKERRQ(ierr); 3262 3263 values = (PetscScalar*)V; 3264 if (!values) { 3265 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3266 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3267 } 3268 for (i=0; i<m; i++) { 3269 PetscInt ncols = ii[i+1] - ii[i]; 3270 const PetscInt *icols = jj + ii[i]; 3271 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3272 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3273 } 3274 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3275 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3276 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3277 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3278 PetscFunctionReturn(0); 3279 } 3280 EXTERN_C_END 3281 3282 3283 EXTERN_C_BEGIN 3284 extern PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3285 extern PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*); 3286 #if defined(PETSC_HAVE_MUMPS) 3287 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3288 #endif 3289 extern PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 3290 EXTERN_C_END 3291 3292 /*MC 3293 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3294 block sparse compressed row format. 3295 3296 Options Database Keys: 3297 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3298 3299 Level: beginner 3300 3301 .seealso: MatCreateSeqBAIJ() 3302 M*/ 3303 3304 EXTERN_C_BEGIN 3305 extern PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3306 EXTERN_C_END 3307 3308 EXTERN_C_BEGIN 3309 #undef __FUNCT__ 3310 #define __FUNCT__ "MatCreate_SeqBAIJ" 3311 PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3312 { 3313 PetscErrorCode ierr; 3314 PetscMPIInt size; 3315 Mat_SeqBAIJ *b; 3316 3317 PetscFunctionBegin; 3318 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 3319 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3320 3321 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3322 B->data = (void*)b; 3323 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3324 b->row = 0; 3325 b->col = 0; 3326 b->icol = 0; 3327 b->reallocs = 0; 3328 b->saved_values = 0; 3329 3330 b->roworiented = PETSC_TRUE; 3331 b->nonew = 0; 3332 b->diag = 0; 3333 b->solve_work = 0; 3334 b->mult_work = 0; 3335 B->spptr = 0; 3336 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3337 b->keepnonzeropattern = PETSC_FALSE; 3338 b->xtoy = 0; 3339 b->XtoY = 0; 3340 B->same_nonzero = PETSC_FALSE; 3341 3342 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C", 3343 "MatGetFactorAvailable_seqbaij_petsc", 3344 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3345 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C", 3346 "MatGetFactor_seqbaij_petsc", 3347 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3348 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_bstrm_C", 3349 "MatGetFactor_seqbaij_bstrm", 3350 MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr); 3351 #if defined(PETSC_HAVE_MUMPS) 3352 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr); 3353 #endif 3354 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatInvertBlockDiagonal_C", 3355 "MatInvertBlockDiagonal_SeqBAIJ", 3356 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3357 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3358 "MatStoreValues_SeqBAIJ", 3359 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3360 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3361 "MatRetrieveValues_SeqBAIJ", 3362 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3363 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 3364 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 3365 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3366 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 3367 "MatConvert_SeqBAIJ_SeqAIJ", 3368 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3369 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 3370 "MatConvert_SeqBAIJ_SeqSBAIJ", 3371 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3372 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 3373 "MatSeqBAIJSetPreallocation_SeqBAIJ", 3374 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3375 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 3376 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 3377 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3378 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C", 3379 "MatConvert_SeqBAIJ_SeqBSTRM", 3380 MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3381 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3382 "MatIsTranspose_SeqBAIJ", 3383 MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3384 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3385 PetscFunctionReturn(0); 3386 } 3387 EXTERN_C_END 3388 3389 #undef __FUNCT__ 3390 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3391 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3392 { 3393 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3394 PetscErrorCode ierr; 3395 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3396 3397 PetscFunctionBegin; 3398 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3399 3400 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3401 c->imax = a->imax; 3402 c->ilen = a->ilen; 3403 c->free_imax_ilen = PETSC_FALSE; 3404 } else { 3405 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3406 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3407 for (i=0; i<mbs; i++) { 3408 c->imax[i] = a->imax[i]; 3409 c->ilen[i] = a->ilen[i]; 3410 } 3411 c->free_imax_ilen = PETSC_TRUE; 3412 } 3413 3414 /* allocate the matrix space */ 3415 if (mallocmatspace){ 3416 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3417 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3418 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3419 ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr); 3420 c->i = a->i; 3421 c->j = a->j; 3422 c->singlemalloc = PETSC_FALSE; 3423 c->free_a = PETSC_TRUE; 3424 c->free_ij = PETSC_FALSE; 3425 c->parent = A; 3426 C->preallocated = PETSC_TRUE; 3427 C->assembled = PETSC_TRUE; 3428 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3429 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3430 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3431 } else { 3432 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3433 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3434 c->singlemalloc = PETSC_TRUE; 3435 c->free_a = PETSC_TRUE; 3436 c->free_ij = PETSC_TRUE; 3437 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3438 if (mbs > 0) { 3439 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3440 if (cpvalues == MAT_COPY_VALUES) { 3441 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3442 } else { 3443 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3444 } 3445 } 3446 C->preallocated = PETSC_TRUE; 3447 C->assembled = PETSC_TRUE; 3448 } 3449 } 3450 3451 c->roworiented = a->roworiented; 3452 c->nonew = a->nonew; 3453 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3454 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3455 c->bs2 = a->bs2; 3456 c->mbs = a->mbs; 3457 c->nbs = a->nbs; 3458 3459 if (a->diag) { 3460 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3461 c->diag = a->diag; 3462 c->free_diag = PETSC_FALSE; 3463 } else { 3464 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3465 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3466 for (i=0; i<mbs; i++) { 3467 c->diag[i] = a->diag[i]; 3468 } 3469 c->free_diag = PETSC_TRUE; 3470 } 3471 } else c->diag = 0; 3472 c->nz = a->nz; 3473 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3474 c->solve_work = 0; 3475 c->mult_work = 0; 3476 3477 c->compressedrow.use = a->compressedrow.use; 3478 c->compressedrow.nrows = a->compressedrow.nrows; 3479 c->compressedrow.check = a->compressedrow.check; 3480 if (a->compressedrow.use){ 3481 i = a->compressedrow.nrows; 3482 ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 3483 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3484 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3485 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3486 } else { 3487 c->compressedrow.use = PETSC_FALSE; 3488 c->compressedrow.i = PETSC_NULL; 3489 c->compressedrow.rindex = PETSC_NULL; 3490 } 3491 C->same_nonzero = A->same_nonzero; 3492 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3493 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3494 PetscFunctionReturn(0); 3495 } 3496 3497 #undef __FUNCT__ 3498 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3499 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3500 { 3501 PetscErrorCode ierr; 3502 3503 PetscFunctionBegin; 3504 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 3505 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3506 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3507 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3508 PetscFunctionReturn(0); 3509 } 3510 3511 #undef __FUNCT__ 3512 #define __FUNCT__ "MatLoad_SeqBAIJ" 3513 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3514 { 3515 Mat_SeqBAIJ *a; 3516 PetscErrorCode ierr; 3517 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3518 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3519 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3520 PetscInt *masked,nmask,tmp,bs2,ishift; 3521 PetscMPIInt size; 3522 int fd; 3523 PetscScalar *aa; 3524 MPI_Comm comm = ((PetscObject)viewer)->comm; 3525 3526 PetscFunctionBegin; 3527 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3528 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3529 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3530 bs2 = bs*bs; 3531 3532 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3533 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3534 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3535 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3536 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3537 M = header[1]; N = header[2]; nz = header[3]; 3538 3539 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3540 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3541 3542 /* 3543 This code adds extra rows to make sure the number of rows is 3544 divisible by the blocksize 3545 */ 3546 mbs = M/bs; 3547 extra_rows = bs - M + bs*(mbs); 3548 if (extra_rows == bs) extra_rows = 0; 3549 else mbs++; 3550 if (extra_rows) { 3551 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3552 } 3553 3554 /* Set global sizes if not already set */ 3555 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3556 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3557 } else { /* Check if the matrix global sizes are correct */ 3558 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3559 if (rows < 0 && cols < 0){ /* user might provide local size instead of global size */ 3560 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3561 } 3562 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3563 } 3564 3565 /* read in row lengths */ 3566 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3567 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3568 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3569 3570 /* read in column indices */ 3571 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3572 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3573 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3574 3575 /* loop over row lengths determining block row lengths */ 3576 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3577 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3578 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3579 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3580 rowcount = 0; 3581 nzcount = 0; 3582 for (i=0; i<mbs; i++) { 3583 nmask = 0; 3584 for (j=0; j<bs; j++) { 3585 kmax = rowlengths[rowcount]; 3586 for (k=0; k<kmax; k++) { 3587 tmp = jj[nzcount++]/bs; 3588 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3589 } 3590 rowcount++; 3591 } 3592 browlengths[i] += nmask; 3593 /* zero out the mask elements we set */ 3594 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3595 } 3596 3597 /* Do preallocation */ 3598 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3599 a = (Mat_SeqBAIJ*)newmat->data; 3600 3601 /* set matrix "i" values */ 3602 a->i[0] = 0; 3603 for (i=1; i<= mbs; i++) { 3604 a->i[i] = a->i[i-1] + browlengths[i-1]; 3605 a->ilen[i-1] = browlengths[i-1]; 3606 } 3607 a->nz = 0; 3608 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3609 3610 /* read in nonzero values */ 3611 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3612 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3613 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3614 3615 /* set "a" and "j" values into matrix */ 3616 nzcount = 0; jcount = 0; 3617 for (i=0; i<mbs; i++) { 3618 nzcountb = nzcount; 3619 nmask = 0; 3620 for (j=0; j<bs; j++) { 3621 kmax = rowlengths[i*bs+j]; 3622 for (k=0; k<kmax; k++) { 3623 tmp = jj[nzcount++]/bs; 3624 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3625 } 3626 } 3627 /* sort the masked values */ 3628 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3629 3630 /* set "j" values into matrix */ 3631 maskcount = 1; 3632 for (j=0; j<nmask; j++) { 3633 a->j[jcount++] = masked[j]; 3634 mask[masked[j]] = maskcount++; 3635 } 3636 /* set "a" values into matrix */ 3637 ishift = bs2*a->i[i]; 3638 for (j=0; j<bs; j++) { 3639 kmax = rowlengths[i*bs+j]; 3640 for (k=0; k<kmax; k++) { 3641 tmp = jj[nzcountb]/bs ; 3642 block = mask[tmp] - 1; 3643 point = jj[nzcountb] - bs*tmp; 3644 idx = ishift + bs2*block + j + bs*point; 3645 a->a[idx] = (MatScalar)aa[nzcountb++]; 3646 } 3647 } 3648 /* zero out the mask elements we set */ 3649 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3650 } 3651 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3652 3653 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3654 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3655 ierr = PetscFree(aa);CHKERRQ(ierr); 3656 ierr = PetscFree(jj);CHKERRQ(ierr); 3657 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3658 3659 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3660 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3661 ierr = MatView_Private(newmat);CHKERRQ(ierr); 3662 PetscFunctionReturn(0); 3663 } 3664 3665 #undef __FUNCT__ 3666 #define __FUNCT__ "MatCreateSeqBAIJ" 3667 /*@C 3668 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3669 compressed row) format. For good matrix assembly performance the 3670 user should preallocate the matrix storage by setting the parameter nz 3671 (or the array nnz). By setting these parameters accurately, performance 3672 during matrix assembly can be increased by more than a factor of 50. 3673 3674 Collective on MPI_Comm 3675 3676 Input Parameters: 3677 + comm - MPI communicator, set to PETSC_COMM_SELF 3678 . bs - size of block 3679 . m - number of rows 3680 . n - number of columns 3681 . nz - number of nonzero blocks per block row (same for all rows) 3682 - nnz - array containing the number of nonzero blocks in the various block rows 3683 (possibly different for each block row) or PETSC_NULL 3684 3685 Output Parameter: 3686 . A - the matrix 3687 3688 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3689 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3690 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3691 3692 Options Database Keys: 3693 . -mat_no_unroll - uses code that does not unroll the loops in the 3694 block calculations (much slower) 3695 . -mat_block_size - size of the blocks to use 3696 3697 Level: intermediate 3698 3699 Notes: 3700 The number of rows and columns must be divisible by blocksize. 3701 3702 If the nnz parameter is given then the nz parameter is ignored 3703 3704 A nonzero block is any block that as 1 or more nonzeros in it 3705 3706 The block AIJ format is fully compatible with standard Fortran 77 3707 storage. That is, the stored row and column indices can begin at 3708 either one (as in Fortran) or zero. See the users' manual for details. 3709 3710 Specify the preallocated storage with either nz or nnz (not both). 3711 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3712 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3713 matrices. 3714 3715 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3716 @*/ 3717 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3718 { 3719 PetscErrorCode ierr; 3720 3721 PetscFunctionBegin; 3722 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3723 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3724 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3725 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3726 PetscFunctionReturn(0); 3727 } 3728 3729 #undef __FUNCT__ 3730 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3731 /*@C 3732 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3733 per row in the matrix. For good matrix assembly performance the 3734 user should preallocate the matrix storage by setting the parameter nz 3735 (or the array nnz). By setting these parameters accurately, performance 3736 during matrix assembly can be increased by more than a factor of 50. 3737 3738 Collective on MPI_Comm 3739 3740 Input Parameters: 3741 + A - the matrix 3742 . bs - size of block 3743 . nz - number of block nonzeros per block row (same for all rows) 3744 - nnz - array containing the number of block nonzeros in the various block rows 3745 (possibly different for each block row) or PETSC_NULL 3746 3747 Options Database Keys: 3748 . -mat_no_unroll - uses code that does not unroll the loops in the 3749 block calculations (much slower) 3750 . -mat_block_size - size of the blocks to use 3751 3752 Level: intermediate 3753 3754 Notes: 3755 If the nnz parameter is given then the nz parameter is ignored 3756 3757 You can call MatGetInfo() to get information on how effective the preallocation was; 3758 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3759 You can also run with the option -info and look for messages with the string 3760 malloc in them to see if additional memory allocation was needed. 3761 3762 The block AIJ format is fully compatible with standard Fortran 77 3763 storage. That is, the stored row and column indices can begin at 3764 either one (as in Fortran) or zero. See the users' manual for details. 3765 3766 Specify the preallocated storage with either nz or nnz (not both). 3767 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3768 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3769 3770 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3771 @*/ 3772 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3773 { 3774 PetscErrorCode ierr; 3775 3776 PetscFunctionBegin; 3777 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3778 PetscValidType(B,1); 3779 PetscValidLogicalCollectiveInt(B,bs,2); 3780 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3781 PetscFunctionReturn(0); 3782 } 3783 3784 #undef __FUNCT__ 3785 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3786 /*@C 3787 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3788 (the default sequential PETSc format). 3789 3790 Collective on MPI_Comm 3791 3792 Input Parameters: 3793 + A - the matrix 3794 . i - the indices into j for the start of each local row (starts with zero) 3795 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3796 - v - optional values in the matrix 3797 3798 Level: developer 3799 3800 .keywords: matrix, aij, compressed row, sparse 3801 3802 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3803 @*/ 3804 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3805 { 3806 PetscErrorCode ierr; 3807 3808 PetscFunctionBegin; 3809 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3810 PetscValidType(B,1); 3811 PetscValidLogicalCollectiveInt(B,bs,2); 3812 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3813 PetscFunctionReturn(0); 3814 } 3815 3816 3817 #undef __FUNCT__ 3818 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3819 /*@ 3820 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3821 3822 Collective on MPI_Comm 3823 3824 Input Parameters: 3825 + comm - must be an MPI communicator of size 1 3826 . bs - size of block 3827 . m - number of rows 3828 . n - number of columns 3829 . i - row indices 3830 . j - column indices 3831 - a - matrix values 3832 3833 Output Parameter: 3834 . mat - the matrix 3835 3836 Level: advanced 3837 3838 Notes: 3839 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3840 once the matrix is destroyed 3841 3842 You cannot set new nonzero locations into this matrix, that will generate an error. 3843 3844 The i and j indices are 0 based 3845 3846 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3847 3848 3849 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3850 3851 @*/ 3852 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3853 { 3854 PetscErrorCode ierr; 3855 PetscInt ii; 3856 Mat_SeqBAIJ *baij; 3857 3858 PetscFunctionBegin; 3859 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3860 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3861 3862 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3863 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3864 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3865 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3866 baij = (Mat_SeqBAIJ*)(*mat)->data; 3867 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3868 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3869 3870 baij->i = i; 3871 baij->j = j; 3872 baij->a = a; 3873 baij->singlemalloc = PETSC_FALSE; 3874 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3875 baij->free_a = PETSC_FALSE; 3876 baij->free_ij = PETSC_FALSE; 3877 3878 for (ii=0; ii<m; ii++) { 3879 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3880 #if defined(PETSC_USE_DEBUG) 3881 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3882 #endif 3883 } 3884 #if defined(PETSC_USE_DEBUG) 3885 for (ii=0; ii<baij->i[m]; ii++) { 3886 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3887 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3888 } 3889 #endif 3890 3891 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3892 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3893 PetscFunctionReturn(0); 3894 } 3895