1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <../src/mat/blockinvert.h> 9 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,PetscScalar **values) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 19 PetscReal shift = 0.0; 20 21 PetscFunctionBegin; 22 if (a->idiagvalid) PetscFunctionReturn(0); 23 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 24 diag_offset = a->diag; 25 if (!a->idiag) { 26 ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 27 ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 28 } 29 diag = a->idiag; 30 mdiag = a->idiag+bs2*mbs; 31 if (values) *values = a->idiag; 32 /* factor and invert each block */ 33 switch (bs){ 34 case 1: 35 for (i=0; i<mbs; i++) { 36 odiag = v + 1*diag_offset[i]; 37 diag[0] = odiag[0]; 38 mdiag[0] = odiag[0]; 39 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 40 diag += 1; 41 mdiag += 1; 42 } 43 break; 44 case 2: 45 for (i=0; i<mbs; i++) { 46 odiag = v + 4*diag_offset[i]; 47 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 48 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 49 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 50 diag += 4; 51 mdiag += 4; 52 } 53 break; 54 case 3: 55 for (i=0; i<mbs; i++) { 56 odiag = v + 9*diag_offset[i]; 57 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 58 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 59 diag[8] = odiag[8]; 60 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 61 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 62 mdiag[8] = odiag[8]; 63 ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 64 diag += 9; 65 mdiag += 9; 66 } 67 break; 68 case 4: 69 for (i=0; i<mbs; i++) { 70 odiag = v + 16*diag_offset[i]; 71 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 72 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 73 ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 74 diag += 16; 75 mdiag += 16; 76 } 77 break; 78 case 5: 79 for (i=0; i<mbs; i++) { 80 odiag = v + 25*diag_offset[i]; 81 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 82 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 83 ierr = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 84 diag += 25; 85 mdiag += 25; 86 } 87 break; 88 case 6: 89 for (i=0; i<mbs; i++) { 90 odiag = v + 36*diag_offset[i]; 91 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 92 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 94 diag += 36; 95 mdiag += 36; 96 } 97 break; 98 case 7: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 49*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = Kernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 104 diag += 49; 105 mdiag += 49; 106 } 107 break; 108 default: 109 ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr); 110 for (i=0; i<mbs; i++) { 111 odiag = v + bs2*diag_offset[i]; 112 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 113 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 114 ierr = Kernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 115 diag += bs2; 116 mdiag += bs2; 117 } 118 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 119 } 120 a->idiagvalid = PETSC_TRUE; 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatSOR_SeqBAIJ_1" 126 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 127 { 128 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 129 PetscScalar *x,x1,s1; 130 const PetscScalar *b; 131 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 132 PetscErrorCode ierr; 133 PetscInt m = a->mbs,i,i2,nz,j; 134 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 135 136 PetscFunctionBegin; 137 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 138 its = its*lits; 139 if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 140 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 141 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 142 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 143 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 144 145 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 146 147 diag = a->diag; 148 idiag = a->idiag; 149 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 150 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 151 152 if (flag & SOR_ZERO_INITIAL_GUESS) { 153 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 154 x[0] = b[0]*idiag[0]; 155 i2 = 1; 156 idiag += 1; 157 for (i=1; i<m; i++) { 158 v = aa + ai[i]; 159 vi = aj + ai[i]; 160 nz = diag[i] - ai[i]; 161 s1 = b[i2]; 162 for (j=0; j<nz; j++) { 163 s1 -= v[j]*x[vi[j]]; 164 } 165 x[i2] = idiag[0]*s1; 166 idiag += 1; 167 i2 += 1; 168 } 169 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 170 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 171 } 172 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 173 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 174 i2 = 0; 175 mdiag = a->idiag+a->mbs; 176 for (i=0; i<m; i++) { 177 x1 = x[i2]; 178 x[i2] = mdiag[0]*x1; 179 mdiag += 1; 180 i2 += 1; 181 } 182 ierr = PetscLogFlops(m);CHKERRQ(ierr); 183 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 184 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 185 } 186 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 187 idiag = a->idiag+a->mbs - 1; 188 i2 = m - 1; 189 x1 = x[i2]; 190 x[i2] = idiag[0]*x1; 191 idiag -= 1; 192 i2 -= 1; 193 for (i=m-2; i>=0; i--) { 194 v = aa + (diag[i]+1); 195 vi = aj + diag[i] + 1; 196 nz = ai[i+1] - diag[i] - 1; 197 s1 = x[i2]; 198 for (j=0; j<nz; j++) { 199 s1 -= v[j]*x[vi[j]]; 200 } 201 x[i2] = idiag[0]*s1; 202 idiag -= 1; 203 i2 -= 1; 204 } 205 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 206 } 207 } else { 208 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 209 } 210 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 211 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 212 PetscFunctionReturn(0); 213 } 214 215 #undef __FUNCT__ 216 #define __FUNCT__ "MatSOR_SeqBAIJ_2" 217 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 218 { 219 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 220 PetscScalar *x,x1,x2,s1,s2; 221 const PetscScalar *b; 222 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 223 PetscErrorCode ierr; 224 PetscInt m = a->mbs,i,i2,nz,idx,j,it; 225 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 226 227 PetscFunctionBegin; 228 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 229 its = its*lits; 230 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 231 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 232 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 233 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 234 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 235 236 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 237 238 diag = a->diag; 239 idiag = a->idiag; 240 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 241 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 242 243 if (flag & SOR_ZERO_INITIAL_GUESS) { 244 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 245 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 246 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 247 i2 = 2; 248 idiag += 4; 249 for (i=1; i<m; i++) { 250 v = aa + 4*ai[i]; 251 vi = aj + ai[i]; 252 nz = diag[i] - ai[i]; 253 s1 = b[i2]; s2 = b[i2+1]; 254 for (j=0; j<nz; j++) { 255 idx = 2*vi[j]; 256 it = 4*j; 257 x1 = x[idx]; x2 = x[1+idx]; 258 s1 -= v[it]*x1 + v[it+2]*x2; 259 s2 -= v[it+1]*x1 + v[it+3]*x2; 260 } 261 x[i2] = idiag[0]*s1 + idiag[2]*s2; 262 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 263 idiag += 4; 264 i2 += 2; 265 } 266 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 267 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 268 } 269 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 270 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 271 i2 = 0; 272 mdiag = a->idiag+4*a->mbs; 273 for (i=0; i<m; i++) { 274 x1 = x[i2]; x2 = x[i2+1]; 275 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 276 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 277 mdiag += 4; 278 i2 += 2; 279 } 280 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 281 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 282 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 283 } 284 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 285 idiag = a->idiag+4*a->mbs - 4; 286 i2 = 2*m - 2; 287 x1 = x[i2]; x2 = x[i2+1]; 288 x[i2] = idiag[0]*x1 + idiag[2]*x2; 289 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 290 idiag -= 4; 291 i2 -= 2; 292 for (i=m-2; i>=0; i--) { 293 v = aa + 4*(diag[i]+1); 294 vi = aj + diag[i] + 1; 295 nz = ai[i+1] - diag[i] - 1; 296 s1 = x[i2]; s2 = x[i2+1]; 297 for (j=0; j<nz; j++) { 298 idx = 2*vi[j]; 299 it = 4*j; 300 x1 = x[idx]; x2 = x[1+idx]; 301 s1 -= v[it]*x1 + v[it+2]*x2; 302 s2 -= v[it+1]*x1 + v[it+3]*x2; 303 } 304 x[i2] = idiag[0]*s1 + idiag[2]*s2; 305 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 306 idiag -= 4; 307 i2 -= 2; 308 } 309 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 310 } 311 } else { 312 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 313 } 314 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 315 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 316 PetscFunctionReturn(0); 317 } 318 319 #undef __FUNCT__ 320 #define __FUNCT__ "MatSOR_SeqBAIJ_3" 321 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 322 { 323 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 324 PetscScalar *x,x1,x2,x3,s1,s2,s3; 325 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 326 const PetscScalar *b; 327 PetscErrorCode ierr; 328 PetscInt m = a->mbs,i,i2,nz,idx; 329 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 330 331 PetscFunctionBegin; 332 its = its*lits; 333 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 334 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 335 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 336 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 337 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 338 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 339 340 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 341 342 diag = a->diag; 343 idiag = a->idiag; 344 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 345 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 346 347 if (flag & SOR_ZERO_INITIAL_GUESS) { 348 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 349 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 350 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 351 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 352 i2 = 3; 353 idiag += 9; 354 for (i=1; i<m; i++) { 355 v = aa + 9*ai[i]; 356 vi = aj + ai[i]; 357 nz = diag[i] - ai[i]; 358 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 359 while (nz--) { 360 idx = 3*(*vi++); 361 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 362 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 363 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 364 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 365 v += 9; 366 } 367 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 368 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 369 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 370 idiag += 9; 371 i2 += 3; 372 } 373 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 374 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 375 } 376 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 377 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 378 i2 = 0; 379 mdiag = a->idiag+9*a->mbs; 380 for (i=0; i<m; i++) { 381 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 382 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 383 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 384 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 385 mdiag += 9; 386 i2 += 3; 387 } 388 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 389 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 390 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 391 } 392 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 393 idiag = a->idiag+9*a->mbs - 9; 394 i2 = 3*m - 3; 395 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 396 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 397 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 398 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 399 idiag -= 9; 400 i2 -= 3; 401 for (i=m-2; i>=0; i--) { 402 v = aa + 9*(diag[i]+1); 403 vi = aj + diag[i] + 1; 404 nz = ai[i+1] - diag[i] - 1; 405 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 406 while (nz--) { 407 idx = 3*(*vi++); 408 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 409 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 410 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 411 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 412 v += 9; 413 } 414 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 415 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 416 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 417 idiag -= 9; 418 i2 -= 3; 419 } 420 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 421 } 422 } else { 423 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 424 } 425 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 426 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 427 PetscFunctionReturn(0); 428 } 429 430 #undef __FUNCT__ 431 #define __FUNCT__ "MatSOR_SeqBAIJ_4" 432 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 433 { 434 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 435 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 436 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 437 const PetscScalar *b; 438 PetscErrorCode ierr; 439 PetscInt m = a->mbs,i,i2,nz,idx; 440 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 441 442 PetscFunctionBegin; 443 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 444 its = its*lits; 445 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 446 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 447 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 448 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 449 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 450 451 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 452 453 diag = a->diag; 454 idiag = a->idiag; 455 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 456 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 457 458 if (flag & SOR_ZERO_INITIAL_GUESS) { 459 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 460 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 461 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 462 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 463 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 464 i2 = 4; 465 idiag += 16; 466 for (i=1; i<m; i++) { 467 v = aa + 16*ai[i]; 468 vi = aj + ai[i]; 469 nz = diag[i] - ai[i]; 470 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 471 while (nz--) { 472 idx = 4*(*vi++); 473 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 474 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 475 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 476 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 477 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 478 v += 16; 479 } 480 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 481 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 482 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 483 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 484 idiag += 16; 485 i2 += 4; 486 } 487 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 488 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 489 } 490 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 491 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 492 i2 = 0; 493 mdiag = a->idiag+16*a->mbs; 494 for (i=0; i<m; i++) { 495 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 496 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 497 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 498 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 499 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 500 mdiag += 16; 501 i2 += 4; 502 } 503 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 504 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 505 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 506 } 507 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 508 idiag = a->idiag+16*a->mbs - 16; 509 i2 = 4*m - 4; 510 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 511 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 512 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 513 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 514 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 515 idiag -= 16; 516 i2 -= 4; 517 for (i=m-2; i>=0; i--) { 518 v = aa + 16*(diag[i]+1); 519 vi = aj + diag[i] + 1; 520 nz = ai[i+1] - diag[i] - 1; 521 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 522 while (nz--) { 523 idx = 4*(*vi++); 524 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 525 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 526 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 527 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 528 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 529 v += 16; 530 } 531 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 532 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 533 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 534 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 535 idiag -= 16; 536 i2 -= 4; 537 } 538 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 539 } 540 } else { 541 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 542 } 543 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 544 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 545 PetscFunctionReturn(0); 546 } 547 548 #undef __FUNCT__ 549 #define __FUNCT__ "MatSOR_SeqBAIJ_5" 550 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 551 { 552 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 553 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 554 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 555 const PetscScalar *b; 556 PetscErrorCode ierr; 557 PetscInt m = a->mbs,i,i2,nz,idx; 558 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 559 560 PetscFunctionBegin; 561 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 562 its = its*lits; 563 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 564 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 565 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 566 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 567 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 568 569 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 570 571 diag = a->diag; 572 idiag = a->idiag; 573 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 574 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 575 576 if (flag & SOR_ZERO_INITIAL_GUESS) { 577 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 578 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 579 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 580 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 581 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 582 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 583 i2 = 5; 584 idiag += 25; 585 for (i=1; i<m; i++) { 586 v = aa + 25*ai[i]; 587 vi = aj + ai[i]; 588 nz = diag[i] - ai[i]; 589 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 590 while (nz--) { 591 idx = 5*(*vi++); 592 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 593 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 594 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 595 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 596 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 597 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 598 v += 25; 599 } 600 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 601 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 602 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 603 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 604 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 605 idiag += 25; 606 i2 += 5; 607 } 608 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 609 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 610 } 611 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 612 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 613 i2 = 0; 614 mdiag = a->idiag+25*a->mbs; 615 for (i=0; i<m; i++) { 616 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 617 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 618 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 619 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 620 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 621 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 622 mdiag += 25; 623 i2 += 5; 624 } 625 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 626 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 627 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 628 } 629 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 630 idiag = a->idiag+25*a->mbs - 25; 631 i2 = 5*m - 5; 632 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 633 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 634 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 635 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 636 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 637 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 638 idiag -= 25; 639 i2 -= 5; 640 for (i=m-2; i>=0; i--) { 641 v = aa + 25*(diag[i]+1); 642 vi = aj + diag[i] + 1; 643 nz = ai[i+1] - diag[i] - 1; 644 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 645 while (nz--) { 646 idx = 5*(*vi++); 647 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 648 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 649 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 650 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 651 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 652 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 653 v += 25; 654 } 655 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 656 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 657 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 658 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 659 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 660 idiag -= 25; 661 i2 -= 5; 662 } 663 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 664 } 665 } else { 666 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 667 } 668 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 669 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 670 PetscFunctionReturn(0); 671 } 672 673 #undef __FUNCT__ 674 #define __FUNCT__ "MatSOR_SeqBAIJ_6" 675 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 676 { 677 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 678 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 679 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 680 const PetscScalar *b; 681 PetscErrorCode ierr; 682 PetscInt m = a->mbs,i,i2,nz,idx; 683 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 684 685 PetscFunctionBegin; 686 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 687 its = its*lits; 688 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 689 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 690 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 691 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 692 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 693 694 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 695 696 diag = a->diag; 697 idiag = a->idiag; 698 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 699 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 700 701 if (flag & SOR_ZERO_INITIAL_GUESS) { 702 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 703 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 704 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 705 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 706 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 707 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 708 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 709 i2 = 6; 710 idiag += 36; 711 for (i=1; i<m; i++) { 712 v = aa + 36*ai[i]; 713 vi = aj + ai[i]; 714 nz = diag[i] - ai[i]; 715 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 716 while (nz--) { 717 idx = 6*(*vi++); 718 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 719 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 720 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 721 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 722 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 723 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 724 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 725 v += 36; 726 } 727 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 728 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 729 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 730 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 731 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 732 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 733 idiag += 36; 734 i2 += 6; 735 } 736 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 737 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 738 } 739 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 740 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 741 i2 = 0; 742 mdiag = a->idiag+36*a->mbs; 743 for (i=0; i<m; i++) { 744 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 745 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 746 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 747 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 748 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 749 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 750 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 751 mdiag += 36; 752 i2 += 6; 753 } 754 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 755 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 756 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 757 } 758 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 759 idiag = a->idiag+36*a->mbs - 36; 760 i2 = 6*m - 6; 761 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 762 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 763 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 764 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 765 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 766 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 767 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 768 idiag -= 36; 769 i2 -= 6; 770 for (i=m-2; i>=0; i--) { 771 v = aa + 36*(diag[i]+1); 772 vi = aj + diag[i] + 1; 773 nz = ai[i+1] - diag[i] - 1; 774 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 775 while (nz--) { 776 idx = 6*(*vi++); 777 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 778 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 779 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 780 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 781 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 782 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 783 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 784 v += 36; 785 } 786 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 787 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 788 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 789 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 790 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 791 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 792 idiag -= 36; 793 i2 -= 6; 794 } 795 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 796 } 797 } else { 798 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 799 } 800 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 801 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 802 PetscFunctionReturn(0); 803 } 804 805 #undef __FUNCT__ 806 #define __FUNCT__ "MatSOR_SeqBAIJ_7" 807 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 808 { 809 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 810 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 811 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 812 const PetscScalar *b; 813 PetscErrorCode ierr; 814 PetscInt m = a->mbs,i,i2,nz,idx; 815 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 816 817 PetscFunctionBegin; 818 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 819 its = its*lits; 820 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 821 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 822 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 823 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 824 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 825 826 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 827 828 diag = a->diag; 829 idiag = a->idiag; 830 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 831 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 832 833 if (flag & SOR_ZERO_INITIAL_GUESS) { 834 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 835 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 836 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 837 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 838 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 839 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 840 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 841 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 842 i2 = 7; 843 idiag += 49; 844 for (i=1; i<m; i++) { 845 v = aa + 49*ai[i]; 846 vi = aj + ai[i]; 847 nz = diag[i] - ai[i]; 848 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 849 while (nz--) { 850 idx = 7*(*vi++); 851 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 852 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 853 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 854 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 855 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 856 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 857 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 858 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 859 v += 49; 860 } 861 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 862 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 863 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 864 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 865 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 866 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 867 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 868 idiag += 49; 869 i2 += 7; 870 } 871 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 872 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 873 } 874 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 875 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 876 i2 = 0; 877 mdiag = a->idiag+49*a->mbs; 878 for (i=0; i<m; i++) { 879 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 880 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7; 881 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7; 882 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7; 883 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7; 884 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7; 885 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7; 886 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7; 887 mdiag += 49; 888 i2 += 7; 889 } 890 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 891 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 892 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 893 } 894 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 895 idiag = a->idiag+49*a->mbs - 49; 896 i2 = 7*m - 7; 897 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 898 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 899 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 900 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 901 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 902 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 903 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 904 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 905 idiag -= 49; 906 i2 -= 7; 907 for (i=m-2; i>=0; i--) { 908 v = aa + 49*(diag[i]+1); 909 vi = aj + diag[i] + 1; 910 nz = ai[i+1] - diag[i] - 1; 911 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 912 while (nz--) { 913 idx = 7*(*vi++); 914 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 915 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 916 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 917 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 918 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 919 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 920 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 921 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 922 v += 49; 923 } 924 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 925 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 926 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 927 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 928 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 929 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 930 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 931 idiag -= 49; 932 i2 -= 7; 933 } 934 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 935 } 936 } else { 937 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 938 } 939 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 940 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 941 PetscFunctionReturn(0); 942 } 943 944 #undef __FUNCT__ 945 #define __FUNCT__ "MatSOR_SeqBAIJ_N" 946 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 947 { 948 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 949 PetscScalar *x,*work,*w,*workt; 950 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 951 const PetscScalar *b; 952 PetscErrorCode ierr; 953 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j; 954 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 955 956 PetscFunctionBegin; 957 its = its*lits; 958 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 959 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 960 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 961 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 962 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 963 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 964 965 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 966 967 diag = a->diag; 968 idiag = a->idiag; 969 if (!a->mult_work) { 970 k = PetscMax(A->rmap->n,A->cmap->n); 971 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 972 } 973 work = a->mult_work; 974 if (!a->sor_work) { 975 ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr); 976 } 977 w = a->sor_work; 978 979 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 980 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 981 982 if (flag & SOR_ZERO_INITIAL_GUESS) { 983 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 984 Kernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 985 /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 986 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 987 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/ 988 i2 = bs; 989 idiag += bs2; 990 for (i=1; i<m; i++) { 991 v = aa + bs2*ai[i]; 992 vi = aj + ai[i]; 993 nz = diag[i] - ai[i]; 994 995 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 996 /* copy all rows of x that are needed into contiguous space */ 997 workt = work; 998 for (j=0; j<nz; j++) { 999 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1000 workt += bs; 1001 } 1002 Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1003 /*s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 1004 while (nz--) { 1005 idx = N*(*vi++); 1006 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 1007 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1008 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1009 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1010 v += N2; 1011 } */ 1012 1013 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1014 /* x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1015 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1016 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/ 1017 1018 idiag += bs2; 1019 i2 += bs; 1020 } 1021 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 1022 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1023 } 1024 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 1025 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1026 i2 = 0; 1027 mdiag = a->idiag+bs2*a->mbs; 1028 ierr = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr); 1029 for (i=0; i<m; i++) { 1030 Kernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2); 1031 /* x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1032 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 1033 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 1034 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */ 1035 1036 mdiag += bs2; 1037 i2 += bs; 1038 } 1039 ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr); 1040 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1041 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 1042 } 1043 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1044 idiag = a->idiag+bs2*a->mbs - bs2; 1045 i2 = bs*m - bs; 1046 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1047 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1048 /*x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1049 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 1050 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 1051 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/ 1052 idiag -= bs2; 1053 i2 -= bs; 1054 for (i=m-2; i>=0; i--) { 1055 v = aa + bs2*(diag[i]+1); 1056 vi = aj + diag[i] + 1; 1057 nz = ai[i+1] - diag[i] - 1; 1058 1059 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1060 /* copy all rows of x that are needed into contiguous space */ 1061 workt = work; 1062 for (j=0; j<nz; j++) { 1063 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1064 workt += bs; 1065 } 1066 Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1067 /* s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 1068 while (nz--) { 1069 idx = N*(*vi++); 1070 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 1071 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1072 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1073 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1074 v += N2; 1075 } */ 1076 1077 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1078 /*x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1079 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1080 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */ 1081 idiag -= bs2; 1082 i2 -= bs; 1083 } 1084 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1085 } 1086 } else { 1087 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 1088 } 1089 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1090 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1091 PetscFunctionReturn(0); 1092 } 1093 1094 /* 1095 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1096 */ 1097 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1098 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1099 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1100 #define matsetvaluesblocked4_ matsetvaluesblocked4 1101 #endif 1102 1103 EXTERN_C_BEGIN 1104 #undef __FUNCT__ 1105 #define __FUNCT__ "matsetvaluesblocked4_" 1106 void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 1107 { 1108 Mat A = *AA; 1109 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1110 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 1111 PetscInt *ai=a->i,*ailen=a->ilen; 1112 PetscInt *aj=a->j,stepval,lastcol = -1; 1113 const PetscScalar *value = v; 1114 MatScalar *ap,*aa = a->a,*bap; 1115 1116 PetscFunctionBegin; 1117 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 1118 stepval = (n-1)*4; 1119 for (k=0; k<m; k++) { /* loop over added rows */ 1120 row = im[k]; 1121 rp = aj + ai[row]; 1122 ap = aa + 16*ai[row]; 1123 nrow = ailen[row]; 1124 low = 0; 1125 high = nrow; 1126 for (l=0; l<n; l++) { /* loop over added columns */ 1127 col = in[l]; 1128 if (col <= lastcol) low = 0; else high = nrow; 1129 lastcol = col; 1130 value = v + k*(stepval+4 + l)*4; 1131 while (high-low > 7) { 1132 t = (low+high)/2; 1133 if (rp[t] > col) high = t; 1134 else low = t; 1135 } 1136 for (i=low; i<high; i++) { 1137 if (rp[i] > col) break; 1138 if (rp[i] == col) { 1139 bap = ap + 16*i; 1140 for (ii=0; ii<4; ii++,value+=stepval) { 1141 for (jj=ii; jj<16; jj+=4) { 1142 bap[jj] += *value++; 1143 } 1144 } 1145 goto noinsert2; 1146 } 1147 } 1148 N = nrow++ - 1; 1149 high++; /* added new column index thus must search to one higher than before */ 1150 /* shift up all the later entries in this row */ 1151 for (ii=N; ii>=i; ii--) { 1152 rp[ii+1] = rp[ii]; 1153 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1154 } 1155 if (N >= i) { 1156 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1157 } 1158 rp[i] = col; 1159 bap = ap + 16*i; 1160 for (ii=0; ii<4; ii++,value+=stepval) { 1161 for (jj=ii; jj<16; jj+=4) { 1162 bap[jj] = *value++; 1163 } 1164 } 1165 noinsert2:; 1166 low = i; 1167 } 1168 ailen[row] = nrow; 1169 } 1170 PetscFunctionReturnVoid(); 1171 } 1172 EXTERN_C_END 1173 1174 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1175 #define matsetvalues4_ MATSETVALUES4 1176 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1177 #define matsetvalues4_ matsetvalues4 1178 #endif 1179 1180 EXTERN_C_BEGIN 1181 #undef __FUNCT__ 1182 #define __FUNCT__ "MatSetValues4_" 1183 void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1184 { 1185 Mat A = *AA; 1186 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1187 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1188 PetscInt *ai=a->i,*ailen=a->ilen; 1189 PetscInt *aj=a->j,brow,bcol; 1190 PetscInt ridx,cidx,lastcol = -1; 1191 MatScalar *ap,value,*aa=a->a,*bap; 1192 1193 PetscFunctionBegin; 1194 for (k=0; k<m; k++) { /* loop over added rows */ 1195 row = im[k]; brow = row/4; 1196 rp = aj + ai[brow]; 1197 ap = aa + 16*ai[brow]; 1198 nrow = ailen[brow]; 1199 low = 0; 1200 high = nrow; 1201 for (l=0; l<n; l++) { /* loop over added columns */ 1202 col = in[l]; bcol = col/4; 1203 ridx = row % 4; cidx = col % 4; 1204 value = v[l + k*n]; 1205 if (col <= lastcol) low = 0; else high = nrow; 1206 lastcol = col; 1207 while (high-low > 7) { 1208 t = (low+high)/2; 1209 if (rp[t] > bcol) high = t; 1210 else low = t; 1211 } 1212 for (i=low; i<high; i++) { 1213 if (rp[i] > bcol) break; 1214 if (rp[i] == bcol) { 1215 bap = ap + 16*i + 4*cidx + ridx; 1216 *bap += value; 1217 goto noinsert1; 1218 } 1219 } 1220 N = nrow++ - 1; 1221 high++; /* added new column thus must search to one higher than before */ 1222 /* shift up all the later entries in this row */ 1223 for (ii=N; ii>=i; ii--) { 1224 rp[ii+1] = rp[ii]; 1225 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1226 } 1227 if (N>=i) { 1228 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1229 } 1230 rp[i] = bcol; 1231 ap[16*i + 4*cidx + ridx] = value; 1232 noinsert1:; 1233 low = i; 1234 } 1235 ailen[brow] = nrow; 1236 } 1237 PetscFunctionReturnVoid(); 1238 } 1239 EXTERN_C_END 1240 1241 /* 1242 Checks for missing diagonals 1243 */ 1244 #undef __FUNCT__ 1245 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1246 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1247 { 1248 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1249 PetscErrorCode ierr; 1250 PetscInt *diag,*jj = a->j,i; 1251 1252 PetscFunctionBegin; 1253 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1254 *missing = PETSC_FALSE; 1255 if (A->rmap->n > 0 && !jj) { 1256 *missing = PETSC_TRUE; 1257 if (d) *d = 0; 1258 PetscInfo(A,"Matrix has no entries therefor is missing diagonal"); 1259 } else { 1260 diag = a->diag; 1261 for (i=0; i<a->mbs; i++) { 1262 if (jj[diag[i]] != i) { 1263 *missing = PETSC_TRUE; 1264 if (d) *d = i; 1265 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1266 } 1267 } 1268 } 1269 PetscFunctionReturn(0); 1270 } 1271 1272 #undef __FUNCT__ 1273 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1274 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1275 { 1276 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1277 PetscErrorCode ierr; 1278 PetscInt i,j,m = a->mbs; 1279 1280 PetscFunctionBegin; 1281 if (!a->diag) { 1282 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1283 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1284 a->free_diag = PETSC_TRUE; 1285 } 1286 for (i=0; i<m; i++) { 1287 a->diag[i] = a->i[i+1]; 1288 for (j=a->i[i]; j<a->i[i+1]; j++) { 1289 if (a->j[j] == i) { 1290 a->diag[i] = j; 1291 break; 1292 } 1293 } 1294 } 1295 PetscFunctionReturn(0); 1296 } 1297 1298 1299 extern PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1300 1301 #undef __FUNCT__ 1302 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1303 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 1304 { 1305 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1306 PetscErrorCode ierr; 1307 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt; 1308 PetscInt *tia, *tja; 1309 1310 PetscFunctionBegin; 1311 *nn = n; 1312 if (!ia) PetscFunctionReturn(0); 1313 if (symmetric) { 1314 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1315 nz = tia[n]; 1316 } else { 1317 tia = a->i; tja = a->j; 1318 } 1319 1320 if (!blockcompressed && bs > 1) { 1321 (*nn) *= bs; 1322 /* malloc & create the natural set of indices */ 1323 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1324 if (n) { 1325 (*ia)[0] = 0; 1326 for (j=1; j<bs; j++) { 1327 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1328 } 1329 } 1330 1331 for (i=1; i<n; i++) { 1332 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1333 for (j=1; j<bs; j++) { 1334 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1335 } 1336 } 1337 if (n) { 1338 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1339 } 1340 1341 if (ja) { 1342 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1343 cnt = 0; 1344 for (i=0; i<n; i++) { 1345 for (j=0; j<bs; j++) { 1346 for (k=tia[i]; k<tia[i+1]; k++) { 1347 for (l=0; l<bs; l++) { 1348 (*ja)[cnt++] = bs*tja[k] + l; 1349 } 1350 } 1351 } 1352 } 1353 } 1354 1355 n *= bs; 1356 nz *= bs*bs; 1357 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1358 ierr = PetscFree(tia);CHKERRQ(ierr); 1359 ierr = PetscFree(tja);CHKERRQ(ierr); 1360 } 1361 } else if (oshift == 1) { 1362 if (symmetric) { 1363 PetscInt nz = tia[A->rmap->n/bs]; 1364 /* add 1 to i and j indices */ 1365 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1366 *ia = tia; 1367 if (ja) { 1368 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1369 *ja = tja; 1370 } 1371 } else { 1372 PetscInt nz = a->i[A->rmap->n/bs]; 1373 /* malloc space and add 1 to i and j indices */ 1374 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1375 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1376 if (ja) { 1377 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1378 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1379 } 1380 } 1381 } else { 1382 *ia = tia; 1383 if (ja) *ja = tja; 1384 } 1385 1386 PetscFunctionReturn(0); 1387 } 1388 1389 #undef __FUNCT__ 1390 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1391 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 1392 { 1393 PetscErrorCode ierr; 1394 1395 PetscFunctionBegin; 1396 if (!ia) PetscFunctionReturn(0); 1397 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1398 ierr = PetscFree(*ia);CHKERRQ(ierr); 1399 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1400 } 1401 PetscFunctionReturn(0); 1402 } 1403 1404 #undef __FUNCT__ 1405 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1406 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1407 { 1408 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1409 PetscErrorCode ierr; 1410 1411 PetscFunctionBegin; 1412 #if defined(PETSC_USE_LOG) 1413 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1414 #endif 1415 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1416 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1417 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1418 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1419 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1420 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1421 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1422 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1423 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1424 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1425 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1426 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1427 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1428 1429 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1430 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1431 ierr = PetscFree(A->data);CHKERRQ(ierr); 1432 1433 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1434 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1435 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1436 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1437 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1438 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1439 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1440 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1441 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1442 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",PETSC_NULL);CHKERRQ(ierr); 1443 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1444 PetscFunctionReturn(0); 1445 } 1446 1447 #undef __FUNCT__ 1448 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1449 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1450 { 1451 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1452 PetscErrorCode ierr; 1453 1454 PetscFunctionBegin; 1455 switch (op) { 1456 case MAT_ROW_ORIENTED: 1457 a->roworiented = flg; 1458 break; 1459 case MAT_KEEP_NONZERO_PATTERN: 1460 a->keepnonzeropattern = flg; 1461 break; 1462 case MAT_NEW_NONZERO_LOCATIONS: 1463 a->nonew = (flg ? 0 : 1); 1464 break; 1465 case MAT_NEW_NONZERO_LOCATION_ERR: 1466 a->nonew = (flg ? -1 : 0); 1467 break; 1468 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1469 a->nonew = (flg ? -2 : 0); 1470 break; 1471 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1472 a->nounused = (flg ? -1 : 0); 1473 break; 1474 case MAT_CHECK_COMPRESSED_ROW: 1475 a->compressedrow.check = flg; 1476 break; 1477 case MAT_NEW_DIAGONALS: 1478 case MAT_IGNORE_OFF_PROC_ENTRIES: 1479 case MAT_USE_HASH_TABLE: 1480 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1481 break; 1482 case MAT_SYMMETRIC: 1483 case MAT_STRUCTURALLY_SYMMETRIC: 1484 case MAT_HERMITIAN: 1485 case MAT_SYMMETRY_ETERNAL: 1486 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1487 break; 1488 default: 1489 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1490 } 1491 PetscFunctionReturn(0); 1492 } 1493 1494 #undef __FUNCT__ 1495 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1496 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1497 { 1498 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1499 PetscErrorCode ierr; 1500 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1501 MatScalar *aa,*aa_i; 1502 PetscScalar *v_i; 1503 1504 PetscFunctionBegin; 1505 bs = A->rmap->bs; 1506 ai = a->i; 1507 aj = a->j; 1508 aa = a->a; 1509 bs2 = a->bs2; 1510 1511 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1512 1513 bn = row/bs; /* Block number */ 1514 bp = row % bs; /* Block Position */ 1515 M = ai[bn+1] - ai[bn]; 1516 *nz = bs*M; 1517 1518 if (v) { 1519 *v = 0; 1520 if (*nz) { 1521 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1522 for (i=0; i<M; i++) { /* for each block in the block row */ 1523 v_i = *v + i*bs; 1524 aa_i = aa + bs2*(ai[bn] + i); 1525 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1526 } 1527 } 1528 } 1529 1530 if (idx) { 1531 *idx = 0; 1532 if (*nz) { 1533 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1534 for (i=0; i<M; i++) { /* for each block in the block row */ 1535 idx_i = *idx + i*bs; 1536 itmp = bs*aj[ai[bn] + i]; 1537 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1538 } 1539 } 1540 } 1541 PetscFunctionReturn(0); 1542 } 1543 1544 #undef __FUNCT__ 1545 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1546 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1547 { 1548 PetscErrorCode ierr; 1549 1550 PetscFunctionBegin; 1551 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1552 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1553 PetscFunctionReturn(0); 1554 } 1555 1556 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1557 1558 #undef __FUNCT__ 1559 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1560 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1561 { 1562 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1563 Mat C; 1564 PetscErrorCode ierr; 1565 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1566 PetscInt *rows,*cols,bs2=a->bs2; 1567 MatScalar *array; 1568 1569 PetscFunctionBegin; 1570 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1571 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1572 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1573 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1574 1575 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1576 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1577 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1578 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1579 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1580 ierr = PetscFree(col);CHKERRQ(ierr); 1581 } else { 1582 C = *B; 1583 } 1584 1585 array = a->a; 1586 ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 1587 for (i=0; i<mbs; i++) { 1588 cols[0] = i*bs; 1589 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1590 len = ai[i+1] - ai[i]; 1591 for (j=0; j<len; j++) { 1592 rows[0] = (*aj++)*bs; 1593 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1594 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1595 array += bs2; 1596 } 1597 } 1598 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1599 1600 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1601 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1602 1603 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1604 *B = C; 1605 } else { 1606 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1607 } 1608 PetscFunctionReturn(0); 1609 } 1610 1611 EXTERN_C_BEGIN 1612 #undef __FUNCT__ 1613 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1614 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1615 { 1616 PetscErrorCode ierr; 1617 Mat Btrans; 1618 1619 PetscFunctionBegin; 1620 *f = PETSC_FALSE; 1621 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1622 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1623 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1624 PetscFunctionReturn(0); 1625 } 1626 EXTERN_C_END 1627 1628 #undef __FUNCT__ 1629 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1630 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1631 { 1632 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1633 PetscErrorCode ierr; 1634 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1635 int fd; 1636 PetscScalar *aa; 1637 FILE *file; 1638 1639 PetscFunctionBegin; 1640 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1641 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1642 col_lens[0] = MAT_FILE_CLASSID; 1643 1644 col_lens[1] = A->rmap->N; 1645 col_lens[2] = A->cmap->n; 1646 col_lens[3] = a->nz*bs2; 1647 1648 /* store lengths of each row and write (including header) to file */ 1649 count = 0; 1650 for (i=0; i<a->mbs; i++) { 1651 for (j=0; j<bs; j++) { 1652 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1653 } 1654 } 1655 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1656 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1657 1658 /* store column indices (zero start index) */ 1659 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1660 count = 0; 1661 for (i=0; i<a->mbs; i++) { 1662 for (j=0; j<bs; j++) { 1663 for (k=a->i[i]; k<a->i[i+1]; k++) { 1664 for (l=0; l<bs; l++) { 1665 jj[count++] = bs*a->j[k] + l; 1666 } 1667 } 1668 } 1669 } 1670 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1671 ierr = PetscFree(jj);CHKERRQ(ierr); 1672 1673 /* store nonzero values */ 1674 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1675 count = 0; 1676 for (i=0; i<a->mbs; i++) { 1677 for (j=0; j<bs; j++) { 1678 for (k=a->i[i]; k<a->i[i+1]; k++) { 1679 for (l=0; l<bs; l++) { 1680 aa[count++] = a->a[bs2*k + l*bs + j]; 1681 } 1682 } 1683 } 1684 } 1685 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1686 ierr = PetscFree(aa);CHKERRQ(ierr); 1687 1688 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1689 if (file) { 1690 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1691 } 1692 PetscFunctionReturn(0); 1693 } 1694 1695 #undef __FUNCT__ 1696 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1697 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1698 { 1699 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1700 PetscErrorCode ierr; 1701 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1702 PetscViewerFormat format; 1703 1704 PetscFunctionBegin; 1705 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1706 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1707 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1708 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1709 Mat aij; 1710 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1711 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1712 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1713 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1714 PetscFunctionReturn(0); 1715 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1716 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1717 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1718 for (i=0; i<a->mbs; i++) { 1719 for (j=0; j<bs; j++) { 1720 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1721 for (k=a->i[i]; k<a->i[i+1]; k++) { 1722 for (l=0; l<bs; l++) { 1723 #if defined(PETSC_USE_COMPLEX) 1724 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1725 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1726 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1727 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1728 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1729 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1730 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1731 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1732 } 1733 #else 1734 if (a->a[bs2*k + l*bs + j] != 0.0) { 1735 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1736 } 1737 #endif 1738 } 1739 } 1740 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1741 } 1742 } 1743 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1744 } else { 1745 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1746 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1747 for (i=0; i<a->mbs; i++) { 1748 for (j=0; j<bs; j++) { 1749 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1750 for (k=a->i[i]; k<a->i[i+1]; k++) { 1751 for (l=0; l<bs; l++) { 1752 #if defined(PETSC_USE_COMPLEX) 1753 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1754 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1755 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1756 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1757 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1758 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1759 } else { 1760 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1761 } 1762 #else 1763 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1764 #endif 1765 } 1766 } 1767 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1768 } 1769 } 1770 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1771 } 1772 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1773 PetscFunctionReturn(0); 1774 } 1775 1776 #undef __FUNCT__ 1777 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1778 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1779 { 1780 Mat A = (Mat) Aa; 1781 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1782 PetscErrorCode ierr; 1783 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1784 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1785 MatScalar *aa; 1786 PetscViewer viewer; 1787 PetscViewerFormat format; 1788 1789 PetscFunctionBegin; 1790 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1791 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1792 1793 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1794 1795 /* loop over matrix elements drawing boxes */ 1796 1797 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1798 color = PETSC_DRAW_BLUE; 1799 for (i=0,row=0; i<mbs; i++,row+=bs) { 1800 for (j=a->i[i]; j<a->i[i+1]; j++) { 1801 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1802 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1803 aa = a->a + j*bs2; 1804 for (k=0; k<bs; k++) { 1805 for (l=0; l<bs; l++) { 1806 if (PetscRealPart(*aa++) >= 0.) continue; 1807 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1808 } 1809 } 1810 } 1811 } 1812 color = PETSC_DRAW_CYAN; 1813 for (i=0,row=0; i<mbs; i++,row+=bs) { 1814 for (j=a->i[i]; j<a->i[i+1]; j++) { 1815 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1816 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1817 aa = a->a + j*bs2; 1818 for (k=0; k<bs; k++) { 1819 for (l=0; l<bs; l++) { 1820 if (PetscRealPart(*aa++) != 0.) continue; 1821 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1822 } 1823 } 1824 } 1825 } 1826 color = PETSC_DRAW_RED; 1827 for (i=0,row=0; i<mbs; i++,row+=bs) { 1828 for (j=a->i[i]; j<a->i[i+1]; j++) { 1829 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1830 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1831 aa = a->a + j*bs2; 1832 for (k=0; k<bs; k++) { 1833 for (l=0; l<bs; l++) { 1834 if (PetscRealPart(*aa++) <= 0.) continue; 1835 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1836 } 1837 } 1838 } 1839 } 1840 } else { 1841 /* use contour shading to indicate magnitude of values */ 1842 /* first determine max of all nonzero values */ 1843 PetscDraw popup; 1844 PetscReal scale,maxv = 0.0; 1845 1846 for (i=0; i<a->nz*a->bs2; i++) { 1847 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1848 } 1849 scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv; 1850 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1851 if (popup) {ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);} 1852 for (i=0,row=0; i<mbs; i++,row+=bs) { 1853 for (j=a->i[i]; j<a->i[i+1]; j++) { 1854 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1855 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1856 aa = a->a + j*bs2; 1857 for (k=0; k<bs; k++) { 1858 for (l=0; l<bs; l++) { 1859 color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++)); 1860 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1861 } 1862 } 1863 } 1864 } 1865 } 1866 PetscFunctionReturn(0); 1867 } 1868 1869 #undef __FUNCT__ 1870 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1871 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1872 { 1873 PetscErrorCode ierr; 1874 PetscReal xl,yl,xr,yr,w,h; 1875 PetscDraw draw; 1876 PetscBool isnull; 1877 1878 PetscFunctionBegin; 1879 1880 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1881 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1882 1883 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1884 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1885 xr += w; yr += h; xl = -w; yl = -h; 1886 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1887 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1888 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1889 PetscFunctionReturn(0); 1890 } 1891 1892 #undef __FUNCT__ 1893 #define __FUNCT__ "MatView_SeqBAIJ" 1894 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1895 { 1896 PetscErrorCode ierr; 1897 PetscBool iascii,isbinary,isdraw; 1898 1899 PetscFunctionBegin; 1900 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1901 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1902 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1903 if (iascii){ 1904 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1905 } else if (isbinary) { 1906 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1907 } else if (isdraw) { 1908 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1909 } else { 1910 Mat B; 1911 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1912 ierr = MatView(B,viewer);CHKERRQ(ierr); 1913 ierr = MatDestroy(&B);CHKERRQ(ierr); 1914 } 1915 PetscFunctionReturn(0); 1916 } 1917 1918 1919 #undef __FUNCT__ 1920 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1921 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1922 { 1923 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1924 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1925 PetscInt *ai = a->i,*ailen = a->ilen; 1926 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1927 MatScalar *ap,*aa = a->a; 1928 1929 PetscFunctionBegin; 1930 for (k=0; k<m; k++) { /* loop over rows */ 1931 row = im[k]; brow = row/bs; 1932 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1933 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1934 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1935 nrow = ailen[brow]; 1936 for (l=0; l<n; l++) { /* loop over columns */ 1937 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1938 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1939 col = in[l] ; 1940 bcol = col/bs; 1941 cidx = col%bs; 1942 ridx = row%bs; 1943 high = nrow; 1944 low = 0; /* assume unsorted */ 1945 while (high-low > 5) { 1946 t = (low+high)/2; 1947 if (rp[t] > bcol) high = t; 1948 else low = t; 1949 } 1950 for (i=low; i<high; i++) { 1951 if (rp[i] > bcol) break; 1952 if (rp[i] == bcol) { 1953 *v++ = ap[bs2*i+bs*cidx+ridx]; 1954 goto finished; 1955 } 1956 } 1957 *v++ = 0.0; 1958 finished:; 1959 } 1960 } 1961 PetscFunctionReturn(0); 1962 } 1963 1964 #undef __FUNCT__ 1965 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1966 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1967 { 1968 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1969 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1970 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1971 PetscErrorCode ierr; 1972 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1973 PetscBool roworiented=a->roworiented; 1974 const PetscScalar *value = v; 1975 MatScalar *ap,*aa = a->a,*bap; 1976 1977 PetscFunctionBegin; 1978 if (roworiented) { 1979 stepval = (n-1)*bs; 1980 } else { 1981 stepval = (m-1)*bs; 1982 } 1983 for (k=0; k<m; k++) { /* loop over added rows */ 1984 row = im[k]; 1985 if (row < 0) continue; 1986 #if defined(PETSC_USE_DEBUG) 1987 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1988 #endif 1989 rp = aj + ai[row]; 1990 ap = aa + bs2*ai[row]; 1991 rmax = imax[row]; 1992 nrow = ailen[row]; 1993 low = 0; 1994 high = nrow; 1995 for (l=0; l<n; l++) { /* loop over added columns */ 1996 if (in[l] < 0) continue; 1997 #if defined(PETSC_USE_DEBUG) 1998 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1999 #endif 2000 col = in[l]; 2001 if (roworiented) { 2002 value = v + (k*(stepval+bs) + l)*bs; 2003 } else { 2004 value = v + (l*(stepval+bs) + k)*bs; 2005 } 2006 if (col <= lastcol) low = 0; else high = nrow; 2007 lastcol = col; 2008 while (high-low > 7) { 2009 t = (low+high)/2; 2010 if (rp[t] > col) high = t; 2011 else low = t; 2012 } 2013 for (i=low; i<high; i++) { 2014 if (rp[i] > col) break; 2015 if (rp[i] == col) { 2016 bap = ap + bs2*i; 2017 if (roworiented) { 2018 if (is == ADD_VALUES) { 2019 for (ii=0; ii<bs; ii++,value+=stepval) { 2020 for (jj=ii; jj<bs2; jj+=bs) { 2021 bap[jj] += *value++; 2022 } 2023 } 2024 } else { 2025 for (ii=0; ii<bs; ii++,value+=stepval) { 2026 for (jj=ii; jj<bs2; jj+=bs) { 2027 bap[jj] = *value++; 2028 } 2029 } 2030 } 2031 } else { 2032 if (is == ADD_VALUES) { 2033 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2034 for (jj=0; jj<bs; jj++) { 2035 bap[jj] += value[jj]; 2036 } 2037 bap += bs; 2038 } 2039 } else { 2040 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2041 for (jj=0; jj<bs; jj++) { 2042 bap[jj] = value[jj]; 2043 } 2044 bap += bs; 2045 } 2046 } 2047 } 2048 goto noinsert2; 2049 } 2050 } 2051 if (nonew == 1) goto noinsert2; 2052 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2053 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2054 N = nrow++ - 1; high++; 2055 /* shift up all the later entries in this row */ 2056 for (ii=N; ii>=i; ii--) { 2057 rp[ii+1] = rp[ii]; 2058 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2059 } 2060 if (N >= i) { 2061 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2062 } 2063 rp[i] = col; 2064 bap = ap + bs2*i; 2065 if (roworiented) { 2066 for (ii=0; ii<bs; ii++,value+=stepval) { 2067 for (jj=ii; jj<bs2; jj+=bs) { 2068 bap[jj] = *value++; 2069 } 2070 } 2071 } else { 2072 for (ii=0; ii<bs; ii++,value+=stepval) { 2073 for (jj=0; jj<bs; jj++) { 2074 *bap++ = *value++; 2075 } 2076 } 2077 } 2078 noinsert2:; 2079 low = i; 2080 } 2081 ailen[row] = nrow; 2082 } 2083 PetscFunctionReturn(0); 2084 } 2085 2086 #undef __FUNCT__ 2087 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 2088 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 2089 { 2090 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2091 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 2092 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 2093 PetscErrorCode ierr; 2094 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 2095 MatScalar *aa = a->a,*ap; 2096 PetscReal ratio=0.6; 2097 2098 PetscFunctionBegin; 2099 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 2100 2101 if (m) rmax = ailen[0]; 2102 for (i=1; i<mbs; i++) { 2103 /* move each row back by the amount of empty slots (fshift) before it*/ 2104 fshift += imax[i-1] - ailen[i-1]; 2105 rmax = PetscMax(rmax,ailen[i]); 2106 if (fshift) { 2107 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 2108 N = ailen[i]; 2109 for (j=0; j<N; j++) { 2110 ip[j-fshift] = ip[j]; 2111 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2112 } 2113 } 2114 ai[i] = ai[i-1] + ailen[i-1]; 2115 } 2116 if (mbs) { 2117 fshift += imax[mbs-1] - ailen[mbs-1]; 2118 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 2119 } 2120 /* reset ilen and imax for each row */ 2121 for (i=0; i<mbs; i++) { 2122 ailen[i] = imax[i] = ai[i+1] - ai[i]; 2123 } 2124 a->nz = ai[mbs]; 2125 2126 /* diagonals may have moved, so kill the diagonal pointers */ 2127 a->idiagvalid = PETSC_FALSE; 2128 if (fshift && a->diag) { 2129 ierr = PetscFree(a->diag);CHKERRQ(ierr); 2130 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2131 a->diag = 0; 2132 } 2133 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 2134 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 2135 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 2136 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 2137 A->info.mallocs += a->reallocs; 2138 a->reallocs = 0; 2139 A->info.nz_unneeded = (PetscReal)fshift*bs2; 2140 2141 ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 2142 A->same_nonzero = PETSC_TRUE; 2143 PetscFunctionReturn(0); 2144 } 2145 2146 /* 2147 This function returns an array of flags which indicate the locations of contiguous 2148 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2149 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2150 Assume: sizes should be long enough to hold all the values. 2151 */ 2152 #undef __FUNCT__ 2153 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 2154 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2155 { 2156 PetscInt i,j,k,row; 2157 PetscBool flg; 2158 2159 PetscFunctionBegin; 2160 for (i=0,j=0; i<n; j++) { 2161 row = idx[i]; 2162 if (row%bs!=0) { /* Not the begining of a block */ 2163 sizes[j] = 1; 2164 i++; 2165 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2166 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 2167 i++; 2168 } else { /* Begining of the block, so check if the complete block exists */ 2169 flg = PETSC_TRUE; 2170 for (k=1; k<bs; k++) { 2171 if (row+k != idx[i+k]) { /* break in the block */ 2172 flg = PETSC_FALSE; 2173 break; 2174 } 2175 } 2176 if (flg) { /* No break in the bs */ 2177 sizes[j] = bs; 2178 i+= bs; 2179 } else { 2180 sizes[j] = 1; 2181 i++; 2182 } 2183 } 2184 } 2185 *bs_max = j; 2186 PetscFunctionReturn(0); 2187 } 2188 2189 #undef __FUNCT__ 2190 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2191 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2192 { 2193 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2194 PetscErrorCode ierr; 2195 PetscInt i,j,k,count,*rows; 2196 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2197 PetscScalar zero = 0.0; 2198 MatScalar *aa; 2199 const PetscScalar *xx; 2200 PetscScalar *bb; 2201 2202 PetscFunctionBegin; 2203 /* fix right hand side if needed */ 2204 if (x && b) { 2205 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2206 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2207 for (i=0; i<is_n; i++) { 2208 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2209 } 2210 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2211 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2212 } 2213 2214 /* Make a copy of the IS and sort it */ 2215 /* allocate memory for rows,sizes */ 2216 ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2217 2218 /* copy IS values to rows, and sort them */ 2219 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2220 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2221 2222 if (baij->keepnonzeropattern) { 2223 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2224 bs_max = is_n; 2225 A->same_nonzero = PETSC_TRUE; 2226 } else { 2227 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2228 A->same_nonzero = PETSC_FALSE; 2229 } 2230 2231 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2232 row = rows[j]; 2233 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2234 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2235 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2236 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2237 if (diag != (PetscScalar)0.0) { 2238 if (baij->ilen[row/bs] > 0) { 2239 baij->ilen[row/bs] = 1; 2240 baij->j[baij->i[row/bs]] = row/bs; 2241 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2242 } 2243 /* Now insert all the diagonal values for this bs */ 2244 for (k=0; k<bs; k++) { 2245 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2246 } 2247 } else { /* (diag == 0.0) */ 2248 baij->ilen[row/bs] = 0; 2249 } /* end (diag == 0.0) */ 2250 } else { /* (sizes[i] != bs) */ 2251 #if defined (PETSC_USE_DEBUG) 2252 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2253 #endif 2254 for (k=0; k<count; k++) { 2255 aa[0] = zero; 2256 aa += bs; 2257 } 2258 if (diag != (PetscScalar)0.0) { 2259 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2260 } 2261 } 2262 } 2263 2264 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2265 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2266 PetscFunctionReturn(0); 2267 } 2268 2269 #undef __FUNCT__ 2270 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2271 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2272 { 2273 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2274 PetscErrorCode ierr; 2275 PetscInt i,j,k,count; 2276 PetscInt bs=A->rmap->bs,bs2=baij->bs2,row,col; 2277 PetscScalar zero = 0.0; 2278 MatScalar *aa; 2279 const PetscScalar *xx; 2280 PetscScalar *bb; 2281 PetscBool *zeroed,vecs = PETSC_FALSE; 2282 2283 PetscFunctionBegin; 2284 /* fix right hand side if needed */ 2285 if (x && b) { 2286 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2287 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2288 vecs = PETSC_TRUE; 2289 } 2290 A->same_nonzero = PETSC_TRUE; 2291 2292 /* zero the columns */ 2293 ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr); 2294 ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 2295 for (i=0; i<is_n; i++) { 2296 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2297 zeroed[is_idx[i]] = PETSC_TRUE; 2298 } 2299 for (i=0; i<A->rmap->N; i++) { 2300 if (!zeroed[i]) { 2301 row = i/bs; 2302 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2303 for (k=0; k<bs; k++) { 2304 col = bs*baij->j[j] + k; 2305 if (zeroed[col]) { 2306 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2307 if (vecs) bb[i] -= aa[0]*xx[col]; 2308 aa[0] = 0.0; 2309 } 2310 } 2311 } 2312 } else if (vecs) bb[i] = diag*xx[i]; 2313 } 2314 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2315 if (vecs) { 2316 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2317 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2318 } 2319 2320 /* zero the rows */ 2321 for (i=0; i<is_n; i++) { 2322 row = is_idx[i]; 2323 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2324 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2325 for (k=0; k<count; k++) { 2326 aa[0] = zero; 2327 aa += bs; 2328 } 2329 if (diag != (PetscScalar)0.0) { 2330 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2331 } 2332 } 2333 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2334 PetscFunctionReturn(0); 2335 } 2336 2337 #undef __FUNCT__ 2338 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2339 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2340 { 2341 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2342 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2343 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2344 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2345 PetscErrorCode ierr; 2346 PetscInt ridx,cidx,bs2=a->bs2; 2347 PetscBool roworiented=a->roworiented; 2348 MatScalar *ap,value,*aa=a->a,*bap; 2349 2350 PetscFunctionBegin; 2351 if (v) PetscValidScalarPointer(v,6); 2352 for (k=0; k<m; k++) { /* loop over added rows */ 2353 row = im[k]; 2354 brow = row/bs; 2355 if (row < 0) continue; 2356 #if defined(PETSC_USE_DEBUG) 2357 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2358 #endif 2359 rp = aj + ai[brow]; 2360 ap = aa + bs2*ai[brow]; 2361 rmax = imax[brow]; 2362 nrow = ailen[brow]; 2363 low = 0; 2364 high = nrow; 2365 for (l=0; l<n; l++) { /* loop over added columns */ 2366 if (in[l] < 0) continue; 2367 #if defined(PETSC_USE_DEBUG) 2368 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2369 #endif 2370 col = in[l]; bcol = col/bs; 2371 ridx = row % bs; cidx = col % bs; 2372 if (roworiented) { 2373 value = v[l + k*n]; 2374 } else { 2375 value = v[k + l*m]; 2376 } 2377 if (col <= lastcol) low = 0; else high = nrow; 2378 lastcol = col; 2379 while (high-low > 7) { 2380 t = (low+high)/2; 2381 if (rp[t] > bcol) high = t; 2382 else low = t; 2383 } 2384 for (i=low; i<high; i++) { 2385 if (rp[i] > bcol) break; 2386 if (rp[i] == bcol) { 2387 bap = ap + bs2*i + bs*cidx + ridx; 2388 if (is == ADD_VALUES) *bap += value; 2389 else *bap = value; 2390 goto noinsert1; 2391 } 2392 } 2393 if (nonew == 1) goto noinsert1; 2394 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2395 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2396 N = nrow++ - 1; high++; 2397 /* shift up all the later entries in this row */ 2398 for (ii=N; ii>=i; ii--) { 2399 rp[ii+1] = rp[ii]; 2400 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2401 } 2402 if (N>=i) { 2403 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2404 } 2405 rp[i] = bcol; 2406 ap[bs2*i + bs*cidx + ridx] = value; 2407 a->nz++; 2408 noinsert1:; 2409 low = i; 2410 } 2411 ailen[brow] = nrow; 2412 } 2413 A->same_nonzero = PETSC_FALSE; 2414 PetscFunctionReturn(0); 2415 } 2416 2417 #undef __FUNCT__ 2418 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2419 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2420 { 2421 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2422 Mat outA; 2423 PetscErrorCode ierr; 2424 PetscBool row_identity,col_identity; 2425 2426 PetscFunctionBegin; 2427 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2428 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2429 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2430 if (!row_identity || !col_identity) { 2431 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2432 } 2433 2434 outA = inA; 2435 inA->factortype = MAT_FACTOR_LU; 2436 2437 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2438 2439 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2440 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2441 a->row = row; 2442 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2443 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2444 a->col = col; 2445 2446 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2447 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2448 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2449 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2450 2451 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2452 if (!a->solve_work) { 2453 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2454 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2455 } 2456 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2457 2458 PetscFunctionReturn(0); 2459 } 2460 2461 EXTERN_C_BEGIN 2462 #undef __FUNCT__ 2463 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2464 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2465 { 2466 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2467 PetscInt i,nz,mbs; 2468 2469 PetscFunctionBegin; 2470 nz = baij->maxnz; 2471 mbs = baij->mbs; 2472 for (i=0; i<nz; i++) { 2473 baij->j[i] = indices[i]; 2474 } 2475 baij->nz = nz; 2476 for (i=0; i<mbs; i++) { 2477 baij->ilen[i] = baij->imax[i]; 2478 } 2479 PetscFunctionReturn(0); 2480 } 2481 EXTERN_C_END 2482 2483 #undef __FUNCT__ 2484 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2485 /*@ 2486 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2487 in the matrix. 2488 2489 Input Parameters: 2490 + mat - the SeqBAIJ matrix 2491 - indices - the column indices 2492 2493 Level: advanced 2494 2495 Notes: 2496 This can be called if you have precomputed the nonzero structure of the 2497 matrix and want to provide it to the matrix object to improve the performance 2498 of the MatSetValues() operation. 2499 2500 You MUST have set the correct numbers of nonzeros per row in the call to 2501 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2502 2503 MUST be called before any calls to MatSetValues(); 2504 2505 @*/ 2506 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2507 { 2508 PetscErrorCode ierr; 2509 2510 PetscFunctionBegin; 2511 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2512 PetscValidPointer(indices,2); 2513 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr); 2514 PetscFunctionReturn(0); 2515 } 2516 2517 #undef __FUNCT__ 2518 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2519 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2520 { 2521 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2522 PetscErrorCode ierr; 2523 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2524 PetscReal atmp; 2525 PetscScalar *x,zero = 0.0; 2526 MatScalar *aa; 2527 PetscInt ncols,brow,krow,kcol; 2528 2529 PetscFunctionBegin; 2530 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2531 bs = A->rmap->bs; 2532 aa = a->a; 2533 ai = a->i; 2534 aj = a->j; 2535 mbs = a->mbs; 2536 2537 ierr = VecSet(v,zero);CHKERRQ(ierr); 2538 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2539 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2540 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2541 for (i=0; i<mbs; i++) { 2542 ncols = ai[1] - ai[0]; ai++; 2543 brow = bs*i; 2544 for (j=0; j<ncols; j++){ 2545 for (kcol=0; kcol<bs; kcol++){ 2546 for (krow=0; krow<bs; krow++){ 2547 atmp = PetscAbsScalar(*aa);aa++; 2548 row = brow + krow; /* row index */ 2549 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2550 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2551 } 2552 } 2553 aj++; 2554 } 2555 } 2556 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2557 PetscFunctionReturn(0); 2558 } 2559 2560 #undef __FUNCT__ 2561 #define __FUNCT__ "MatCopy_SeqBAIJ" 2562 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2563 { 2564 PetscErrorCode ierr; 2565 2566 PetscFunctionBegin; 2567 /* If the two matrices have the same copy implementation, use fast copy. */ 2568 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2569 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2570 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2571 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2572 2573 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2574 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2575 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2576 } else { 2577 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2578 } 2579 PetscFunctionReturn(0); 2580 } 2581 2582 #undef __FUNCT__ 2583 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2584 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2585 { 2586 PetscErrorCode ierr; 2587 2588 PetscFunctionBegin; 2589 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2590 PetscFunctionReturn(0); 2591 } 2592 2593 #undef __FUNCT__ 2594 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2595 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2596 { 2597 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2598 PetscFunctionBegin; 2599 *array = a->a; 2600 PetscFunctionReturn(0); 2601 } 2602 2603 #undef __FUNCT__ 2604 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2605 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2606 { 2607 PetscFunctionBegin; 2608 PetscFunctionReturn(0); 2609 } 2610 2611 #undef __FUNCT__ 2612 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2613 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2614 { 2615 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2616 PetscErrorCode ierr; 2617 PetscInt i,bs=Y->rmap->bs,j,bs2=bs*bs; 2618 PetscBLASInt one=1; 2619 2620 PetscFunctionBegin; 2621 if (str == SAME_NONZERO_PATTERN) { 2622 PetscScalar alpha = a; 2623 PetscBLASInt bnz = PetscBLASIntCast(x->nz*bs2); 2624 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2625 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2626 if (y->xtoy && y->XtoY != X) { 2627 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2628 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2629 } 2630 if (!y->xtoy) { /* get xtoy */ 2631 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2632 y->XtoY = X; 2633 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2634 } 2635 for (i=0; i<x->nz; i++) { 2636 j = 0; 2637 while (j < bs2){ 2638 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2639 j++; 2640 } 2641 } 2642 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2643 } else { 2644 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2645 } 2646 PetscFunctionReturn(0); 2647 } 2648 2649 #undef __FUNCT__ 2650 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ" 2651 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs) 2652 { 2653 PetscInt rbs,cbs; 2654 PetscErrorCode ierr; 2655 2656 PetscFunctionBegin; 2657 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 2658 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 2659 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 2660 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 2661 PetscFunctionReturn(0); 2662 } 2663 2664 #undef __FUNCT__ 2665 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2666 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2667 { 2668 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2669 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2670 MatScalar *aa = a->a; 2671 2672 PetscFunctionBegin; 2673 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2674 PetscFunctionReturn(0); 2675 } 2676 2677 #undef __FUNCT__ 2678 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2679 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2680 { 2681 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2682 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2683 MatScalar *aa = a->a; 2684 2685 PetscFunctionBegin; 2686 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2687 PetscFunctionReturn(0); 2688 } 2689 2690 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2691 2692 #undef __FUNCT__ 2693 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2694 /* 2695 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2696 */ 2697 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 2698 { 2699 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2700 PetscErrorCode ierr; 2701 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2702 PetscInt nz = a->i[m],row,*jj,mr,col; 2703 2704 PetscFunctionBegin; 2705 *nn = n; 2706 if (!ia) PetscFunctionReturn(0); 2707 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2708 else { 2709 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2710 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2711 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2712 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2713 jj = a->j; 2714 for (i=0; i<nz; i++) { 2715 collengths[jj[i]]++; 2716 } 2717 cia[0] = oshift; 2718 for (i=0; i<n; i++) { 2719 cia[i+1] = cia[i] + collengths[i]; 2720 } 2721 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2722 jj = a->j; 2723 for (row=0; row<m; row++) { 2724 mr = a->i[row+1] - a->i[row]; 2725 for (i=0; i<mr; i++) { 2726 col = *jj++; 2727 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2728 } 2729 } 2730 ierr = PetscFree(collengths);CHKERRQ(ierr); 2731 *ia = cia; *ja = cja; 2732 } 2733 PetscFunctionReturn(0); 2734 } 2735 2736 #undef __FUNCT__ 2737 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2738 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 2739 { 2740 PetscErrorCode ierr; 2741 2742 PetscFunctionBegin; 2743 if (!ia) PetscFunctionReturn(0); 2744 ierr = PetscFree(*ia);CHKERRQ(ierr); 2745 ierr = PetscFree(*ja);CHKERRQ(ierr); 2746 PetscFunctionReturn(0); 2747 } 2748 2749 #undef __FUNCT__ 2750 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2751 PetscErrorCode MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2752 { 2753 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f; 2754 PetscErrorCode ierr; 2755 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2; 2756 PetscScalar dx,*y,*xx,*w3_array; 2757 PetscScalar *vscale_array; 2758 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2759 Vec w1=coloring->w1,w2=coloring->w2,w3; 2760 void *fctx = coloring->fctx; 2761 PetscBool flg = PETSC_FALSE; 2762 PetscInt ctype=coloring->ctype,N,col_start=0,col_end=0; 2763 Vec x1_tmp; 2764 2765 PetscFunctionBegin; 2766 PetscValidHeaderSpecific(J,MAT_CLASSID,1); 2767 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2); 2768 PetscValidHeaderSpecific(x1,VEC_CLASSID,3); 2769 if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2770 2771 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2772 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2773 ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr); 2774 if (flg) { 2775 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2776 } else { 2777 PetscBool assembled; 2778 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2779 if (assembled) { 2780 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2781 } 2782 } 2783 2784 x1_tmp = x1; 2785 if (!coloring->vscale){ 2786 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2787 } 2788 2789 /* 2790 This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets 2791 coloring->F for the coarser grids from the finest 2792 */ 2793 if (coloring->F) { 2794 ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr); 2795 ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr); 2796 if (m1 != m2) { 2797 coloring->F = 0; 2798 } 2799 } 2800 2801 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2802 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2803 } 2804 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2805 2806 /* Set w1 = F(x1) */ 2807 if (coloring->F) { 2808 w1 = coloring->F; /* use already computed value of function */ 2809 coloring->F = 0; 2810 } else { 2811 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2812 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2813 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2814 } 2815 2816 if (!coloring->w3) { 2817 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2818 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2819 } 2820 w3 = coloring->w3; 2821 2822 CHKMEMQ; 2823 /* Compute all the local scale factors, including ghost points */ 2824 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2825 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2826 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2827 if (ctype == IS_COLORING_GHOSTED){ 2828 col_start = 0; col_end = N; 2829 } else if (ctype == IS_COLORING_GLOBAL){ 2830 xx = xx - start; 2831 vscale_array = vscale_array - start; 2832 col_start = start; col_end = N + start; 2833 } CHKMEMQ; 2834 for (col=col_start; col<col_end; col++){ 2835 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2836 if (coloring->htype[0] == 'w') { 2837 dx = 1.0 + unorm; 2838 } else { 2839 dx = xx[col]; 2840 } 2841 if (dx == (PetscScalar)0.0) dx = 1.0; 2842 #if !defined(PETSC_USE_COMPLEX) 2843 if (dx < umin && dx >= 0.0) dx = umin; 2844 else if (dx < 0.0 && dx > -umin) dx = -umin; 2845 #else 2846 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2847 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2848 #endif 2849 dx *= epsilon; 2850 vscale_array[col] = (PetscScalar)1.0/dx; 2851 } CHKMEMQ; 2852 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2853 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2854 if (ctype == IS_COLORING_GLOBAL){ 2855 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2856 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2857 } 2858 CHKMEMQ; 2859 if (coloring->vscaleforrow) { 2860 vscaleforrow = coloring->vscaleforrow; 2861 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2862 2863 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2864 /* 2865 Loop over each color 2866 */ 2867 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2868 for (k=0; k<coloring->ncolors; k++) { 2869 coloring->currentcolor = k; 2870 for (i=0; i<bs; i++) { 2871 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2872 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2873 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2874 /* 2875 Loop over each column associated with color 2876 adding the perturbation to the vector w3. 2877 */ 2878 for (l=0; l<coloring->ncolumns[k]; l++) { 2879 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2880 if (coloring->htype[0] == 'w') { 2881 dx = 1.0 + unorm; 2882 } else { 2883 dx = xx[col]; 2884 } 2885 if (dx == (PetscScalar)0.0) dx = 1.0; 2886 #if !defined(PETSC_USE_COMPLEX) 2887 if (dx < umin && dx >= 0.0) dx = umin; 2888 else if (dx < 0.0 && dx > -umin) dx = -umin; 2889 #else 2890 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2891 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2892 #endif 2893 dx *= epsilon; 2894 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2895 w3_array[col] += dx; 2896 } 2897 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2898 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2899 2900 /* 2901 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2902 w2 = F(x1 + dx) - F(x1) 2903 */ 2904 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2905 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2906 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2907 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2908 2909 /* 2910 Loop over rows of vector, putting results into Jacobian matrix 2911 */ 2912 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2913 for (l=0; l<coloring->nrows[k]; l++) { 2914 row = bs*coloring->rows[k][l]; /* local row index */ 2915 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2916 for (j=0; j<bs; j++) { 2917 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2918 srows[j] = row + start + j; 2919 } 2920 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2921 } 2922 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2923 } 2924 } /* endof for each color */ 2925 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2926 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2927 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2928 ierr = PetscFree(srows);CHKERRQ(ierr); 2929 2930 coloring->currentcolor = -1; 2931 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2932 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2933 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2934 PetscFunctionReturn(0); 2935 } 2936 2937 /* -------------------------------------------------------------------*/ 2938 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2939 MatGetRow_SeqBAIJ, 2940 MatRestoreRow_SeqBAIJ, 2941 MatMult_SeqBAIJ_N, 2942 /* 4*/ MatMultAdd_SeqBAIJ_N, 2943 MatMultTranspose_SeqBAIJ, 2944 MatMultTransposeAdd_SeqBAIJ, 2945 0, 2946 0, 2947 0, 2948 /*10*/ 0, 2949 MatLUFactor_SeqBAIJ, 2950 0, 2951 0, 2952 MatTranspose_SeqBAIJ, 2953 /*15*/ MatGetInfo_SeqBAIJ, 2954 MatEqual_SeqBAIJ, 2955 MatGetDiagonal_SeqBAIJ, 2956 MatDiagonalScale_SeqBAIJ, 2957 MatNorm_SeqBAIJ, 2958 /*20*/ 0, 2959 MatAssemblyEnd_SeqBAIJ, 2960 MatSetOption_SeqBAIJ, 2961 MatZeroEntries_SeqBAIJ, 2962 /*24*/ MatZeroRows_SeqBAIJ, 2963 0, 2964 0, 2965 0, 2966 0, 2967 /*29*/ MatSetUpPreallocation_SeqBAIJ, 2968 0, 2969 0, 2970 MatGetArray_SeqBAIJ, 2971 MatRestoreArray_SeqBAIJ, 2972 /*34*/ MatDuplicate_SeqBAIJ, 2973 0, 2974 0, 2975 MatILUFactor_SeqBAIJ, 2976 0, 2977 /*39*/ MatAXPY_SeqBAIJ, 2978 MatGetSubMatrices_SeqBAIJ, 2979 MatIncreaseOverlap_SeqBAIJ, 2980 MatGetValues_SeqBAIJ, 2981 MatCopy_SeqBAIJ, 2982 /*44*/ 0, 2983 MatScale_SeqBAIJ, 2984 0, 2985 0, 2986 MatZeroRowsColumns_SeqBAIJ, 2987 /*49*/ MatSetBlockSize_SeqBAIJ, 2988 MatGetRowIJ_SeqBAIJ, 2989 MatRestoreRowIJ_SeqBAIJ, 2990 MatGetColumnIJ_SeqBAIJ, 2991 MatRestoreColumnIJ_SeqBAIJ, 2992 /*54*/ MatFDColoringCreate_SeqAIJ, 2993 0, 2994 0, 2995 0, 2996 MatSetValuesBlocked_SeqBAIJ, 2997 /*59*/ MatGetSubMatrix_SeqBAIJ, 2998 MatDestroy_SeqBAIJ, 2999 MatView_SeqBAIJ, 3000 0, 3001 0, 3002 /*64*/ 0, 3003 0, 3004 0, 3005 0, 3006 0, 3007 /*69*/ MatGetRowMaxAbs_SeqBAIJ, 3008 0, 3009 MatConvert_Basic, 3010 0, 3011 0, 3012 /*74*/ 0, 3013 MatFDColoringApply_BAIJ, 3014 0, 3015 0, 3016 0, 3017 /*79*/ 0, 3018 0, 3019 0, 3020 0, 3021 MatLoad_SeqBAIJ, 3022 /*84*/ 0, 3023 0, 3024 0, 3025 0, 3026 0, 3027 /*89*/ 0, 3028 0, 3029 0, 3030 0, 3031 0, 3032 /*94*/ 0, 3033 0, 3034 0, 3035 0, 3036 0, 3037 /*99*/0, 3038 0, 3039 0, 3040 0, 3041 0, 3042 /*104*/0, 3043 MatRealPart_SeqBAIJ, 3044 MatImaginaryPart_SeqBAIJ, 3045 0, 3046 0, 3047 /*109*/0, 3048 0, 3049 0, 3050 0, 3051 MatMissingDiagonal_SeqBAIJ, 3052 /*114*/0, 3053 0, 3054 0, 3055 0, 3056 0, 3057 /*119*/0, 3058 0, 3059 MatMultHermitianTranspose_SeqBAIJ, 3060 MatMultHermitianTransposeAdd_SeqBAIJ, 3061 0, 3062 /*124*/0, 3063 0, 3064 MatInvertBlockDiagonal_SeqBAIJ 3065 }; 3066 3067 EXTERN_C_BEGIN 3068 #undef __FUNCT__ 3069 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 3070 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3071 { 3072 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3073 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 3074 PetscErrorCode ierr; 3075 3076 PetscFunctionBegin; 3077 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3078 3079 /* allocate space for values if not already there */ 3080 if (!aij->saved_values) { 3081 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 3082 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 3083 } 3084 3085 /* copy values over */ 3086 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3087 PetscFunctionReturn(0); 3088 } 3089 EXTERN_C_END 3090 3091 EXTERN_C_BEGIN 3092 #undef __FUNCT__ 3093 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 3094 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3095 { 3096 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3097 PetscErrorCode ierr; 3098 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 3099 3100 PetscFunctionBegin; 3101 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3102 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 3103 3104 /* copy values over */ 3105 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3106 PetscFunctionReturn(0); 3107 } 3108 EXTERN_C_END 3109 3110 EXTERN_C_BEGIN 3111 extern PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 3112 extern PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 3113 EXTERN_C_END 3114 3115 EXTERN_C_BEGIN 3116 #undef __FUNCT__ 3117 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 3118 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 3119 { 3120 Mat_SeqBAIJ *b; 3121 PetscErrorCode ierr; 3122 PetscInt i,mbs,nbs,bs2,newbs = PetscAbs(bs); 3123 PetscBool flg,skipallocation = PETSC_FALSE; 3124 3125 PetscFunctionBegin; 3126 3127 if (nz == MAT_SKIP_ALLOCATION) { 3128 skipallocation = PETSC_TRUE; 3129 nz = 0; 3130 } 3131 3132 if (bs < 0) { 3133 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 3134 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 3135 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3136 bs = PetscAbs(bs); 3137 } 3138 if (nnz && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 3139 bs = newbs; 3140 3141 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3142 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3143 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3144 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3145 3146 B->preallocated = PETSC_TRUE; 3147 3148 mbs = B->rmap->n/bs; 3149 nbs = B->cmap->n/bs; 3150 bs2 = bs*bs; 3151 3152 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 3153 3154 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3155 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 3156 if (nnz) { 3157 for (i=0; i<mbs; i++) { 3158 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 3159 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 3160 } 3161 } 3162 3163 b = (Mat_SeqBAIJ*)B->data; 3164 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 3165 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3166 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3167 3168 if (!flg) { 3169 switch (bs) { 3170 case 1: 3171 B->ops->mult = MatMult_SeqBAIJ_1; 3172 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3173 B->ops->sor = MatSOR_SeqBAIJ_1; 3174 break; 3175 case 2: 3176 B->ops->mult = MatMult_SeqBAIJ_2; 3177 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3178 B->ops->sor = MatSOR_SeqBAIJ_2; 3179 break; 3180 case 3: 3181 B->ops->mult = MatMult_SeqBAIJ_3; 3182 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3183 B->ops->sor = MatSOR_SeqBAIJ_3; 3184 break; 3185 case 4: 3186 B->ops->mult = MatMult_SeqBAIJ_4; 3187 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3188 B->ops->sor = MatSOR_SeqBAIJ_4; 3189 break; 3190 case 5: 3191 B->ops->mult = MatMult_SeqBAIJ_5; 3192 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3193 B->ops->sor = MatSOR_SeqBAIJ_5; 3194 break; 3195 case 6: 3196 B->ops->mult = MatMult_SeqBAIJ_6; 3197 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3198 B->ops->sor = MatSOR_SeqBAIJ_6; 3199 break; 3200 case 7: 3201 B->ops->mult = MatMult_SeqBAIJ_7; 3202 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3203 B->ops->sor = MatSOR_SeqBAIJ_7; 3204 break; 3205 case 15: 3206 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3207 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3208 B->ops->sor = MatSOR_SeqBAIJ_N; 3209 break; 3210 default: 3211 B->ops->mult = MatMult_SeqBAIJ_N; 3212 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3213 B->ops->sor = MatSOR_SeqBAIJ_N; 3214 break; 3215 } 3216 } 3217 B->rmap->bs = bs; 3218 b->mbs = mbs; 3219 b->nbs = nbs; 3220 if (!skipallocation) { 3221 if (!b->imax) { 3222 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 3223 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt)); 3224 b->free_imax_ilen = PETSC_TRUE; 3225 } 3226 /* b->ilen will count nonzeros in each block row so far. */ 3227 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 3228 if (!nnz) { 3229 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3230 else if (nz < 0) nz = 1; 3231 for (i=0; i<mbs; i++) b->imax[i] = nz; 3232 nz = nz*mbs; 3233 } else { 3234 nz = 0; 3235 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 3236 } 3237 3238 /* allocate the matrix space */ 3239 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 3240 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 3241 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 3242 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 3243 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3244 b->singlemalloc = PETSC_TRUE; 3245 b->i[0] = 0; 3246 for (i=1; i<mbs+1; i++) { 3247 b->i[i] = b->i[i-1] + b->imax[i-1]; 3248 } 3249 b->free_a = PETSC_TRUE; 3250 b->free_ij = PETSC_TRUE; 3251 } else { 3252 b->free_a = PETSC_FALSE; 3253 b->free_ij = PETSC_FALSE; 3254 } 3255 3256 B->rmap->bs = bs; 3257 b->bs2 = bs2; 3258 b->mbs = mbs; 3259 b->nz = 0; 3260 b->maxnz = nz; 3261 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3262 PetscFunctionReturn(0); 3263 } 3264 EXTERN_C_END 3265 3266 EXTERN_C_BEGIN 3267 #undef __FUNCT__ 3268 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3269 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3270 { 3271 PetscInt i,m,nz,nz_max=0,*nnz; 3272 PetscScalar *values=0; 3273 PetscErrorCode ierr; 3274 3275 PetscFunctionBegin; 3276 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3277 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3278 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3279 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3280 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3281 m = B->rmap->n/bs; 3282 3283 if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 3284 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3285 for(i=0; i<m; i++) { 3286 nz = ii[i+1]- ii[i]; 3287 if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 3288 nz_max = PetscMax(nz_max, nz); 3289 nnz[i] = nz; 3290 } 3291 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3292 ierr = PetscFree(nnz);CHKERRQ(ierr); 3293 3294 values = (PetscScalar*)V; 3295 if (!values) { 3296 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3297 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3298 } 3299 for (i=0; i<m; i++) { 3300 PetscInt ncols = ii[i+1] - ii[i]; 3301 const PetscInt *icols = jj + ii[i]; 3302 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3303 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3304 } 3305 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3306 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3307 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3308 3309 PetscFunctionReturn(0); 3310 } 3311 EXTERN_C_END 3312 3313 3314 EXTERN_C_BEGIN 3315 extern PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3316 extern PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*); 3317 #if defined(PETSC_HAVE_MUMPS) 3318 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3319 #endif 3320 extern PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 3321 EXTERN_C_END 3322 3323 /*MC 3324 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3325 block sparse compressed row format. 3326 3327 Options Database Keys: 3328 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3329 3330 Level: beginner 3331 3332 .seealso: MatCreateSeqBAIJ() 3333 M*/ 3334 3335 EXTERN_C_BEGIN 3336 extern PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3337 EXTERN_C_END 3338 3339 EXTERN_C_BEGIN 3340 #undef __FUNCT__ 3341 #define __FUNCT__ "MatCreate_SeqBAIJ" 3342 PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3343 { 3344 PetscErrorCode ierr; 3345 PetscMPIInt size; 3346 Mat_SeqBAIJ *b; 3347 3348 PetscFunctionBegin; 3349 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 3350 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3351 3352 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3353 B->data = (void*)b; 3354 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3355 b->row = 0; 3356 b->col = 0; 3357 b->icol = 0; 3358 b->reallocs = 0; 3359 b->saved_values = 0; 3360 3361 b->roworiented = PETSC_TRUE; 3362 b->nonew = 0; 3363 b->diag = 0; 3364 b->solve_work = 0; 3365 b->mult_work = 0; 3366 B->spptr = 0; 3367 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3368 b->keepnonzeropattern = PETSC_FALSE; 3369 b->xtoy = 0; 3370 b->XtoY = 0; 3371 B->same_nonzero = PETSC_FALSE; 3372 3373 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C", 3374 "MatGetFactorAvailable_seqbaij_petsc", 3375 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3376 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C", 3377 "MatGetFactor_seqbaij_petsc", 3378 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3379 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_bstrm_C", 3380 "MatGetFactor_seqbaij_bstrm", 3381 MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr); 3382 #if defined(PETSC_HAVE_MUMPS) 3383 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr); 3384 #endif 3385 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatInvertBlockDiagonal_C", 3386 "MatInvertBlockDiagonal_SeqBAIJ", 3387 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3388 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3389 "MatStoreValues_SeqBAIJ", 3390 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3391 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3392 "MatRetrieveValues_SeqBAIJ", 3393 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3394 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 3395 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 3396 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3397 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 3398 "MatConvert_SeqBAIJ_SeqAIJ", 3399 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3400 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 3401 "MatConvert_SeqBAIJ_SeqSBAIJ", 3402 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3403 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 3404 "MatSeqBAIJSetPreallocation_SeqBAIJ", 3405 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3406 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 3407 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 3408 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3409 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C", 3410 "MatConvert_SeqBAIJ_SeqBSTRM", 3411 MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3412 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3413 "MatIsTranspose_SeqBAIJ", 3414 MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3415 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3416 PetscFunctionReturn(0); 3417 } 3418 EXTERN_C_END 3419 3420 #undef __FUNCT__ 3421 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3422 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3423 { 3424 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3425 PetscErrorCode ierr; 3426 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3427 3428 PetscFunctionBegin; 3429 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3430 3431 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3432 c->imax = a->imax; 3433 c->ilen = a->ilen; 3434 c->free_imax_ilen = PETSC_FALSE; 3435 } else { 3436 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3437 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3438 for (i=0; i<mbs; i++) { 3439 c->imax[i] = a->imax[i]; 3440 c->ilen[i] = a->ilen[i]; 3441 } 3442 c->free_imax_ilen = PETSC_TRUE; 3443 } 3444 3445 /* allocate the matrix space */ 3446 if (mallocmatspace){ 3447 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3448 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3449 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3450 c->singlemalloc = PETSC_FALSE; 3451 c->free_ij = PETSC_FALSE; 3452 c->i = a->i; 3453 c->j = a->j; 3454 c->parent = A; 3455 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3456 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3457 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3458 } else { 3459 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3460 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3461 c->singlemalloc = PETSC_TRUE; 3462 c->free_ij = PETSC_TRUE; 3463 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3464 if (mbs > 0) { 3465 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3466 if (cpvalues == MAT_COPY_VALUES) { 3467 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3468 } else { 3469 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3470 } 3471 } 3472 } 3473 } 3474 3475 c->roworiented = a->roworiented; 3476 c->nonew = a->nonew; 3477 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3478 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3479 c->bs2 = a->bs2; 3480 c->mbs = a->mbs; 3481 c->nbs = a->nbs; 3482 3483 if (a->diag) { 3484 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3485 c->diag = a->diag; 3486 c->free_diag = PETSC_FALSE; 3487 } else { 3488 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3489 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3490 for (i=0; i<mbs; i++) { 3491 c->diag[i] = a->diag[i]; 3492 } 3493 c->free_diag = PETSC_TRUE; 3494 } 3495 } else c->diag = 0; 3496 c->nz = a->nz; 3497 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3498 c->solve_work = 0; 3499 c->mult_work = 0; 3500 c->free_a = PETSC_TRUE; 3501 c->free_ij = PETSC_TRUE; 3502 C->preallocated = PETSC_TRUE; 3503 C->assembled = PETSC_TRUE; 3504 3505 c->compressedrow.use = a->compressedrow.use; 3506 c->compressedrow.nrows = a->compressedrow.nrows; 3507 c->compressedrow.check = a->compressedrow.check; 3508 if (a->compressedrow.use){ 3509 i = a->compressedrow.nrows; 3510 ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 3511 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3512 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3513 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3514 } else { 3515 c->compressedrow.use = PETSC_FALSE; 3516 c->compressedrow.i = PETSC_NULL; 3517 c->compressedrow.rindex = PETSC_NULL; 3518 } 3519 C->same_nonzero = A->same_nonzero; 3520 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3521 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3522 PetscFunctionReturn(0); 3523 } 3524 3525 #undef __FUNCT__ 3526 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3527 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3528 { 3529 PetscErrorCode ierr; 3530 3531 PetscFunctionBegin; 3532 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 3533 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3534 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3535 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE); 3536 PetscFunctionReturn(0); 3537 } 3538 3539 #undef __FUNCT__ 3540 #define __FUNCT__ "MatLoad_SeqBAIJ" 3541 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3542 { 3543 Mat_SeqBAIJ *a; 3544 PetscErrorCode ierr; 3545 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3546 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3547 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3548 PetscInt *masked,nmask,tmp,bs2,ishift; 3549 PetscMPIInt size; 3550 int fd; 3551 PetscScalar *aa; 3552 MPI_Comm comm = ((PetscObject)viewer)->comm; 3553 3554 PetscFunctionBegin; 3555 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3556 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3557 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3558 bs2 = bs*bs; 3559 3560 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3561 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3562 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3563 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3564 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3565 M = header[1]; N = header[2]; nz = header[3]; 3566 3567 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3568 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3569 3570 /* 3571 This code adds extra rows to make sure the number of rows is 3572 divisible by the blocksize 3573 */ 3574 mbs = M/bs; 3575 extra_rows = bs - M + bs*(mbs); 3576 if (extra_rows == bs) extra_rows = 0; 3577 else mbs++; 3578 if (extra_rows) { 3579 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3580 } 3581 3582 /* Set global sizes if not already set */ 3583 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3584 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3585 } else { /* Check if the matrix global sizes are correct */ 3586 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3587 if (rows < 0 && cols < 0){ /* user might provide local size instead of global size */ 3588 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3589 } 3590 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3591 } 3592 3593 /* read in row lengths */ 3594 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3595 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3596 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3597 3598 /* read in column indices */ 3599 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3600 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3601 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3602 3603 /* loop over row lengths determining block row lengths */ 3604 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3605 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3606 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3607 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3608 rowcount = 0; 3609 nzcount = 0; 3610 for (i=0; i<mbs; i++) { 3611 nmask = 0; 3612 for (j=0; j<bs; j++) { 3613 kmax = rowlengths[rowcount]; 3614 for (k=0; k<kmax; k++) { 3615 tmp = jj[nzcount++]/bs; 3616 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3617 } 3618 rowcount++; 3619 } 3620 browlengths[i] += nmask; 3621 /* zero out the mask elements we set */ 3622 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3623 } 3624 3625 /* Do preallocation */ 3626 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3627 a = (Mat_SeqBAIJ*)newmat->data; 3628 3629 /* set matrix "i" values */ 3630 a->i[0] = 0; 3631 for (i=1; i<= mbs; i++) { 3632 a->i[i] = a->i[i-1] + browlengths[i-1]; 3633 a->ilen[i-1] = browlengths[i-1]; 3634 } 3635 a->nz = 0; 3636 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3637 3638 /* read in nonzero values */ 3639 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3640 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3641 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3642 3643 /* set "a" and "j" values into matrix */ 3644 nzcount = 0; jcount = 0; 3645 for (i=0; i<mbs; i++) { 3646 nzcountb = nzcount; 3647 nmask = 0; 3648 for (j=0; j<bs; j++) { 3649 kmax = rowlengths[i*bs+j]; 3650 for (k=0; k<kmax; k++) { 3651 tmp = jj[nzcount++]/bs; 3652 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3653 } 3654 } 3655 /* sort the masked values */ 3656 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3657 3658 /* set "j" values into matrix */ 3659 maskcount = 1; 3660 for (j=0; j<nmask; j++) { 3661 a->j[jcount++] = masked[j]; 3662 mask[masked[j]] = maskcount++; 3663 } 3664 /* set "a" values into matrix */ 3665 ishift = bs2*a->i[i]; 3666 for (j=0; j<bs; j++) { 3667 kmax = rowlengths[i*bs+j]; 3668 for (k=0; k<kmax; k++) { 3669 tmp = jj[nzcountb]/bs ; 3670 block = mask[tmp] - 1; 3671 point = jj[nzcountb] - bs*tmp; 3672 idx = ishift + bs2*block + j + bs*point; 3673 a->a[idx] = (MatScalar)aa[nzcountb++]; 3674 } 3675 } 3676 /* zero out the mask elements we set */ 3677 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3678 } 3679 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3680 3681 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3682 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3683 ierr = PetscFree(aa);CHKERRQ(ierr); 3684 ierr = PetscFree(jj);CHKERRQ(ierr); 3685 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3686 3687 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3688 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3689 ierr = MatView_Private(newmat);CHKERRQ(ierr); 3690 PetscFunctionReturn(0); 3691 } 3692 3693 #undef __FUNCT__ 3694 #define __FUNCT__ "MatCreateSeqBAIJ" 3695 /*@C 3696 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3697 compressed row) format. For good matrix assembly performance the 3698 user should preallocate the matrix storage by setting the parameter nz 3699 (or the array nnz). By setting these parameters accurately, performance 3700 during matrix assembly can be increased by more than a factor of 50. 3701 3702 Collective on MPI_Comm 3703 3704 Input Parameters: 3705 + comm - MPI communicator, set to PETSC_COMM_SELF 3706 . bs - size of block 3707 . m - number of rows 3708 . n - number of columns 3709 . nz - number of nonzero blocks per block row (same for all rows) 3710 - nnz - array containing the number of nonzero blocks in the various block rows 3711 (possibly different for each block row) or PETSC_NULL 3712 3713 Output Parameter: 3714 . A - the matrix 3715 3716 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3717 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3718 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3719 3720 Options Database Keys: 3721 . -mat_no_unroll - uses code that does not unroll the loops in the 3722 block calculations (much slower) 3723 . -mat_block_size - size of the blocks to use 3724 3725 Level: intermediate 3726 3727 Notes: 3728 The number of rows and columns must be divisible by blocksize. 3729 3730 If the nnz parameter is given then the nz parameter is ignored 3731 3732 A nonzero block is any block that as 1 or more nonzeros in it 3733 3734 The block AIJ format is fully compatible with standard Fortran 77 3735 storage. That is, the stored row and column indices can begin at 3736 either one (as in Fortran) or zero. See the users' manual for details. 3737 3738 Specify the preallocated storage with either nz or nnz (not both). 3739 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3740 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3741 matrices. 3742 3743 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3744 @*/ 3745 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3746 { 3747 PetscErrorCode ierr; 3748 3749 PetscFunctionBegin; 3750 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3751 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3752 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3753 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3754 PetscFunctionReturn(0); 3755 } 3756 3757 #undef __FUNCT__ 3758 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3759 /*@C 3760 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3761 per row in the matrix. For good matrix assembly performance the 3762 user should preallocate the matrix storage by setting the parameter nz 3763 (or the array nnz). By setting these parameters accurately, performance 3764 during matrix assembly can be increased by more than a factor of 50. 3765 3766 Collective on MPI_Comm 3767 3768 Input Parameters: 3769 + A - the matrix 3770 . bs - size of block 3771 . nz - number of block nonzeros per block row (same for all rows) 3772 - nnz - array containing the number of block nonzeros in the various block rows 3773 (possibly different for each block row) or PETSC_NULL 3774 3775 Options Database Keys: 3776 . -mat_no_unroll - uses code that does not unroll the loops in the 3777 block calculations (much slower) 3778 . -mat_block_size - size of the blocks to use 3779 3780 Level: intermediate 3781 3782 Notes: 3783 If the nnz parameter is given then the nz parameter is ignored 3784 3785 You can call MatGetInfo() to get information on how effective the preallocation was; 3786 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3787 You can also run with the option -info and look for messages with the string 3788 malloc in them to see if additional memory allocation was needed. 3789 3790 The block AIJ format is fully compatible with standard Fortran 77 3791 storage. That is, the stored row and column indices can begin at 3792 either one (as in Fortran) or zero. See the users' manual for details. 3793 3794 Specify the preallocated storage with either nz or nnz (not both). 3795 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3796 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3797 3798 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3799 @*/ 3800 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3801 { 3802 PetscErrorCode ierr; 3803 3804 PetscFunctionBegin; 3805 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3806 PetscFunctionReturn(0); 3807 } 3808 3809 #undef __FUNCT__ 3810 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3811 /*@C 3812 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3813 (the default sequential PETSc format). 3814 3815 Collective on MPI_Comm 3816 3817 Input Parameters: 3818 + A - the matrix 3819 . i - the indices into j for the start of each local row (starts with zero) 3820 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3821 - v - optional values in the matrix 3822 3823 Level: developer 3824 3825 .keywords: matrix, aij, compressed row, sparse 3826 3827 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3828 @*/ 3829 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3830 { 3831 PetscErrorCode ierr; 3832 3833 PetscFunctionBegin; 3834 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3835 PetscFunctionReturn(0); 3836 } 3837 3838 3839 #undef __FUNCT__ 3840 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3841 /*@ 3842 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3843 3844 Collective on MPI_Comm 3845 3846 Input Parameters: 3847 + comm - must be an MPI communicator of size 1 3848 . bs - size of block 3849 . m - number of rows 3850 . n - number of columns 3851 . i - row indices 3852 . j - column indices 3853 - a - matrix values 3854 3855 Output Parameter: 3856 . mat - the matrix 3857 3858 Level: advanced 3859 3860 Notes: 3861 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3862 once the matrix is destroyed 3863 3864 You cannot set new nonzero locations into this matrix, that will generate an error. 3865 3866 The i and j indices are 0 based 3867 3868 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3869 3870 3871 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3872 3873 @*/ 3874 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3875 { 3876 PetscErrorCode ierr; 3877 PetscInt ii; 3878 Mat_SeqBAIJ *baij; 3879 3880 PetscFunctionBegin; 3881 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3882 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3883 3884 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3885 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3886 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3887 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3888 baij = (Mat_SeqBAIJ*)(*mat)->data; 3889 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3890 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3891 3892 baij->i = i; 3893 baij->j = j; 3894 baij->a = a; 3895 baij->singlemalloc = PETSC_FALSE; 3896 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3897 baij->free_a = PETSC_FALSE; 3898 baij->free_ij = PETSC_FALSE; 3899 3900 for (ii=0; ii<m; ii++) { 3901 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3902 #if defined(PETSC_USE_DEBUG) 3903 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3904 #endif 3905 } 3906 #if defined(PETSC_USE_DEBUG) 3907 for (ii=0; ii<baij->i[m]; ii++) { 3908 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3909 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3910 } 3911 #endif 3912 3913 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3914 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3915 PetscFunctionReturn(0); 3916 } 3917