1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <../src/mat/blockinvert.h> 9 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 19 PetscReal shift = 0.0; 20 21 PetscFunctionBegin; 22 if (a->idiagvalid) { 23 if (values)*values = a->idiag; 24 PetscFunctionReturn(0); 25 } 26 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 27 diag_offset = a->diag; 28 if (!a->idiag) { 29 ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 30 ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 31 } 32 diag = a->idiag; 33 mdiag = a->idiag+bs2*mbs; 34 if (values) *values = a->idiag; 35 /* factor and invert each block */ 36 switch (bs) { 37 case 1: 38 for (i=0; i<mbs; i++) { 39 odiag = v + 1*diag_offset[i]; 40 diag[0] = odiag[0]; 41 mdiag[0] = odiag[0]; 42 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 43 diag += 1; 44 mdiag += 1; 45 } 46 break; 47 case 2: 48 for (i=0; i<mbs; i++) { 49 odiag = v + 4*diag_offset[i]; 50 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 51 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 52 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 53 diag += 4; 54 mdiag += 4; 55 } 56 break; 57 case 3: 58 for (i=0; i<mbs; i++) { 59 odiag = v + 9*diag_offset[i]; 60 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 61 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 62 diag[8] = odiag[8]; 63 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 64 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 65 mdiag[8] = odiag[8]; 66 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 67 diag += 9; 68 mdiag += 9; 69 } 70 break; 71 case 4: 72 for (i=0; i<mbs; i++) { 73 odiag = v + 16*diag_offset[i]; 74 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 75 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 76 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 77 diag += 16; 78 mdiag += 16; 79 } 80 break; 81 case 5: 82 for (i=0; i<mbs; i++) { 83 odiag = v + 25*diag_offset[i]; 84 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 85 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 86 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 87 diag += 25; 88 mdiag += 25; 89 } 90 break; 91 case 6: 92 for (i=0; i<mbs; i++) { 93 odiag = v + 36*diag_offset[i]; 94 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 95 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 97 diag += 36; 98 mdiag += 36; 99 } 100 break; 101 case 7: 102 for (i=0; i<mbs; i++) { 103 odiag = v + 49*diag_offset[i]; 104 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 105 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 107 diag += 49; 108 mdiag += 49; 109 } 110 break; 111 default: 112 ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr); 113 for (i=0; i<mbs; i++) { 114 odiag = v + bs2*diag_offset[i]; 115 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 117 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 118 diag += bs2; 119 mdiag += bs2; 120 } 121 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 122 } 123 a->idiagvalid = PETSC_TRUE; 124 PetscFunctionReturn(0); 125 } 126 127 #undef __FUNCT__ 128 #define __FUNCT__ "MatSOR_SeqBAIJ_1" 129 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 130 { 131 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 132 PetscScalar *x,x1,s1; 133 const PetscScalar *b; 134 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 135 PetscErrorCode ierr; 136 PetscInt m = a->mbs,i,i2,nz,j; 137 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 138 139 PetscFunctionBegin; 140 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 141 its = its*lits; 142 if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 143 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 144 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 145 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 146 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 147 148 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 149 150 diag = a->diag; 151 idiag = a->idiag; 152 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 153 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 154 155 if (flag & SOR_ZERO_INITIAL_GUESS) { 156 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 157 x[0] = b[0]*idiag[0]; 158 i2 = 1; 159 idiag += 1; 160 for (i=1; i<m; i++) { 161 v = aa + ai[i]; 162 vi = aj + ai[i]; 163 nz = diag[i] - ai[i]; 164 s1 = b[i2]; 165 for (j=0; j<nz; j++) { 166 s1 -= v[j]*x[vi[j]]; 167 } 168 x[i2] = idiag[0]*s1; 169 idiag += 1; 170 i2 += 1; 171 } 172 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 173 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 174 } 175 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 176 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 177 i2 = 0; 178 mdiag = a->idiag+a->mbs; 179 for (i=0; i<m; i++) { 180 x1 = x[i2]; 181 x[i2] = mdiag[0]*x1; 182 mdiag += 1; 183 i2 += 1; 184 } 185 ierr = PetscLogFlops(m);CHKERRQ(ierr); 186 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 187 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 188 } 189 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 190 idiag = a->idiag+a->mbs - 1; 191 i2 = m - 1; 192 x1 = x[i2]; 193 x[i2] = idiag[0]*x1; 194 idiag -= 1; 195 i2 -= 1; 196 for (i=m-2; i>=0; i--) { 197 v = aa + (diag[i]+1); 198 vi = aj + diag[i] + 1; 199 nz = ai[i+1] - diag[i] - 1; 200 s1 = x[i2]; 201 for (j=0; j<nz; j++) { 202 s1 -= v[j]*x[vi[j]]; 203 } 204 x[i2] = idiag[0]*s1; 205 idiag -= 1; 206 i2 -= 1; 207 } 208 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 209 } 210 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 211 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 212 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 213 PetscFunctionReturn(0); 214 } 215 216 #undef __FUNCT__ 217 #define __FUNCT__ "MatSOR_SeqBAIJ_2" 218 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 219 { 220 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 221 PetscScalar *x,x1,x2,s1,s2; 222 const PetscScalar *b; 223 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 224 PetscErrorCode ierr; 225 PetscInt m = a->mbs,i,i2,nz,idx,j,it; 226 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 227 228 PetscFunctionBegin; 229 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 230 its = its*lits; 231 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 232 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 233 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 234 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 235 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 236 237 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 238 239 diag = a->diag; 240 idiag = a->idiag; 241 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 242 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 243 244 if (flag & SOR_ZERO_INITIAL_GUESS) { 245 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 246 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 247 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 248 i2 = 2; 249 idiag += 4; 250 for (i=1; i<m; i++) { 251 v = aa + 4*ai[i]; 252 vi = aj + ai[i]; 253 nz = diag[i] - ai[i]; 254 s1 = b[i2]; s2 = b[i2+1]; 255 for (j=0; j<nz; j++) { 256 idx = 2*vi[j]; 257 it = 4*j; 258 x1 = x[idx]; x2 = x[1+idx]; 259 s1 -= v[it]*x1 + v[it+2]*x2; 260 s2 -= v[it+1]*x1 + v[it+3]*x2; 261 } 262 x[i2] = idiag[0]*s1 + idiag[2]*s2; 263 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 264 idiag += 4; 265 i2 += 2; 266 } 267 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 268 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 269 } 270 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 271 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 272 i2 = 0; 273 mdiag = a->idiag+4*a->mbs; 274 for (i=0; i<m; i++) { 275 x1 = x[i2]; x2 = x[i2+1]; 276 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 277 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 278 mdiag += 4; 279 i2 += 2; 280 } 281 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 282 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 283 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 284 } 285 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 286 idiag = a->idiag+4*a->mbs - 4; 287 i2 = 2*m - 2; 288 x1 = x[i2]; x2 = x[i2+1]; 289 x[i2] = idiag[0]*x1 + idiag[2]*x2; 290 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 291 idiag -= 4; 292 i2 -= 2; 293 for (i=m-2; i>=0; i--) { 294 v = aa + 4*(diag[i]+1); 295 vi = aj + diag[i] + 1; 296 nz = ai[i+1] - diag[i] - 1; 297 s1 = x[i2]; s2 = x[i2+1]; 298 for (j=0; j<nz; j++) { 299 idx = 2*vi[j]; 300 it = 4*j; 301 x1 = x[idx]; x2 = x[1+idx]; 302 s1 -= v[it]*x1 + v[it+2]*x2; 303 s2 -= v[it+1]*x1 + v[it+3]*x2; 304 } 305 x[i2] = idiag[0]*s1 + idiag[2]*s2; 306 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 307 idiag -= 4; 308 i2 -= 2; 309 } 310 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 311 } 312 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 313 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 314 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 315 PetscFunctionReturn(0); 316 } 317 318 #undef __FUNCT__ 319 #define __FUNCT__ "MatSOR_SeqBAIJ_3" 320 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 321 { 322 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 323 PetscScalar *x,x1,x2,x3,s1,s2,s3; 324 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 325 const PetscScalar *b; 326 PetscErrorCode ierr; 327 PetscInt m = a->mbs,i,i2,nz,idx; 328 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 329 330 PetscFunctionBegin; 331 its = its*lits; 332 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 333 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 334 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 335 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 336 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 337 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 338 339 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 340 341 diag = a->diag; 342 idiag = a->idiag; 343 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 344 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 345 346 if (flag & SOR_ZERO_INITIAL_GUESS) { 347 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 348 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 349 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 350 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 351 i2 = 3; 352 idiag += 9; 353 for (i=1; i<m; i++) { 354 v = aa + 9*ai[i]; 355 vi = aj + ai[i]; 356 nz = diag[i] - ai[i]; 357 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 358 while (nz--) { 359 idx = 3*(*vi++); 360 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 361 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 362 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 363 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 364 v += 9; 365 } 366 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 367 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 368 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 369 idiag += 9; 370 i2 += 3; 371 } 372 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 373 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 374 } 375 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 376 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 377 i2 = 0; 378 mdiag = a->idiag+9*a->mbs; 379 for (i=0; i<m; i++) { 380 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 381 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 382 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 383 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 384 mdiag += 9; 385 i2 += 3; 386 } 387 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 388 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 389 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 390 } 391 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 392 idiag = a->idiag+9*a->mbs - 9; 393 i2 = 3*m - 3; 394 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 395 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 396 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 397 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 398 idiag -= 9; 399 i2 -= 3; 400 for (i=m-2; i>=0; i--) { 401 v = aa + 9*(diag[i]+1); 402 vi = aj + diag[i] + 1; 403 nz = ai[i+1] - diag[i] - 1; 404 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 405 while (nz--) { 406 idx = 3*(*vi++); 407 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 408 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 409 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 410 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 411 v += 9; 412 } 413 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 414 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 415 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 416 idiag -= 9; 417 i2 -= 3; 418 } 419 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 420 } 421 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 422 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 423 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 424 PetscFunctionReturn(0); 425 } 426 427 #undef __FUNCT__ 428 #define __FUNCT__ "MatSOR_SeqBAIJ_4" 429 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 430 { 431 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 432 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 433 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 434 const PetscScalar *b; 435 PetscErrorCode ierr; 436 PetscInt m = a->mbs,i,i2,nz,idx; 437 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 438 439 PetscFunctionBegin; 440 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 441 its = its*lits; 442 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 443 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 444 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 445 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 446 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 447 448 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 449 450 diag = a->diag; 451 idiag = a->idiag; 452 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 453 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 454 455 if (flag & SOR_ZERO_INITIAL_GUESS) { 456 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 457 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 458 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 459 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 460 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 461 i2 = 4; 462 idiag += 16; 463 for (i=1; i<m; i++) { 464 v = aa + 16*ai[i]; 465 vi = aj + ai[i]; 466 nz = diag[i] - ai[i]; 467 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 468 while (nz--) { 469 idx = 4*(*vi++); 470 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 471 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 472 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 473 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 474 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 475 v += 16; 476 } 477 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 478 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 479 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 480 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 481 idiag += 16; 482 i2 += 4; 483 } 484 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 485 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 486 } 487 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 488 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 489 i2 = 0; 490 mdiag = a->idiag+16*a->mbs; 491 for (i=0; i<m; i++) { 492 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 493 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 494 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 495 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 496 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 497 mdiag += 16; 498 i2 += 4; 499 } 500 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 501 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 502 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 503 } 504 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 505 idiag = a->idiag+16*a->mbs - 16; 506 i2 = 4*m - 4; 507 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 508 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 509 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 510 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 511 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 512 idiag -= 16; 513 i2 -= 4; 514 for (i=m-2; i>=0; i--) { 515 v = aa + 16*(diag[i]+1); 516 vi = aj + diag[i] + 1; 517 nz = ai[i+1] - diag[i] - 1; 518 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 519 while (nz--) { 520 idx = 4*(*vi++); 521 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 522 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 523 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 524 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 525 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 526 v += 16; 527 } 528 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 529 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 530 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 531 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 532 idiag -= 16; 533 i2 -= 4; 534 } 535 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 536 } 537 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 538 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 539 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 540 PetscFunctionReturn(0); 541 } 542 543 #undef __FUNCT__ 544 #define __FUNCT__ "MatSOR_SeqBAIJ_5" 545 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 546 { 547 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 548 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 549 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 550 const PetscScalar *b; 551 PetscErrorCode ierr; 552 PetscInt m = a->mbs,i,i2,nz,idx; 553 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 554 555 PetscFunctionBegin; 556 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 557 its = its*lits; 558 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 559 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 560 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 561 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 562 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 563 564 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 565 566 diag = a->diag; 567 idiag = a->idiag; 568 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 569 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 570 571 if (flag & SOR_ZERO_INITIAL_GUESS) { 572 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 573 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 574 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 575 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 576 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 577 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 578 i2 = 5; 579 idiag += 25; 580 for (i=1; i<m; i++) { 581 v = aa + 25*ai[i]; 582 vi = aj + ai[i]; 583 nz = diag[i] - ai[i]; 584 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 585 while (nz--) { 586 idx = 5*(*vi++); 587 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 588 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 589 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 590 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 591 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 592 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 593 v += 25; 594 } 595 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 596 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 597 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 598 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 599 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 600 idiag += 25; 601 i2 += 5; 602 } 603 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 604 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 605 } 606 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 607 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 608 i2 = 0; 609 mdiag = a->idiag+25*a->mbs; 610 for (i=0; i<m; i++) { 611 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 612 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 613 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 614 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 615 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 616 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 617 mdiag += 25; 618 i2 += 5; 619 } 620 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 621 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 622 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 623 } 624 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 625 idiag = a->idiag+25*a->mbs - 25; 626 i2 = 5*m - 5; 627 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 628 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 629 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 630 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 631 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 632 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 633 idiag -= 25; 634 i2 -= 5; 635 for (i=m-2; i>=0; i--) { 636 v = aa + 25*(diag[i]+1); 637 vi = aj + diag[i] + 1; 638 nz = ai[i+1] - diag[i] - 1; 639 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 640 while (nz--) { 641 idx = 5*(*vi++); 642 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 643 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 644 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 645 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 646 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 647 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 648 v += 25; 649 } 650 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 651 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 652 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 653 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 654 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 655 idiag -= 25; 656 i2 -= 5; 657 } 658 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 659 } 660 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 661 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 662 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 663 PetscFunctionReturn(0); 664 } 665 666 #undef __FUNCT__ 667 #define __FUNCT__ "MatSOR_SeqBAIJ_6" 668 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 669 { 670 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 671 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 672 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 673 const PetscScalar *b; 674 PetscErrorCode ierr; 675 PetscInt m = a->mbs,i,i2,nz,idx; 676 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 677 678 PetscFunctionBegin; 679 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 680 its = its*lits; 681 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 682 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 683 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 684 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 685 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 686 687 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 688 689 diag = a->diag; 690 idiag = a->idiag; 691 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 692 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 693 694 if (flag & SOR_ZERO_INITIAL_GUESS) { 695 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 696 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 697 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 698 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 699 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 700 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 701 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 702 i2 = 6; 703 idiag += 36; 704 for (i=1; i<m; i++) { 705 v = aa + 36*ai[i]; 706 vi = aj + ai[i]; 707 nz = diag[i] - ai[i]; 708 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 709 while (nz--) { 710 idx = 6*(*vi++); 711 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 712 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 713 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 714 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 715 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 716 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 717 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 718 v += 36; 719 } 720 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 721 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 722 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 723 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 724 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 725 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 726 idiag += 36; 727 i2 += 6; 728 } 729 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 730 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 731 } 732 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 733 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 734 i2 = 0; 735 mdiag = a->idiag+36*a->mbs; 736 for (i=0; i<m; i++) { 737 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 738 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 739 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 740 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 741 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 742 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 743 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 744 mdiag += 36; 745 i2 += 6; 746 } 747 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 748 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 749 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 750 } 751 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 752 idiag = a->idiag+36*a->mbs - 36; 753 i2 = 6*m - 6; 754 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 755 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 756 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 757 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 758 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 759 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 760 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 761 idiag -= 36; 762 i2 -= 6; 763 for (i=m-2; i>=0; i--) { 764 v = aa + 36*(diag[i]+1); 765 vi = aj + diag[i] + 1; 766 nz = ai[i+1] - diag[i] - 1; 767 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 768 while (nz--) { 769 idx = 6*(*vi++); 770 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 771 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 772 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 773 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 774 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 775 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 776 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 777 v += 36; 778 } 779 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 780 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 781 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 782 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 783 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 784 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 785 idiag -= 36; 786 i2 -= 6; 787 } 788 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 789 } 790 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 791 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 792 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 793 PetscFunctionReturn(0); 794 } 795 796 #undef __FUNCT__ 797 #define __FUNCT__ "MatSOR_SeqBAIJ_7" 798 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 799 { 800 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 801 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 802 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 803 const PetscScalar *b; 804 PetscErrorCode ierr; 805 PetscInt m = a->mbs,i,i2,nz,idx; 806 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 807 808 PetscFunctionBegin; 809 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 810 its = its*lits; 811 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 812 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 813 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 814 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 815 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 816 817 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 818 819 diag = a->diag; 820 idiag = a->idiag; 821 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 822 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 823 824 if (flag & SOR_ZERO_INITIAL_GUESS) { 825 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 826 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 827 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 828 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 829 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 830 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 831 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 832 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 833 i2 = 7; 834 idiag += 49; 835 for (i=1; i<m; i++) { 836 v = aa + 49*ai[i]; 837 vi = aj + ai[i]; 838 nz = diag[i] - ai[i]; 839 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 840 while (nz--) { 841 idx = 7*(*vi++); 842 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 843 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 844 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 845 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 846 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 847 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 848 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 849 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 850 v += 49; 851 } 852 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 853 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 854 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 855 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 856 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 857 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 858 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 859 idiag += 49; 860 i2 += 7; 861 } 862 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 863 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 864 } 865 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 866 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 867 i2 = 0; 868 mdiag = a->idiag+49*a->mbs; 869 for (i=0; i<m; i++) { 870 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 871 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7; 872 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7; 873 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7; 874 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7; 875 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7; 876 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7; 877 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7; 878 mdiag += 49; 879 i2 += 7; 880 } 881 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 882 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 883 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 884 } 885 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 886 idiag = a->idiag+49*a->mbs - 49; 887 i2 = 7*m - 7; 888 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 889 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 890 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 891 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 892 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 893 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 894 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 895 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 896 idiag -= 49; 897 i2 -= 7; 898 for (i=m-2; i>=0; i--) { 899 v = aa + 49*(diag[i]+1); 900 vi = aj + diag[i] + 1; 901 nz = ai[i+1] - diag[i] - 1; 902 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 903 while (nz--) { 904 idx = 7*(*vi++); 905 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 906 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 907 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 908 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 909 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 910 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 911 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 912 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 913 v += 49; 914 } 915 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 916 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 917 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 918 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 919 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 920 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 921 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 922 idiag -= 49; 923 i2 -= 7; 924 } 925 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 926 } 927 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 928 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 929 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 930 PetscFunctionReturn(0); 931 } 932 933 #undef __FUNCT__ 934 #define __FUNCT__ "MatSOR_SeqBAIJ_N" 935 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 936 { 937 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 938 PetscScalar *x,*work,*w,*workt; 939 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 940 const PetscScalar *b; 941 PetscErrorCode ierr; 942 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j; 943 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 944 945 PetscFunctionBegin; 946 its = its*lits; 947 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 948 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 949 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 950 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 951 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 952 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 953 954 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 955 956 diag = a->diag; 957 idiag = a->idiag; 958 if (!a->mult_work) { 959 k = PetscMax(A->rmap->n,A->cmap->n); 960 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 961 } 962 work = a->mult_work; 963 if (!a->sor_work) { 964 ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr); 965 } 966 w = a->sor_work; 967 968 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 969 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 970 971 if (flag & SOR_ZERO_INITIAL_GUESS) { 972 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 973 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 974 /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 975 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 976 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/ 977 i2 = bs; 978 idiag += bs2; 979 for (i=1; i<m; i++) { 980 v = aa + bs2*ai[i]; 981 vi = aj + ai[i]; 982 nz = diag[i] - ai[i]; 983 984 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 985 /* copy all rows of x that are needed into contiguous space */ 986 workt = work; 987 for (j=0; j<nz; j++) { 988 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 989 workt += bs; 990 } 991 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 992 /*s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 993 while (nz--) { 994 idx = N*(*vi++); 995 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 996 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 997 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 998 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 999 v += N2; 1000 } */ 1001 1002 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1003 /* x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1004 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1005 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/ 1006 1007 idiag += bs2; 1008 i2 += bs; 1009 } 1010 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 1011 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1012 } 1013 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 1014 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1015 i2 = 0; 1016 mdiag = a->idiag+bs2*a->mbs; 1017 ierr = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr); 1018 for (i=0; i<m; i++) { 1019 PetscKernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2); 1020 /* x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1021 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 1022 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 1023 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */ 1024 1025 mdiag += bs2; 1026 i2 += bs; 1027 } 1028 ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr); 1029 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1030 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 1031 } 1032 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1033 idiag = a->idiag+bs2*a->mbs - bs2; 1034 i2 = bs*m - bs; 1035 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1036 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1037 /*x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1038 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 1039 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 1040 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/ 1041 idiag -= bs2; 1042 i2 -= bs; 1043 for (i=m-2; i>=0; i--) { 1044 v = aa + bs2*(diag[i]+1); 1045 vi = aj + diag[i] + 1; 1046 nz = ai[i+1] - diag[i] - 1; 1047 1048 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1049 /* copy all rows of x that are needed into contiguous space */ 1050 workt = work; 1051 for (j=0; j<nz; j++) { 1052 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1053 workt += bs; 1054 } 1055 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1056 /* s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 1057 while (nz--) { 1058 idx = N*(*vi++); 1059 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 1060 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1061 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1062 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1063 v += N2; 1064 } */ 1065 1066 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1067 /*x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1068 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1069 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */ 1070 idiag -= bs2; 1071 i2 -= bs; 1072 } 1073 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1074 } 1075 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 1076 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1077 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1078 PetscFunctionReturn(0); 1079 } 1080 1081 /* 1082 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1083 */ 1084 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1085 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1086 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1087 #define matsetvaluesblocked4_ matsetvaluesblocked4 1088 #endif 1089 1090 EXTERN_C_BEGIN 1091 #undef __FUNCT__ 1092 #define __FUNCT__ "matsetvaluesblocked4_" 1093 void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 1094 { 1095 Mat A = *AA; 1096 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1097 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 1098 PetscInt *ai=a->i,*ailen=a->ilen; 1099 PetscInt *aj=a->j,stepval,lastcol = -1; 1100 const PetscScalar *value = v; 1101 MatScalar *ap,*aa = a->a,*bap; 1102 1103 PetscFunctionBegin; 1104 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 1105 stepval = (n-1)*4; 1106 for (k=0; k<m; k++) { /* loop over added rows */ 1107 row = im[k]; 1108 rp = aj + ai[row]; 1109 ap = aa + 16*ai[row]; 1110 nrow = ailen[row]; 1111 low = 0; 1112 high = nrow; 1113 for (l=0; l<n; l++) { /* loop over added columns */ 1114 col = in[l]; 1115 if (col <= lastcol) low = 0; 1116 else high = nrow; 1117 lastcol = col; 1118 value = v + k*(stepval+4 + l)*4; 1119 while (high-low > 7) { 1120 t = (low+high)/2; 1121 if (rp[t] > col) high = t; 1122 else low = t; 1123 } 1124 for (i=low; i<high; i++) { 1125 if (rp[i] > col) break; 1126 if (rp[i] == col) { 1127 bap = ap + 16*i; 1128 for (ii=0; ii<4; ii++,value+=stepval) { 1129 for (jj=ii; jj<16; jj+=4) { 1130 bap[jj] += *value++; 1131 } 1132 } 1133 goto noinsert2; 1134 } 1135 } 1136 N = nrow++ - 1; 1137 high++; /* added new column index thus must search to one higher than before */ 1138 /* shift up all the later entries in this row */ 1139 for (ii=N; ii>=i; ii--) { 1140 rp[ii+1] = rp[ii]; 1141 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1142 } 1143 if (N >= i) { 1144 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1145 } 1146 rp[i] = col; 1147 bap = ap + 16*i; 1148 for (ii=0; ii<4; ii++,value+=stepval) { 1149 for (jj=ii; jj<16; jj+=4) { 1150 bap[jj] = *value++; 1151 } 1152 } 1153 noinsert2:; 1154 low = i; 1155 } 1156 ailen[row] = nrow; 1157 } 1158 PetscFunctionReturnVoid(); 1159 } 1160 EXTERN_C_END 1161 1162 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1163 #define matsetvalues4_ MATSETVALUES4 1164 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1165 #define matsetvalues4_ matsetvalues4 1166 #endif 1167 1168 EXTERN_C_BEGIN 1169 #undef __FUNCT__ 1170 #define __FUNCT__ "MatSetValues4_" 1171 void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1172 { 1173 Mat A = *AA; 1174 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1175 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1176 PetscInt *ai=a->i,*ailen=a->ilen; 1177 PetscInt *aj=a->j,brow,bcol; 1178 PetscInt ridx,cidx,lastcol = -1; 1179 MatScalar *ap,value,*aa=a->a,*bap; 1180 1181 PetscFunctionBegin; 1182 for (k=0; k<m; k++) { /* loop over added rows */ 1183 row = im[k]; brow = row/4; 1184 rp = aj + ai[brow]; 1185 ap = aa + 16*ai[brow]; 1186 nrow = ailen[brow]; 1187 low = 0; 1188 high = nrow; 1189 for (l=0; l<n; l++) { /* loop over added columns */ 1190 col = in[l]; bcol = col/4; 1191 ridx = row % 4; cidx = col % 4; 1192 value = v[l + k*n]; 1193 if (col <= lastcol) low = 0; 1194 else high = nrow; 1195 lastcol = col; 1196 while (high-low > 7) { 1197 t = (low+high)/2; 1198 if (rp[t] > bcol) high = t; 1199 else low = t; 1200 } 1201 for (i=low; i<high; i++) { 1202 if (rp[i] > bcol) break; 1203 if (rp[i] == bcol) { 1204 bap = ap + 16*i + 4*cidx + ridx; 1205 *bap += value; 1206 goto noinsert1; 1207 } 1208 } 1209 N = nrow++ - 1; 1210 high++; /* added new column thus must search to one higher than before */ 1211 /* shift up all the later entries in this row */ 1212 for (ii=N; ii>=i; ii--) { 1213 rp[ii+1] = rp[ii]; 1214 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1215 } 1216 if (N>=i) { 1217 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1218 } 1219 rp[i] = bcol; 1220 ap[16*i + 4*cidx + ridx] = value; 1221 noinsert1:; 1222 low = i; 1223 } 1224 ailen[brow] = nrow; 1225 } 1226 PetscFunctionReturnVoid(); 1227 } 1228 EXTERN_C_END 1229 1230 /* 1231 Checks for missing diagonals 1232 */ 1233 #undef __FUNCT__ 1234 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1235 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1236 { 1237 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1238 PetscErrorCode ierr; 1239 PetscInt *diag,*jj = a->j,i; 1240 1241 PetscFunctionBegin; 1242 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1243 *missing = PETSC_FALSE; 1244 if (A->rmap->n > 0 && !jj) { 1245 *missing = PETSC_TRUE; 1246 if (d) *d = 0; 1247 PetscInfo(A,"Matrix has no entries therefore is missing diagonal"); 1248 } else { 1249 diag = a->diag; 1250 for (i=0; i<a->mbs; i++) { 1251 if (jj[diag[i]] != i) { 1252 *missing = PETSC_TRUE; 1253 if (d) *d = i; 1254 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1255 break; 1256 } 1257 } 1258 } 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #undef __FUNCT__ 1263 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1264 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1265 { 1266 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1267 PetscErrorCode ierr; 1268 PetscInt i,j,m = a->mbs; 1269 1270 PetscFunctionBegin; 1271 if (!a->diag) { 1272 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1273 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1274 a->free_diag = PETSC_TRUE; 1275 } 1276 for (i=0; i<m; i++) { 1277 a->diag[i] = a->i[i+1]; 1278 for (j=a->i[i]; j<a->i[i+1]; j++) { 1279 if (a->j[j] == i) { 1280 a->diag[i] = j; 1281 break; 1282 } 1283 } 1284 } 1285 PetscFunctionReturn(0); 1286 } 1287 1288 1289 #undef __FUNCT__ 1290 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1291 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1292 { 1293 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1294 PetscErrorCode ierr; 1295 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1296 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1297 1298 PetscFunctionBegin; 1299 *nn = n; 1300 if (!ia) PetscFunctionReturn(0); 1301 if (symmetric) { 1302 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1303 nz = tia[n]; 1304 } else { 1305 tia = a->i; tja = a->j; 1306 } 1307 1308 if (!blockcompressed && bs > 1) { 1309 (*nn) *= bs; 1310 /* malloc & create the natural set of indices */ 1311 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1312 if (n) { 1313 (*ia)[0] = 0; 1314 for (j=1; j<bs; j++) { 1315 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1316 } 1317 } 1318 1319 for (i=1; i<n; i++) { 1320 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1321 for (j=1; j<bs; j++) { 1322 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1323 } 1324 } 1325 if (n) { 1326 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1327 } 1328 1329 if (inja) { 1330 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1331 cnt = 0; 1332 for (i=0; i<n; i++) { 1333 for (j=0; j<bs; j++) { 1334 for (k=tia[i]; k<tia[i+1]; k++) { 1335 for (l=0; l<bs; l++) { 1336 (*ja)[cnt++] = bs*tja[k] + l; 1337 } 1338 } 1339 } 1340 } 1341 } 1342 1343 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1344 ierr = PetscFree(tia);CHKERRQ(ierr); 1345 ierr = PetscFree(tja);CHKERRQ(ierr); 1346 } 1347 } else if (oshift == 1) { 1348 if (symmetric) { 1349 nz = tia[A->rmap->n/bs]; 1350 /* add 1 to i and j indices */ 1351 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1352 *ia = tia; 1353 if (ja) { 1354 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1355 *ja = tja; 1356 } 1357 } else { 1358 nz = a->i[A->rmap->n/bs]; 1359 /* malloc space and add 1 to i and j indices */ 1360 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1361 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1362 if (ja) { 1363 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1364 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1365 } 1366 } 1367 } else { 1368 *ia = tia; 1369 if (ja) *ja = tja; 1370 } 1371 PetscFunctionReturn(0); 1372 } 1373 1374 #undef __FUNCT__ 1375 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1376 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1377 { 1378 PetscErrorCode ierr; 1379 1380 PetscFunctionBegin; 1381 if (!ia) PetscFunctionReturn(0); 1382 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1383 ierr = PetscFree(*ia);CHKERRQ(ierr); 1384 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1385 } 1386 PetscFunctionReturn(0); 1387 } 1388 1389 #undef __FUNCT__ 1390 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1391 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1392 { 1393 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1394 PetscErrorCode ierr; 1395 1396 PetscFunctionBegin; 1397 #if defined(PETSC_USE_LOG) 1398 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1399 #endif 1400 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1401 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1402 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1403 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1404 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1405 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1406 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1407 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1408 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1409 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1410 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1411 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1412 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1413 1414 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1415 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1416 ierr = PetscFree(A->data);CHKERRQ(ierr); 1417 1418 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1419 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1420 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1421 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1422 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1423 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1424 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1425 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1426 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1427 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",PETSC_NULL);CHKERRQ(ierr); 1428 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1429 PetscFunctionReturn(0); 1430 } 1431 1432 #undef __FUNCT__ 1433 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1434 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1435 { 1436 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1437 PetscErrorCode ierr; 1438 1439 PetscFunctionBegin; 1440 switch (op) { 1441 case MAT_ROW_ORIENTED: 1442 a->roworiented = flg; 1443 break; 1444 case MAT_KEEP_NONZERO_PATTERN: 1445 a->keepnonzeropattern = flg; 1446 break; 1447 case MAT_NEW_NONZERO_LOCATIONS: 1448 a->nonew = (flg ? 0 : 1); 1449 break; 1450 case MAT_NEW_NONZERO_LOCATION_ERR: 1451 a->nonew = (flg ? -1 : 0); 1452 break; 1453 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1454 a->nonew = (flg ? -2 : 0); 1455 break; 1456 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1457 a->nounused = (flg ? -1 : 0); 1458 break; 1459 case MAT_CHECK_COMPRESSED_ROW: 1460 a->compressedrow.check = flg; 1461 break; 1462 case MAT_NEW_DIAGONALS: 1463 case MAT_IGNORE_OFF_PROC_ENTRIES: 1464 case MAT_USE_HASH_TABLE: 1465 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1466 break; 1467 case MAT_SPD: 1468 case MAT_SYMMETRIC: 1469 case MAT_STRUCTURALLY_SYMMETRIC: 1470 case MAT_HERMITIAN: 1471 case MAT_SYMMETRY_ETERNAL: 1472 /* These options are handled directly by MatSetOption() */ 1473 break; 1474 default: 1475 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1476 } 1477 PetscFunctionReturn(0); 1478 } 1479 1480 #undef __FUNCT__ 1481 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1482 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1483 { 1484 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1485 PetscErrorCode ierr; 1486 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1487 MatScalar *aa,*aa_i; 1488 PetscScalar *v_i; 1489 1490 PetscFunctionBegin; 1491 bs = A->rmap->bs; 1492 ai = a->i; 1493 aj = a->j; 1494 aa = a->a; 1495 bs2 = a->bs2; 1496 1497 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1498 1499 bn = row/bs; /* Block number */ 1500 bp = row % bs; /* Block Position */ 1501 M = ai[bn+1] - ai[bn]; 1502 *nz = bs*M; 1503 1504 if (v) { 1505 *v = 0; 1506 if (*nz) { 1507 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1508 for (i=0; i<M; i++) { /* for each block in the block row */ 1509 v_i = *v + i*bs; 1510 aa_i = aa + bs2*(ai[bn] + i); 1511 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1512 } 1513 } 1514 } 1515 1516 if (idx) { 1517 *idx = 0; 1518 if (*nz) { 1519 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1520 for (i=0; i<M; i++) { /* for each block in the block row */ 1521 idx_i = *idx + i*bs; 1522 itmp = bs*aj[ai[bn] + i]; 1523 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1524 } 1525 } 1526 } 1527 PetscFunctionReturn(0); 1528 } 1529 1530 #undef __FUNCT__ 1531 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1532 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1533 { 1534 PetscErrorCode ierr; 1535 1536 PetscFunctionBegin; 1537 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1538 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1539 PetscFunctionReturn(0); 1540 } 1541 1542 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1543 1544 #undef __FUNCT__ 1545 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1546 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1547 { 1548 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1549 Mat C; 1550 PetscErrorCode ierr; 1551 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1552 PetscInt *rows,*cols,bs2=a->bs2; 1553 MatScalar *array; 1554 1555 PetscFunctionBegin; 1556 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1557 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1558 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1559 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1560 1561 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1562 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1563 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1564 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1565 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr); 1566 ierr = PetscFree(col);CHKERRQ(ierr); 1567 } else { 1568 C = *B; 1569 } 1570 1571 array = a->a; 1572 ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 1573 for (i=0; i<mbs; i++) { 1574 cols[0] = i*bs; 1575 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1576 len = ai[i+1] - ai[i]; 1577 for (j=0; j<len; j++) { 1578 rows[0] = (*aj++)*bs; 1579 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1580 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1581 array += bs2; 1582 } 1583 } 1584 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1585 1586 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1587 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1588 1589 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1590 *B = C; 1591 } else { 1592 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1593 } 1594 PetscFunctionReturn(0); 1595 } 1596 1597 EXTERN_C_BEGIN 1598 #undef __FUNCT__ 1599 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1600 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1601 { 1602 PetscErrorCode ierr; 1603 Mat Btrans; 1604 1605 PetscFunctionBegin; 1606 *f = PETSC_FALSE; 1607 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1608 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1609 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1610 PetscFunctionReturn(0); 1611 } 1612 EXTERN_C_END 1613 1614 #undef __FUNCT__ 1615 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1616 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1617 { 1618 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1619 PetscErrorCode ierr; 1620 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1621 int fd; 1622 PetscScalar *aa; 1623 FILE *file; 1624 1625 PetscFunctionBegin; 1626 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1627 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1628 col_lens[0] = MAT_FILE_CLASSID; 1629 1630 col_lens[1] = A->rmap->N; 1631 col_lens[2] = A->cmap->n; 1632 col_lens[3] = a->nz*bs2; 1633 1634 /* store lengths of each row and write (including header) to file */ 1635 count = 0; 1636 for (i=0; i<a->mbs; i++) { 1637 for (j=0; j<bs; j++) { 1638 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1639 } 1640 } 1641 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1642 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1643 1644 /* store column indices (zero start index) */ 1645 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1646 count = 0; 1647 for (i=0; i<a->mbs; i++) { 1648 for (j=0; j<bs; j++) { 1649 for (k=a->i[i]; k<a->i[i+1]; k++) { 1650 for (l=0; l<bs; l++) { 1651 jj[count++] = bs*a->j[k] + l; 1652 } 1653 } 1654 } 1655 } 1656 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1657 ierr = PetscFree(jj);CHKERRQ(ierr); 1658 1659 /* store nonzero values */ 1660 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1661 count = 0; 1662 for (i=0; i<a->mbs; i++) { 1663 for (j=0; j<bs; j++) { 1664 for (k=a->i[i]; k<a->i[i+1]; k++) { 1665 for (l=0; l<bs; l++) { 1666 aa[count++] = a->a[bs2*k + l*bs + j]; 1667 } 1668 } 1669 } 1670 } 1671 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1672 ierr = PetscFree(aa);CHKERRQ(ierr); 1673 1674 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1675 if (file) { 1676 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1677 } 1678 PetscFunctionReturn(0); 1679 } 1680 1681 #undef __FUNCT__ 1682 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1683 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1684 { 1685 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1686 PetscErrorCode ierr; 1687 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1688 PetscViewerFormat format; 1689 1690 PetscFunctionBegin; 1691 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1692 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1693 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1694 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1695 Mat aij; 1696 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1697 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1698 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1699 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1700 PetscFunctionReturn(0); 1701 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1702 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1703 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1704 for (i=0; i<a->mbs; i++) { 1705 for (j=0; j<bs; j++) { 1706 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1707 for (k=a->i[i]; k<a->i[i+1]; k++) { 1708 for (l=0; l<bs; l++) { 1709 #if defined(PETSC_USE_COMPLEX) 1710 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1711 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1712 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1713 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1714 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1715 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1716 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1717 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1718 } 1719 #else 1720 if (a->a[bs2*k + l*bs + j] != 0.0) { 1721 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1722 } 1723 #endif 1724 } 1725 } 1726 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1727 } 1728 } 1729 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1730 } else { 1731 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1732 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1733 for (i=0; i<a->mbs; i++) { 1734 for (j=0; j<bs; j++) { 1735 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1736 for (k=a->i[i]; k<a->i[i+1]; k++) { 1737 for (l=0; l<bs; l++) { 1738 #if defined(PETSC_USE_COMPLEX) 1739 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1740 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1741 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1742 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1743 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1744 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1745 } else { 1746 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1747 } 1748 #else 1749 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1750 #endif 1751 } 1752 } 1753 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1754 } 1755 } 1756 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1757 } 1758 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1759 PetscFunctionReturn(0); 1760 } 1761 1762 #undef __FUNCT__ 1763 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1764 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1765 { 1766 Mat A = (Mat) Aa; 1767 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1768 PetscErrorCode ierr; 1769 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1770 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1771 MatScalar *aa; 1772 PetscViewer viewer; 1773 PetscViewerFormat format; 1774 1775 PetscFunctionBegin; 1776 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1777 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1778 1779 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1780 1781 /* loop over matrix elements drawing boxes */ 1782 1783 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1784 color = PETSC_DRAW_BLUE; 1785 for (i=0,row=0; i<mbs; i++,row+=bs) { 1786 for (j=a->i[i]; j<a->i[i+1]; j++) { 1787 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1788 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1789 aa = a->a + j*bs2; 1790 for (k=0; k<bs; k++) { 1791 for (l=0; l<bs; l++) { 1792 if (PetscRealPart(*aa++) >= 0.) continue; 1793 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1794 } 1795 } 1796 } 1797 } 1798 color = PETSC_DRAW_CYAN; 1799 for (i=0,row=0; i<mbs; i++,row+=bs) { 1800 for (j=a->i[i]; j<a->i[i+1]; j++) { 1801 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1802 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1803 aa = a->a + j*bs2; 1804 for (k=0; k<bs; k++) { 1805 for (l=0; l<bs; l++) { 1806 if (PetscRealPart(*aa++) != 0.) continue; 1807 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1808 } 1809 } 1810 } 1811 } 1812 color = PETSC_DRAW_RED; 1813 for (i=0,row=0; i<mbs; i++,row+=bs) { 1814 for (j=a->i[i]; j<a->i[i+1]; j++) { 1815 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1816 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1817 aa = a->a + j*bs2; 1818 for (k=0; k<bs; k++) { 1819 for (l=0; l<bs; l++) { 1820 if (PetscRealPart(*aa++) <= 0.) continue; 1821 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1822 } 1823 } 1824 } 1825 } 1826 } else { 1827 /* use contour shading to indicate magnitude of values */ 1828 /* first determine max of all nonzero values */ 1829 PetscDraw popup; 1830 PetscReal scale,maxv = 0.0; 1831 1832 for (i=0; i<a->nz*a->bs2; i++) { 1833 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1834 } 1835 scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv; 1836 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1837 if (popup) {ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);} 1838 for (i=0,row=0; i<mbs; i++,row+=bs) { 1839 for (j=a->i[i]; j<a->i[i+1]; j++) { 1840 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1841 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1842 aa = a->a + j*bs2; 1843 for (k=0; k<bs; k++) { 1844 for (l=0; l<bs; l++) { 1845 color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++)); 1846 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1847 } 1848 } 1849 } 1850 } 1851 } 1852 PetscFunctionReturn(0); 1853 } 1854 1855 #undef __FUNCT__ 1856 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1857 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1858 { 1859 PetscErrorCode ierr; 1860 PetscReal xl,yl,xr,yr,w,h; 1861 PetscDraw draw; 1862 PetscBool isnull; 1863 1864 PetscFunctionBegin; 1865 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1866 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1867 1868 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1869 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1870 xr += w; yr += h; xl = -w; yl = -h; 1871 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1872 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1873 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1874 PetscFunctionReturn(0); 1875 } 1876 1877 #undef __FUNCT__ 1878 #define __FUNCT__ "MatView_SeqBAIJ" 1879 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1880 { 1881 PetscErrorCode ierr; 1882 PetscBool iascii,isbinary,isdraw; 1883 1884 PetscFunctionBegin; 1885 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1886 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1887 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1888 if (iascii) { 1889 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1890 } else if (isbinary) { 1891 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1892 } else if (isdraw) { 1893 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1894 } else { 1895 Mat B; 1896 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1897 ierr = MatView(B,viewer);CHKERRQ(ierr); 1898 ierr = MatDestroy(&B);CHKERRQ(ierr); 1899 } 1900 PetscFunctionReturn(0); 1901 } 1902 1903 1904 #undef __FUNCT__ 1905 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1906 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1907 { 1908 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1909 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1910 PetscInt *ai = a->i,*ailen = a->ilen; 1911 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1912 MatScalar *ap,*aa = a->a; 1913 1914 PetscFunctionBegin; 1915 for (k=0; k<m; k++) { /* loop over rows */ 1916 row = im[k]; brow = row/bs; 1917 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1918 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1919 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1920 nrow = ailen[brow]; 1921 for (l=0; l<n; l++) { /* loop over columns */ 1922 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1923 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1924 col = in[l] ; 1925 bcol = col/bs; 1926 cidx = col%bs; 1927 ridx = row%bs; 1928 high = nrow; 1929 low = 0; /* assume unsorted */ 1930 while (high-low > 5) { 1931 t = (low+high)/2; 1932 if (rp[t] > bcol) high = t; 1933 else low = t; 1934 } 1935 for (i=low; i<high; i++) { 1936 if (rp[i] > bcol) break; 1937 if (rp[i] == bcol) { 1938 *v++ = ap[bs2*i+bs*cidx+ridx]; 1939 goto finished; 1940 } 1941 } 1942 *v++ = 0.0; 1943 finished:; 1944 } 1945 } 1946 PetscFunctionReturn(0); 1947 } 1948 1949 #undef __FUNCT__ 1950 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1951 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1952 { 1953 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1954 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1955 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1956 PetscErrorCode ierr; 1957 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1958 PetscBool roworiented=a->roworiented; 1959 const PetscScalar *value = v; 1960 MatScalar *ap,*aa = a->a,*bap; 1961 1962 PetscFunctionBegin; 1963 if (roworiented) { 1964 stepval = (n-1)*bs; 1965 } else { 1966 stepval = (m-1)*bs; 1967 } 1968 for (k=0; k<m; k++) { /* loop over added rows */ 1969 row = im[k]; 1970 if (row < 0) continue; 1971 #if defined(PETSC_USE_DEBUG) 1972 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1973 #endif 1974 rp = aj + ai[row]; 1975 ap = aa + bs2*ai[row]; 1976 rmax = imax[row]; 1977 nrow = ailen[row]; 1978 low = 0; 1979 high = nrow; 1980 for (l=0; l<n; l++) { /* loop over added columns */ 1981 if (in[l] < 0) continue; 1982 #if defined(PETSC_USE_DEBUG) 1983 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1984 #endif 1985 col = in[l]; 1986 if (roworiented) { 1987 value = v + (k*(stepval+bs) + l)*bs; 1988 } else { 1989 value = v + (l*(stepval+bs) + k)*bs; 1990 } 1991 if (col <= lastcol) low = 0; else high = nrow; 1992 lastcol = col; 1993 while (high-low > 7) { 1994 t = (low+high)/2; 1995 if (rp[t] > col) high = t; 1996 else low = t; 1997 } 1998 for (i=low; i<high; i++) { 1999 if (rp[i] > col) break; 2000 if (rp[i] == col) { 2001 bap = ap + bs2*i; 2002 if (roworiented) { 2003 if (is == ADD_VALUES) { 2004 for (ii=0; ii<bs; ii++,value+=stepval) { 2005 for (jj=ii; jj<bs2; jj+=bs) { 2006 bap[jj] += *value++; 2007 } 2008 } 2009 } else { 2010 for (ii=0; ii<bs; ii++,value+=stepval) { 2011 for (jj=ii; jj<bs2; jj+=bs) { 2012 bap[jj] = *value++; 2013 } 2014 } 2015 } 2016 } else { 2017 if (is == ADD_VALUES) { 2018 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2019 for (jj=0; jj<bs; jj++) { 2020 bap[jj] += value[jj]; 2021 } 2022 bap += bs; 2023 } 2024 } else { 2025 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2026 for (jj=0; jj<bs; jj++) { 2027 bap[jj] = value[jj]; 2028 } 2029 bap += bs; 2030 } 2031 } 2032 } 2033 goto noinsert2; 2034 } 2035 } 2036 if (nonew == 1) goto noinsert2; 2037 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2038 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2039 N = nrow++ - 1; high++; 2040 /* shift up all the later entries in this row */ 2041 for (ii=N; ii>=i; ii--) { 2042 rp[ii+1] = rp[ii]; 2043 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2044 } 2045 if (N >= i) { 2046 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2047 } 2048 rp[i] = col; 2049 bap = ap + bs2*i; 2050 if (roworiented) { 2051 for (ii=0; ii<bs; ii++,value+=stepval) { 2052 for (jj=ii; jj<bs2; jj+=bs) { 2053 bap[jj] = *value++; 2054 } 2055 } 2056 } else { 2057 for (ii=0; ii<bs; ii++,value+=stepval) { 2058 for (jj=0; jj<bs; jj++) { 2059 *bap++ = *value++; 2060 } 2061 } 2062 } 2063 noinsert2:; 2064 low = i; 2065 } 2066 ailen[row] = nrow; 2067 } 2068 PetscFunctionReturn(0); 2069 } 2070 2071 #undef __FUNCT__ 2072 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 2073 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 2074 { 2075 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2076 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 2077 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 2078 PetscErrorCode ierr; 2079 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 2080 MatScalar *aa = a->a,*ap; 2081 PetscReal ratio=0.6; 2082 2083 PetscFunctionBegin; 2084 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 2085 2086 if (m) rmax = ailen[0]; 2087 for (i=1; i<mbs; i++) { 2088 /* move each row back by the amount of empty slots (fshift) before it*/ 2089 fshift += imax[i-1] - ailen[i-1]; 2090 rmax = PetscMax(rmax,ailen[i]); 2091 if (fshift) { 2092 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 2093 N = ailen[i]; 2094 for (j=0; j<N; j++) { 2095 ip[j-fshift] = ip[j]; 2096 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2097 } 2098 } 2099 ai[i] = ai[i-1] + ailen[i-1]; 2100 } 2101 if (mbs) { 2102 fshift += imax[mbs-1] - ailen[mbs-1]; 2103 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 2104 } 2105 /* reset ilen and imax for each row */ 2106 for (i=0; i<mbs; i++) { 2107 ailen[i] = imax[i] = ai[i+1] - ai[i]; 2108 } 2109 a->nz = ai[mbs]; 2110 2111 /* diagonals may have moved, so kill the diagonal pointers */ 2112 a->idiagvalid = PETSC_FALSE; 2113 if (fshift && a->diag) { 2114 ierr = PetscFree(a->diag);CHKERRQ(ierr); 2115 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2116 a->diag = 0; 2117 } 2118 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 2119 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 2120 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 2121 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 2122 A->info.mallocs += a->reallocs; 2123 a->reallocs = 0; 2124 A->info.nz_unneeded = (PetscReal)fshift*bs2; 2125 2126 ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 2127 A->same_nonzero = PETSC_TRUE; 2128 PetscFunctionReturn(0); 2129 } 2130 2131 /* 2132 This function returns an array of flags which indicate the locations of contiguous 2133 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2134 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2135 Assume: sizes should be long enough to hold all the values. 2136 */ 2137 #undef __FUNCT__ 2138 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 2139 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2140 { 2141 PetscInt i,j,k,row; 2142 PetscBool flg; 2143 2144 PetscFunctionBegin; 2145 for (i=0,j=0; i<n; j++) { 2146 row = idx[i]; 2147 if (row%bs!=0) { /* Not the begining of a block */ 2148 sizes[j] = 1; 2149 i++; 2150 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2151 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 2152 i++; 2153 } else { /* Begining of the block, so check if the complete block exists */ 2154 flg = PETSC_TRUE; 2155 for (k=1; k<bs; k++) { 2156 if (row+k != idx[i+k]) { /* break in the block */ 2157 flg = PETSC_FALSE; 2158 break; 2159 } 2160 } 2161 if (flg) { /* No break in the bs */ 2162 sizes[j] = bs; 2163 i+= bs; 2164 } else { 2165 sizes[j] = 1; 2166 i++; 2167 } 2168 } 2169 } 2170 *bs_max = j; 2171 PetscFunctionReturn(0); 2172 } 2173 2174 #undef __FUNCT__ 2175 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2176 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2177 { 2178 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2179 PetscErrorCode ierr; 2180 PetscInt i,j,k,count,*rows; 2181 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2182 PetscScalar zero = 0.0; 2183 MatScalar *aa; 2184 const PetscScalar *xx; 2185 PetscScalar *bb; 2186 2187 PetscFunctionBegin; 2188 /* fix right hand side if needed */ 2189 if (x && b) { 2190 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2191 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2192 for (i=0; i<is_n; i++) { 2193 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2194 } 2195 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2196 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2197 } 2198 2199 /* Make a copy of the IS and sort it */ 2200 /* allocate memory for rows,sizes */ 2201 ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2202 2203 /* copy IS values to rows, and sort them */ 2204 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2205 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2206 2207 if (baij->keepnonzeropattern) { 2208 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2209 bs_max = is_n; 2210 A->same_nonzero = PETSC_TRUE; 2211 } else { 2212 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2213 A->same_nonzero = PETSC_FALSE; 2214 } 2215 2216 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2217 row = rows[j]; 2218 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2219 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2220 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2221 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2222 if (diag != (PetscScalar)0.0) { 2223 if (baij->ilen[row/bs] > 0) { 2224 baij->ilen[row/bs] = 1; 2225 baij->j[baij->i[row/bs]] = row/bs; 2226 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2227 } 2228 /* Now insert all the diagonal values for this bs */ 2229 for (k=0; k<bs; k++) { 2230 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2231 } 2232 } else { /* (diag == 0.0) */ 2233 baij->ilen[row/bs] = 0; 2234 } /* end (diag == 0.0) */ 2235 } else { /* (sizes[i] != bs) */ 2236 #if defined (PETSC_USE_DEBUG) 2237 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2238 #endif 2239 for (k=0; k<count; k++) { 2240 aa[0] = zero; 2241 aa += bs; 2242 } 2243 if (diag != (PetscScalar)0.0) { 2244 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2245 } 2246 } 2247 } 2248 2249 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2250 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2251 PetscFunctionReturn(0); 2252 } 2253 2254 #undef __FUNCT__ 2255 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2256 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2257 { 2258 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2259 PetscErrorCode ierr; 2260 PetscInt i,j,k,count; 2261 PetscInt bs=A->rmap->bs,bs2=baij->bs2,row,col; 2262 PetscScalar zero = 0.0; 2263 MatScalar *aa; 2264 const PetscScalar *xx; 2265 PetscScalar *bb; 2266 PetscBool *zeroed,vecs = PETSC_FALSE; 2267 2268 PetscFunctionBegin; 2269 /* fix right hand side if needed */ 2270 if (x && b) { 2271 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2272 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2273 vecs = PETSC_TRUE; 2274 } 2275 A->same_nonzero = PETSC_TRUE; 2276 2277 /* zero the columns */ 2278 ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr); 2279 ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 2280 for (i=0; i<is_n; i++) { 2281 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2282 zeroed[is_idx[i]] = PETSC_TRUE; 2283 } 2284 for (i=0; i<A->rmap->N; i++) { 2285 if (!zeroed[i]) { 2286 row = i/bs; 2287 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2288 for (k=0; k<bs; k++) { 2289 col = bs*baij->j[j] + k; 2290 if (zeroed[col]) { 2291 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2292 if (vecs) bb[i] -= aa[0]*xx[col]; 2293 aa[0] = 0.0; 2294 } 2295 } 2296 } 2297 } else if (vecs) bb[i] = diag*xx[i]; 2298 } 2299 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2300 if (vecs) { 2301 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2302 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2303 } 2304 2305 /* zero the rows */ 2306 for (i=0; i<is_n; i++) { 2307 row = is_idx[i]; 2308 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2309 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2310 for (k=0; k<count; k++) { 2311 aa[0] = zero; 2312 aa += bs; 2313 } 2314 if (diag != (PetscScalar)0.0) { 2315 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2316 } 2317 } 2318 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2319 PetscFunctionReturn(0); 2320 } 2321 2322 #undef __FUNCT__ 2323 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2324 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2325 { 2326 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2327 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2328 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2329 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2330 PetscErrorCode ierr; 2331 PetscInt ridx,cidx,bs2=a->bs2; 2332 PetscBool roworiented=a->roworiented; 2333 MatScalar *ap,value,*aa=a->a,*bap; 2334 2335 PetscFunctionBegin; 2336 if (v) PetscValidScalarPointer(v,6); 2337 for (k=0; k<m; k++) { /* loop over added rows */ 2338 row = im[k]; 2339 brow = row/bs; 2340 if (row < 0) continue; 2341 #if defined(PETSC_USE_DEBUG) 2342 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2343 #endif 2344 rp = aj + ai[brow]; 2345 ap = aa + bs2*ai[brow]; 2346 rmax = imax[brow]; 2347 nrow = ailen[brow]; 2348 low = 0; 2349 high = nrow; 2350 for (l=0; l<n; l++) { /* loop over added columns */ 2351 if (in[l] < 0) continue; 2352 #if defined(PETSC_USE_DEBUG) 2353 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2354 #endif 2355 col = in[l]; bcol = col/bs; 2356 ridx = row % bs; cidx = col % bs; 2357 if (roworiented) { 2358 value = v[l + k*n]; 2359 } else { 2360 value = v[k + l*m]; 2361 } 2362 if (col <= lastcol) low = 0; else high = nrow; 2363 lastcol = col; 2364 while (high-low > 7) { 2365 t = (low+high)/2; 2366 if (rp[t] > bcol) high = t; 2367 else low = t; 2368 } 2369 for (i=low; i<high; i++) { 2370 if (rp[i] > bcol) break; 2371 if (rp[i] == bcol) { 2372 bap = ap + bs2*i + bs*cidx + ridx; 2373 if (is == ADD_VALUES) *bap += value; 2374 else *bap = value; 2375 goto noinsert1; 2376 } 2377 } 2378 if (nonew == 1) goto noinsert1; 2379 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2380 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2381 N = nrow++ - 1; high++; 2382 /* shift up all the later entries in this row */ 2383 for (ii=N; ii>=i; ii--) { 2384 rp[ii+1] = rp[ii]; 2385 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2386 } 2387 if (N>=i) { 2388 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2389 } 2390 rp[i] = bcol; 2391 ap[bs2*i + bs*cidx + ridx] = value; 2392 a->nz++; 2393 noinsert1:; 2394 low = i; 2395 } 2396 ailen[brow] = nrow; 2397 } 2398 A->same_nonzero = PETSC_FALSE; 2399 PetscFunctionReturn(0); 2400 } 2401 2402 #undef __FUNCT__ 2403 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2404 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2405 { 2406 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2407 Mat outA; 2408 PetscErrorCode ierr; 2409 PetscBool row_identity,col_identity; 2410 2411 PetscFunctionBegin; 2412 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2413 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2414 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2415 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2416 2417 outA = inA; 2418 inA->factortype = MAT_FACTOR_LU; 2419 2420 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2421 2422 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2423 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2424 a->row = row; 2425 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2426 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2427 a->col = col; 2428 2429 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2430 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2431 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2432 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2433 2434 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2435 if (!a->solve_work) { 2436 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2437 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2438 } 2439 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2440 PetscFunctionReturn(0); 2441 } 2442 2443 EXTERN_C_BEGIN 2444 #undef __FUNCT__ 2445 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2446 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2447 { 2448 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2449 PetscInt i,nz,mbs; 2450 2451 PetscFunctionBegin; 2452 nz = baij->maxnz; 2453 mbs = baij->mbs; 2454 for (i=0; i<nz; i++) { 2455 baij->j[i] = indices[i]; 2456 } 2457 baij->nz = nz; 2458 for (i=0; i<mbs; i++) { 2459 baij->ilen[i] = baij->imax[i]; 2460 } 2461 PetscFunctionReturn(0); 2462 } 2463 EXTERN_C_END 2464 2465 #undef __FUNCT__ 2466 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2467 /*@ 2468 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2469 in the matrix. 2470 2471 Input Parameters: 2472 + mat - the SeqBAIJ matrix 2473 - indices - the column indices 2474 2475 Level: advanced 2476 2477 Notes: 2478 This can be called if you have precomputed the nonzero structure of the 2479 matrix and want to provide it to the matrix object to improve the performance 2480 of the MatSetValues() operation. 2481 2482 You MUST have set the correct numbers of nonzeros per row in the call to 2483 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2484 2485 MUST be called before any calls to MatSetValues(); 2486 2487 @*/ 2488 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2489 { 2490 PetscErrorCode ierr; 2491 2492 PetscFunctionBegin; 2493 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2494 PetscValidPointer(indices,2); 2495 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr); 2496 PetscFunctionReturn(0); 2497 } 2498 2499 #undef __FUNCT__ 2500 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2501 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2502 { 2503 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2504 PetscErrorCode ierr; 2505 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2506 PetscReal atmp; 2507 PetscScalar *x,zero = 0.0; 2508 MatScalar *aa; 2509 PetscInt ncols,brow,krow,kcol; 2510 2511 PetscFunctionBegin; 2512 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2513 bs = A->rmap->bs; 2514 aa = a->a; 2515 ai = a->i; 2516 aj = a->j; 2517 mbs = a->mbs; 2518 2519 ierr = VecSet(v,zero);CHKERRQ(ierr); 2520 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2521 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2522 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2523 for (i=0; i<mbs; i++) { 2524 ncols = ai[1] - ai[0]; ai++; 2525 brow = bs*i; 2526 for (j=0; j<ncols; j++) { 2527 for (kcol=0; kcol<bs; kcol++) { 2528 for (krow=0; krow<bs; krow++) { 2529 atmp = PetscAbsScalar(*aa);aa++; 2530 row = brow + krow; /* row index */ 2531 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2532 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2533 } 2534 } 2535 aj++; 2536 } 2537 } 2538 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2539 PetscFunctionReturn(0); 2540 } 2541 2542 #undef __FUNCT__ 2543 #define __FUNCT__ "MatCopy_SeqBAIJ" 2544 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2545 { 2546 PetscErrorCode ierr; 2547 2548 PetscFunctionBegin; 2549 /* If the two matrices have the same copy implementation, use fast copy. */ 2550 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2551 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2552 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2553 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2554 2555 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2556 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2557 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2558 } else { 2559 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2560 } 2561 PetscFunctionReturn(0); 2562 } 2563 2564 #undef __FUNCT__ 2565 #define __FUNCT__ "MatSetUp_SeqBAIJ" 2566 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2567 { 2568 PetscErrorCode ierr; 2569 2570 PetscFunctionBegin; 2571 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2572 PetscFunctionReturn(0); 2573 } 2574 2575 #undef __FUNCT__ 2576 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ" 2577 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2578 { 2579 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2580 2581 PetscFunctionBegin; 2582 *array = a->a; 2583 PetscFunctionReturn(0); 2584 } 2585 2586 #undef __FUNCT__ 2587 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ" 2588 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2589 { 2590 PetscFunctionBegin; 2591 PetscFunctionReturn(0); 2592 } 2593 2594 #undef __FUNCT__ 2595 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2596 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2597 { 2598 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2599 PetscErrorCode ierr; 2600 PetscInt i,bs=Y->rmap->bs,j,bs2=bs*bs; 2601 PetscBLASInt one=1; 2602 2603 PetscFunctionBegin; 2604 if (str == SAME_NONZERO_PATTERN) { 2605 PetscScalar alpha = a; 2606 PetscBLASInt bnz; 2607 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2608 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2609 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2610 if (y->xtoy && y->XtoY != X) { 2611 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2612 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2613 } 2614 if (!y->xtoy) { /* get xtoy */ 2615 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2616 y->XtoY = X; 2617 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2618 } 2619 for (i=0; i<x->nz; i++) { 2620 j = 0; 2621 while (j < bs2) { 2622 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2623 j++; 2624 } 2625 } 2626 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2627 } else { 2628 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2629 } 2630 PetscFunctionReturn(0); 2631 } 2632 2633 #undef __FUNCT__ 2634 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2635 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2636 { 2637 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2638 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2639 MatScalar *aa = a->a; 2640 2641 PetscFunctionBegin; 2642 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2643 PetscFunctionReturn(0); 2644 } 2645 2646 #undef __FUNCT__ 2647 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2648 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2649 { 2650 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2651 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2652 MatScalar *aa = a->a; 2653 2654 PetscFunctionBegin; 2655 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2656 PetscFunctionReturn(0); 2657 } 2658 2659 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2660 2661 #undef __FUNCT__ 2662 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2663 /* 2664 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2665 */ 2666 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2667 { 2668 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2669 PetscErrorCode ierr; 2670 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2671 PetscInt nz = a->i[m],row,*jj,mr,col; 2672 2673 PetscFunctionBegin; 2674 *nn = n; 2675 if (!ia) PetscFunctionReturn(0); 2676 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2677 else { 2678 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2679 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2680 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2681 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2682 jj = a->j; 2683 for (i=0; i<nz; i++) { 2684 collengths[jj[i]]++; 2685 } 2686 cia[0] = oshift; 2687 for (i=0; i<n; i++) { 2688 cia[i+1] = cia[i] + collengths[i]; 2689 } 2690 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2691 jj = a->j; 2692 for (row=0; row<m; row++) { 2693 mr = a->i[row+1] - a->i[row]; 2694 for (i=0; i<mr; i++) { 2695 col = *jj++; 2696 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2697 } 2698 } 2699 ierr = PetscFree(collengths);CHKERRQ(ierr); 2700 *ia = cia; *ja = cja; 2701 } 2702 PetscFunctionReturn(0); 2703 } 2704 2705 #undef __FUNCT__ 2706 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2707 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2708 { 2709 PetscErrorCode ierr; 2710 2711 PetscFunctionBegin; 2712 if (!ia) PetscFunctionReturn(0); 2713 ierr = PetscFree(*ia);CHKERRQ(ierr); 2714 ierr = PetscFree(*ja);CHKERRQ(ierr); 2715 PetscFunctionReturn(0); 2716 } 2717 2718 #undef __FUNCT__ 2719 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2720 PetscErrorCode MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2721 { 2722 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f; 2723 PetscErrorCode ierr; 2724 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow; 2725 PetscScalar dx,*y,*xx,*w3_array; 2726 PetscScalar *vscale_array; 2727 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2728 Vec w1=coloring->w1,w2=coloring->w2,w3; 2729 void *fctx = coloring->fctx; 2730 PetscBool flg = PETSC_FALSE; 2731 PetscInt ctype=coloring->ctype,N,col_start=0,col_end=0; 2732 Vec x1_tmp; 2733 2734 PetscFunctionBegin; 2735 PetscValidHeaderSpecific(J,MAT_CLASSID,1); 2736 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2); 2737 PetscValidHeaderSpecific(x1,VEC_CLASSID,3); 2738 if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2739 2740 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2741 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2742 ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr); 2743 if (flg) { 2744 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2745 } else { 2746 PetscBool assembled; 2747 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2748 if (assembled) { 2749 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2750 } 2751 } 2752 2753 x1_tmp = x1; 2754 if (!coloring->vscale) { 2755 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2756 } 2757 2758 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2759 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2760 } 2761 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2762 2763 /* Set w1 = F(x1) */ 2764 if (!coloring->fset) { 2765 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2766 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2767 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2768 } else { 2769 coloring->fset = PETSC_FALSE; 2770 } 2771 2772 if (!coloring->w3) { 2773 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2774 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2775 } 2776 w3 = coloring->w3; 2777 2778 CHKMEMQ; 2779 /* Compute all the local scale factors, including ghost points */ 2780 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2781 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2782 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2783 if (ctype == IS_COLORING_GHOSTED) { 2784 col_start = 0; col_end = N; 2785 } else if (ctype == IS_COLORING_GLOBAL) { 2786 xx = xx - start; 2787 vscale_array = vscale_array - start; 2788 col_start = start; col_end = N + start; 2789 } CHKMEMQ; 2790 for (col=col_start; col<col_end; col++) { 2791 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2792 if (coloring->htype[0] == 'w') { 2793 dx = 1.0 + unorm; 2794 } else { 2795 dx = xx[col]; 2796 } 2797 if (dx == (PetscScalar)0.0) dx = 1.0; 2798 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2799 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2800 dx *= epsilon; 2801 vscale_array[col] = (PetscScalar)1.0/dx; 2802 } CHKMEMQ; 2803 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2804 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2805 if (ctype == IS_COLORING_GLOBAL) { 2806 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2807 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2808 } 2809 CHKMEMQ; 2810 if (coloring->vscaleforrow) { 2811 vscaleforrow = coloring->vscaleforrow; 2812 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2813 2814 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2815 /* 2816 Loop over each color 2817 */ 2818 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2819 for (k=0; k<coloring->ncolors; k++) { 2820 coloring->currentcolor = k; 2821 for (i=0; i<bs; i++) { 2822 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2823 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2824 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2825 /* 2826 Loop over each column associated with color 2827 adding the perturbation to the vector w3. 2828 */ 2829 for (l=0; l<coloring->ncolumns[k]; l++) { 2830 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2831 if (coloring->htype[0] == 'w') { 2832 dx = 1.0 + unorm; 2833 } else { 2834 dx = xx[col]; 2835 } 2836 if (dx == (PetscScalar)0.0) dx = 1.0; 2837 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2838 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2839 dx *= epsilon; 2840 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2841 w3_array[col] += dx; 2842 } 2843 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2844 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2845 2846 /* 2847 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2848 w2 = F(x1 + dx) - F(x1) 2849 */ 2850 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2851 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2852 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2853 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2854 2855 /* 2856 Loop over rows of vector, putting results into Jacobian matrix 2857 */ 2858 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2859 for (l=0; l<coloring->nrows[k]; l++) { 2860 row = bs*coloring->rows[k][l]; /* local row index */ 2861 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2862 for (j=0; j<bs; j++) { 2863 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2864 srows[j] = row + start + j; 2865 } 2866 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2867 } 2868 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2869 } 2870 } /* endof for each color */ 2871 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2872 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2873 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2874 ierr = PetscFree(srows);CHKERRQ(ierr); 2875 2876 coloring->currentcolor = -1; 2877 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2878 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2879 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2880 PetscFunctionReturn(0); 2881 } 2882 2883 /* -------------------------------------------------------------------*/ 2884 static struct _MatOps MatOps_Values = { 2885 MatSetValues_SeqBAIJ, 2886 MatGetRow_SeqBAIJ, 2887 MatRestoreRow_SeqBAIJ, 2888 MatMult_SeqBAIJ_N, 2889 /* 4*/ MatMultAdd_SeqBAIJ_N, 2890 MatMultTranspose_SeqBAIJ, 2891 MatMultTransposeAdd_SeqBAIJ, 2892 0, 2893 0, 2894 0, 2895 /* 10*/ 0, 2896 MatLUFactor_SeqBAIJ, 2897 0, 2898 0, 2899 MatTranspose_SeqBAIJ, 2900 /* 15*/ MatGetInfo_SeqBAIJ, 2901 MatEqual_SeqBAIJ, 2902 MatGetDiagonal_SeqBAIJ, 2903 MatDiagonalScale_SeqBAIJ, 2904 MatNorm_SeqBAIJ, 2905 /* 20*/ 0, 2906 MatAssemblyEnd_SeqBAIJ, 2907 MatSetOption_SeqBAIJ, 2908 MatZeroEntries_SeqBAIJ, 2909 /* 24*/ MatZeroRows_SeqBAIJ, 2910 0, 2911 0, 2912 0, 2913 0, 2914 /* 29*/ MatSetUp_SeqBAIJ, 2915 0, 2916 0, 2917 0, 2918 0, 2919 /* 34*/ MatDuplicate_SeqBAIJ, 2920 0, 2921 0, 2922 MatILUFactor_SeqBAIJ, 2923 0, 2924 /* 39*/ MatAXPY_SeqBAIJ, 2925 MatGetSubMatrices_SeqBAIJ, 2926 MatIncreaseOverlap_SeqBAIJ, 2927 MatGetValues_SeqBAIJ, 2928 MatCopy_SeqBAIJ, 2929 /* 44*/ 0, 2930 MatScale_SeqBAIJ, 2931 0, 2932 0, 2933 MatZeroRowsColumns_SeqBAIJ, 2934 /* 49*/ 0, 2935 MatGetRowIJ_SeqBAIJ, 2936 MatRestoreRowIJ_SeqBAIJ, 2937 MatGetColumnIJ_SeqBAIJ, 2938 MatRestoreColumnIJ_SeqBAIJ, 2939 /* 54*/ MatFDColoringCreate_SeqAIJ, 2940 0, 2941 0, 2942 0, 2943 MatSetValuesBlocked_SeqBAIJ, 2944 /* 59*/ MatGetSubMatrix_SeqBAIJ, 2945 MatDestroy_SeqBAIJ, 2946 MatView_SeqBAIJ, 2947 0, 2948 0, 2949 /* 64*/ 0, 2950 0, 2951 0, 2952 0, 2953 0, 2954 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2955 0, 2956 MatConvert_Basic, 2957 0, 2958 0, 2959 /* 74*/ 0, 2960 MatFDColoringApply_BAIJ, 2961 0, 2962 0, 2963 0, 2964 /* 79*/ 0, 2965 0, 2966 0, 2967 0, 2968 MatLoad_SeqBAIJ, 2969 /* 84*/ 0, 2970 0, 2971 0, 2972 0, 2973 0, 2974 /* 89*/ 0, 2975 0, 2976 0, 2977 0, 2978 0, 2979 /* 94*/ 0, 2980 0, 2981 0, 2982 0, 2983 0, 2984 /* 99*/ 0, 2985 0, 2986 0, 2987 0, 2988 0, 2989 /*104*/ 0, 2990 MatRealPart_SeqBAIJ, 2991 MatImaginaryPart_SeqBAIJ, 2992 0, 2993 0, 2994 /*109*/ 0, 2995 0, 2996 0, 2997 0, 2998 MatMissingDiagonal_SeqBAIJ, 2999 /*114*/ 0, 3000 0, 3001 0, 3002 0, 3003 0, 3004 /*119*/ 0, 3005 0, 3006 MatMultHermitianTranspose_SeqBAIJ, 3007 MatMultHermitianTransposeAdd_SeqBAIJ, 3008 0, 3009 /*124*/ 0, 3010 0, 3011 MatInvertBlockDiagonal_SeqBAIJ 3012 }; 3013 3014 EXTERN_C_BEGIN 3015 #undef __FUNCT__ 3016 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 3017 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3018 { 3019 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3020 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 3021 PetscErrorCode ierr; 3022 3023 PetscFunctionBegin; 3024 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3025 3026 /* allocate space for values if not already there */ 3027 if (!aij->saved_values) { 3028 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 3029 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 3030 } 3031 3032 /* copy values over */ 3033 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3034 PetscFunctionReturn(0); 3035 } 3036 EXTERN_C_END 3037 3038 EXTERN_C_BEGIN 3039 #undef __FUNCT__ 3040 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 3041 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3042 { 3043 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3044 PetscErrorCode ierr; 3045 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 3046 3047 PetscFunctionBegin; 3048 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3049 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 3050 3051 /* copy values over */ 3052 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3053 PetscFunctionReturn(0); 3054 } 3055 EXTERN_C_END 3056 3057 EXTERN_C_BEGIN 3058 extern PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 3059 extern PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 3060 EXTERN_C_END 3061 3062 EXTERN_C_BEGIN 3063 #undef __FUNCT__ 3064 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 3065 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 3066 { 3067 Mat_SeqBAIJ *b; 3068 PetscErrorCode ierr; 3069 PetscInt i,mbs,nbs,bs2; 3070 PetscBool flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 3071 3072 PetscFunctionBegin; 3073 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 3074 if (nz == MAT_SKIP_ALLOCATION) { 3075 skipallocation = PETSC_TRUE; 3076 nz = 0; 3077 } 3078 3079 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3080 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3081 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3082 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3083 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3084 3085 B->preallocated = PETSC_TRUE; 3086 3087 mbs = B->rmap->n/bs; 3088 nbs = B->cmap->n/bs; 3089 bs2 = bs*bs; 3090 3091 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 3092 3093 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3094 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 3095 if (nnz) { 3096 for (i=0; i<mbs; i++) { 3097 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 3098 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 3099 } 3100 } 3101 3102 b = (Mat_SeqBAIJ*)B->data; 3103 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 3104 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3105 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3106 3107 if (!flg) { 3108 switch (bs) { 3109 case 1: 3110 B->ops->mult = MatMult_SeqBAIJ_1; 3111 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3112 B->ops->sor = MatSOR_SeqBAIJ_1; 3113 break; 3114 case 2: 3115 B->ops->mult = MatMult_SeqBAIJ_2; 3116 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3117 B->ops->sor = MatSOR_SeqBAIJ_2; 3118 break; 3119 case 3: 3120 B->ops->mult = MatMult_SeqBAIJ_3; 3121 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3122 B->ops->sor = MatSOR_SeqBAIJ_3; 3123 break; 3124 case 4: 3125 B->ops->mult = MatMult_SeqBAIJ_4; 3126 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3127 B->ops->sor = MatSOR_SeqBAIJ_4; 3128 break; 3129 case 5: 3130 B->ops->mult = MatMult_SeqBAIJ_5; 3131 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3132 B->ops->sor = MatSOR_SeqBAIJ_5; 3133 break; 3134 case 6: 3135 B->ops->mult = MatMult_SeqBAIJ_6; 3136 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3137 B->ops->sor = MatSOR_SeqBAIJ_6; 3138 break; 3139 case 7: 3140 B->ops->mult = MatMult_SeqBAIJ_7; 3141 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3142 B->ops->sor = MatSOR_SeqBAIJ_7; 3143 break; 3144 case 15: 3145 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3146 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3147 B->ops->sor = MatSOR_SeqBAIJ_N; 3148 break; 3149 default: 3150 B->ops->mult = MatMult_SeqBAIJ_N; 3151 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3152 B->ops->sor = MatSOR_SeqBAIJ_N; 3153 break; 3154 } 3155 } 3156 b->mbs = mbs; 3157 b->nbs = nbs; 3158 if (!skipallocation) { 3159 if (!b->imax) { 3160 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 3161 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3162 b->free_imax_ilen = PETSC_TRUE; 3163 } 3164 /* b->ilen will count nonzeros in each block row so far. */ 3165 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 3166 if (!nnz) { 3167 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3168 else if (nz < 0) nz = 1; 3169 for (i=0; i<mbs; i++) b->imax[i] = nz; 3170 nz = nz*mbs; 3171 } else { 3172 nz = 0; 3173 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 3174 } 3175 3176 /* allocate the matrix space */ 3177 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 3178 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 3179 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 3180 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 3181 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3182 b->singlemalloc = PETSC_TRUE; 3183 b->i[0] = 0; 3184 for (i=1; i<mbs+1; i++) { 3185 b->i[i] = b->i[i-1] + b->imax[i-1]; 3186 } 3187 b->free_a = PETSC_TRUE; 3188 b->free_ij = PETSC_TRUE; 3189 } else { 3190 b->free_a = PETSC_FALSE; 3191 b->free_ij = PETSC_FALSE; 3192 } 3193 3194 b->bs2 = bs2; 3195 b->mbs = mbs; 3196 b->nz = 0; 3197 b->maxnz = nz; 3198 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3199 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 3200 PetscFunctionReturn(0); 3201 } 3202 EXTERN_C_END 3203 3204 EXTERN_C_BEGIN 3205 #undef __FUNCT__ 3206 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3207 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3208 { 3209 PetscInt i,m,nz,nz_max=0,*nnz; 3210 PetscScalar *values=0; 3211 PetscErrorCode ierr; 3212 3213 PetscFunctionBegin; 3214 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3215 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3216 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3217 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3218 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3219 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3220 m = B->rmap->n/bs; 3221 3222 if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 3223 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3224 for (i=0; i<m; i++) { 3225 nz = ii[i+1]- ii[i]; 3226 if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 3227 nz_max = PetscMax(nz_max, nz); 3228 nnz[i] = nz; 3229 } 3230 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3231 ierr = PetscFree(nnz);CHKERRQ(ierr); 3232 3233 values = (PetscScalar*)V; 3234 if (!values) { 3235 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3236 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3237 } 3238 for (i=0; i<m; i++) { 3239 PetscInt ncols = ii[i+1] - ii[i]; 3240 const PetscInt *icols = jj + ii[i]; 3241 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3242 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3243 } 3244 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3245 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3246 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3247 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3248 PetscFunctionReturn(0); 3249 } 3250 EXTERN_C_END 3251 3252 3253 EXTERN_C_BEGIN 3254 extern PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3255 extern PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*); 3256 #if defined(PETSC_HAVE_MUMPS) 3257 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3258 #endif 3259 extern PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 3260 EXTERN_C_END 3261 3262 /*MC 3263 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3264 block sparse compressed row format. 3265 3266 Options Database Keys: 3267 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3268 3269 Level: beginner 3270 3271 .seealso: MatCreateSeqBAIJ() 3272 M*/ 3273 3274 EXTERN_C_BEGIN 3275 extern PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3276 EXTERN_C_END 3277 3278 EXTERN_C_BEGIN 3279 #undef __FUNCT__ 3280 #define __FUNCT__ "MatCreate_SeqBAIJ" 3281 PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3282 { 3283 PetscErrorCode ierr; 3284 PetscMPIInt size; 3285 Mat_SeqBAIJ *b; 3286 3287 PetscFunctionBegin; 3288 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 3289 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3290 3291 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3292 B->data = (void*)b; 3293 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3294 b->row = 0; 3295 b->col = 0; 3296 b->icol = 0; 3297 b->reallocs = 0; 3298 b->saved_values = 0; 3299 3300 b->roworiented = PETSC_TRUE; 3301 b->nonew = 0; 3302 b->diag = 0; 3303 b->solve_work = 0; 3304 b->mult_work = 0; 3305 B->spptr = 0; 3306 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3307 b->keepnonzeropattern = PETSC_FALSE; 3308 b->xtoy = 0; 3309 b->XtoY = 0; 3310 B->same_nonzero = PETSC_FALSE; 3311 3312 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C", 3313 "MatGetFactorAvailable_seqbaij_petsc", 3314 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3315 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C", 3316 "MatGetFactor_seqbaij_petsc", 3317 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3318 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_bstrm_C", 3319 "MatGetFactor_seqbaij_bstrm", 3320 MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr); 3321 #if defined(PETSC_HAVE_MUMPS) 3322 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr); 3323 #endif 3324 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatInvertBlockDiagonal_C", 3325 "MatInvertBlockDiagonal_SeqBAIJ", 3326 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3327 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3328 "MatStoreValues_SeqBAIJ", 3329 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3330 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3331 "MatRetrieveValues_SeqBAIJ", 3332 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3333 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 3334 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 3335 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3336 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 3337 "MatConvert_SeqBAIJ_SeqAIJ", 3338 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3339 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 3340 "MatConvert_SeqBAIJ_SeqSBAIJ", 3341 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3342 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 3343 "MatSeqBAIJSetPreallocation_SeqBAIJ", 3344 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3345 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 3346 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 3347 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3348 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C", 3349 "MatConvert_SeqBAIJ_SeqBSTRM", 3350 MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3351 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3352 "MatIsTranspose_SeqBAIJ", 3353 MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3354 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3355 PetscFunctionReturn(0); 3356 } 3357 EXTERN_C_END 3358 3359 #undef __FUNCT__ 3360 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3361 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3362 { 3363 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3364 PetscErrorCode ierr; 3365 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3366 3367 PetscFunctionBegin; 3368 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3369 3370 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3371 c->imax = a->imax; 3372 c->ilen = a->ilen; 3373 c->free_imax_ilen = PETSC_FALSE; 3374 } else { 3375 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3376 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3377 for (i=0; i<mbs; i++) { 3378 c->imax[i] = a->imax[i]; 3379 c->ilen[i] = a->ilen[i]; 3380 } 3381 c->free_imax_ilen = PETSC_TRUE; 3382 } 3383 3384 /* allocate the matrix space */ 3385 if (mallocmatspace) { 3386 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3387 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3388 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3389 ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr); 3390 c->i = a->i; 3391 c->j = a->j; 3392 c->singlemalloc = PETSC_FALSE; 3393 c->free_a = PETSC_TRUE; 3394 c->free_ij = PETSC_FALSE; 3395 c->parent = A; 3396 C->preallocated = PETSC_TRUE; 3397 C->assembled = PETSC_TRUE; 3398 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3399 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3400 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3401 } else { 3402 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3403 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3404 c->singlemalloc = PETSC_TRUE; 3405 c->free_a = PETSC_TRUE; 3406 c->free_ij = PETSC_TRUE; 3407 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3408 if (mbs > 0) { 3409 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3410 if (cpvalues == MAT_COPY_VALUES) { 3411 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3412 } else { 3413 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3414 } 3415 } 3416 C->preallocated = PETSC_TRUE; 3417 C->assembled = PETSC_TRUE; 3418 } 3419 } 3420 3421 c->roworiented = a->roworiented; 3422 c->nonew = a->nonew; 3423 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3424 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3425 c->bs2 = a->bs2; 3426 c->mbs = a->mbs; 3427 c->nbs = a->nbs; 3428 3429 if (a->diag) { 3430 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3431 c->diag = a->diag; 3432 c->free_diag = PETSC_FALSE; 3433 } else { 3434 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3435 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3436 for (i=0; i<mbs; i++) { 3437 c->diag[i] = a->diag[i]; 3438 } 3439 c->free_diag = PETSC_TRUE; 3440 } 3441 } else c->diag = 0; 3442 c->nz = a->nz; 3443 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3444 c->solve_work = 0; 3445 c->mult_work = 0; 3446 3447 c->compressedrow.use = a->compressedrow.use; 3448 c->compressedrow.nrows = a->compressedrow.nrows; 3449 c->compressedrow.check = a->compressedrow.check; 3450 if (a->compressedrow.use) { 3451 i = a->compressedrow.nrows; 3452 ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 3453 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3454 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3455 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3456 } else { 3457 c->compressedrow.use = PETSC_FALSE; 3458 c->compressedrow.i = PETSC_NULL; 3459 c->compressedrow.rindex = PETSC_NULL; 3460 } 3461 C->same_nonzero = A->same_nonzero; 3462 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3463 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3464 PetscFunctionReturn(0); 3465 } 3466 3467 #undef __FUNCT__ 3468 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3469 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3470 { 3471 PetscErrorCode ierr; 3472 3473 PetscFunctionBegin; 3474 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 3475 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3476 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3477 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3478 PetscFunctionReturn(0); 3479 } 3480 3481 #undef __FUNCT__ 3482 #define __FUNCT__ "MatLoad_SeqBAIJ" 3483 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3484 { 3485 Mat_SeqBAIJ *a; 3486 PetscErrorCode ierr; 3487 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3488 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3489 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3490 PetscInt *masked,nmask,tmp,bs2,ishift; 3491 PetscMPIInt size; 3492 int fd; 3493 PetscScalar *aa; 3494 MPI_Comm comm = ((PetscObject)viewer)->comm; 3495 3496 PetscFunctionBegin; 3497 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3498 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3499 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3500 bs2 = bs*bs; 3501 3502 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3503 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3504 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3505 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3506 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3507 M = header[1]; N = header[2]; nz = header[3]; 3508 3509 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3510 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3511 3512 /* 3513 This code adds extra rows to make sure the number of rows is 3514 divisible by the blocksize 3515 */ 3516 mbs = M/bs; 3517 extra_rows = bs - M + bs*(mbs); 3518 if (extra_rows == bs) extra_rows = 0; 3519 else mbs++; 3520 if (extra_rows) { 3521 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3522 } 3523 3524 /* Set global sizes if not already set */ 3525 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3526 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3527 } else { /* Check if the matrix global sizes are correct */ 3528 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3529 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3530 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3531 } 3532 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3533 } 3534 3535 /* read in row lengths */ 3536 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3537 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3538 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3539 3540 /* read in column indices */ 3541 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3542 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3543 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3544 3545 /* loop over row lengths determining block row lengths */ 3546 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3547 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3548 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3549 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3550 rowcount = 0; 3551 nzcount = 0; 3552 for (i=0; i<mbs; i++) { 3553 nmask = 0; 3554 for (j=0; j<bs; j++) { 3555 kmax = rowlengths[rowcount]; 3556 for (k=0; k<kmax; k++) { 3557 tmp = jj[nzcount++]/bs; 3558 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3559 } 3560 rowcount++; 3561 } 3562 browlengths[i] += nmask; 3563 /* zero out the mask elements we set */ 3564 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3565 } 3566 3567 /* Do preallocation */ 3568 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3569 a = (Mat_SeqBAIJ*)newmat->data; 3570 3571 /* set matrix "i" values */ 3572 a->i[0] = 0; 3573 for (i=1; i<= mbs; i++) { 3574 a->i[i] = a->i[i-1] + browlengths[i-1]; 3575 a->ilen[i-1] = browlengths[i-1]; 3576 } 3577 a->nz = 0; 3578 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3579 3580 /* read in nonzero values */ 3581 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3582 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3583 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3584 3585 /* set "a" and "j" values into matrix */ 3586 nzcount = 0; jcount = 0; 3587 for (i=0; i<mbs; i++) { 3588 nzcountb = nzcount; 3589 nmask = 0; 3590 for (j=0; j<bs; j++) { 3591 kmax = rowlengths[i*bs+j]; 3592 for (k=0; k<kmax; k++) { 3593 tmp = jj[nzcount++]/bs; 3594 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3595 } 3596 } 3597 /* sort the masked values */ 3598 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3599 3600 /* set "j" values into matrix */ 3601 maskcount = 1; 3602 for (j=0; j<nmask; j++) { 3603 a->j[jcount++] = masked[j]; 3604 mask[masked[j]] = maskcount++; 3605 } 3606 /* set "a" values into matrix */ 3607 ishift = bs2*a->i[i]; 3608 for (j=0; j<bs; j++) { 3609 kmax = rowlengths[i*bs+j]; 3610 for (k=0; k<kmax; k++) { 3611 tmp = jj[nzcountb]/bs ; 3612 block = mask[tmp] - 1; 3613 point = jj[nzcountb] - bs*tmp; 3614 idx = ishift + bs2*block + j + bs*point; 3615 a->a[idx] = (MatScalar)aa[nzcountb++]; 3616 } 3617 } 3618 /* zero out the mask elements we set */ 3619 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3620 } 3621 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3622 3623 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3624 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3625 ierr = PetscFree(aa);CHKERRQ(ierr); 3626 ierr = PetscFree(jj);CHKERRQ(ierr); 3627 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3628 3629 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3630 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3631 PetscFunctionReturn(0); 3632 } 3633 3634 #undef __FUNCT__ 3635 #define __FUNCT__ "MatCreateSeqBAIJ" 3636 /*@C 3637 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3638 compressed row) format. For good matrix assembly performance the 3639 user should preallocate the matrix storage by setting the parameter nz 3640 (or the array nnz). By setting these parameters accurately, performance 3641 during matrix assembly can be increased by more than a factor of 50. 3642 3643 Collective on MPI_Comm 3644 3645 Input Parameters: 3646 + comm - MPI communicator, set to PETSC_COMM_SELF 3647 . bs - size of block 3648 . m - number of rows 3649 . n - number of columns 3650 . nz - number of nonzero blocks per block row (same for all rows) 3651 - nnz - array containing the number of nonzero blocks in the various block rows 3652 (possibly different for each block row) or PETSC_NULL 3653 3654 Output Parameter: 3655 . A - the matrix 3656 3657 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3658 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3659 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3660 3661 Options Database Keys: 3662 . -mat_no_unroll - uses code that does not unroll the loops in the 3663 block calculations (much slower) 3664 . -mat_block_size - size of the blocks to use 3665 3666 Level: intermediate 3667 3668 Notes: 3669 The number of rows and columns must be divisible by blocksize. 3670 3671 If the nnz parameter is given then the nz parameter is ignored 3672 3673 A nonzero block is any block that as 1 or more nonzeros in it 3674 3675 The block AIJ format is fully compatible with standard Fortran 77 3676 storage. That is, the stored row and column indices can begin at 3677 either one (as in Fortran) or zero. See the users' manual for details. 3678 3679 Specify the preallocated storage with either nz or nnz (not both). 3680 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3681 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3682 matrices. 3683 3684 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3685 @*/ 3686 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3687 { 3688 PetscErrorCode ierr; 3689 3690 PetscFunctionBegin; 3691 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3692 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3693 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3694 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3695 PetscFunctionReturn(0); 3696 } 3697 3698 #undef __FUNCT__ 3699 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3700 /*@C 3701 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3702 per row in the matrix. For good matrix assembly performance the 3703 user should preallocate the matrix storage by setting the parameter nz 3704 (or the array nnz). By setting these parameters accurately, performance 3705 during matrix assembly can be increased by more than a factor of 50. 3706 3707 Collective on MPI_Comm 3708 3709 Input Parameters: 3710 + A - the matrix 3711 . bs - size of block 3712 . nz - number of block nonzeros per block row (same for all rows) 3713 - nnz - array containing the number of block nonzeros in the various block rows 3714 (possibly different for each block row) or PETSC_NULL 3715 3716 Options Database Keys: 3717 . -mat_no_unroll - uses code that does not unroll the loops in the 3718 block calculations (much slower) 3719 . -mat_block_size - size of the blocks to use 3720 3721 Level: intermediate 3722 3723 Notes: 3724 If the nnz parameter is given then the nz parameter is ignored 3725 3726 You can call MatGetInfo() to get information on how effective the preallocation was; 3727 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3728 You can also run with the option -info and look for messages with the string 3729 malloc in them to see if additional memory allocation was needed. 3730 3731 The block AIJ format is fully compatible with standard Fortran 77 3732 storage. That is, the stored row and column indices can begin at 3733 either one (as in Fortran) or zero. See the users' manual for details. 3734 3735 Specify the preallocated storage with either nz or nnz (not both). 3736 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3737 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3738 3739 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3740 @*/ 3741 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3742 { 3743 PetscErrorCode ierr; 3744 3745 PetscFunctionBegin; 3746 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3747 PetscValidType(B,1); 3748 PetscValidLogicalCollectiveInt(B,bs,2); 3749 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3750 PetscFunctionReturn(0); 3751 } 3752 3753 #undef __FUNCT__ 3754 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3755 /*@C 3756 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3757 (the default sequential PETSc format). 3758 3759 Collective on MPI_Comm 3760 3761 Input Parameters: 3762 + A - the matrix 3763 . i - the indices into j for the start of each local row (starts with zero) 3764 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3765 - v - optional values in the matrix 3766 3767 Level: developer 3768 3769 .keywords: matrix, aij, compressed row, sparse 3770 3771 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3772 @*/ 3773 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3774 { 3775 PetscErrorCode ierr; 3776 3777 PetscFunctionBegin; 3778 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3779 PetscValidType(B,1); 3780 PetscValidLogicalCollectiveInt(B,bs,2); 3781 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3782 PetscFunctionReturn(0); 3783 } 3784 3785 3786 #undef __FUNCT__ 3787 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3788 /*@ 3789 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3790 3791 Collective on MPI_Comm 3792 3793 Input Parameters: 3794 + comm - must be an MPI communicator of size 1 3795 . bs - size of block 3796 . m - number of rows 3797 . n - number of columns 3798 . i - row indices 3799 . j - column indices 3800 - a - matrix values 3801 3802 Output Parameter: 3803 . mat - the matrix 3804 3805 Level: advanced 3806 3807 Notes: 3808 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3809 once the matrix is destroyed 3810 3811 You cannot set new nonzero locations into this matrix, that will generate an error. 3812 3813 The i and j indices are 0 based 3814 3815 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3816 3817 3818 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3819 3820 @*/ 3821 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3822 { 3823 PetscErrorCode ierr; 3824 PetscInt ii; 3825 Mat_SeqBAIJ *baij; 3826 3827 PetscFunctionBegin; 3828 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3829 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3830 3831 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3832 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3833 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3834 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3835 baij = (Mat_SeqBAIJ*)(*mat)->data; 3836 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3837 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3838 3839 baij->i = i; 3840 baij->j = j; 3841 baij->a = a; 3842 baij->singlemalloc = PETSC_FALSE; 3843 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3844 baij->free_a = PETSC_FALSE; 3845 baij->free_ij = PETSC_FALSE; 3846 3847 for (ii=0; ii<m; ii++) { 3848 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3849 #if defined(PETSC_USE_DEBUG) 3850 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3851 #endif 3852 } 3853 #if defined(PETSC_USE_DEBUG) 3854 for (ii=0; ii<baij->i[m]; ii++) { 3855 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3856 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3857 } 3858 #endif 3859 3860 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3861 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3862 PetscFunctionReturn(0); 3863 } 3864