1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <../src/mat/blockinvert.h> 9 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 19 PetscReal shift = 0.0; 20 21 PetscFunctionBegin; 22 if (a->idiagvalid) { 23 if (values)*values = a->idiag; 24 PetscFunctionReturn(0); 25 } 26 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 27 diag_offset = a->diag; 28 if (!a->idiag) { 29 ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 30 ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 31 } 32 diag = a->idiag; 33 mdiag = a->idiag+bs2*mbs; 34 if (values) *values = a->idiag; 35 /* factor and invert each block */ 36 switch (bs) { 37 case 1: 38 for (i=0; i<mbs; i++) { 39 odiag = v + 1*diag_offset[i]; 40 diag[0] = odiag[0]; 41 mdiag[0] = odiag[0]; 42 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 43 diag += 1; 44 mdiag += 1; 45 } 46 break; 47 case 2: 48 for (i=0; i<mbs; i++) { 49 odiag = v + 4*diag_offset[i]; 50 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 51 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 52 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 53 diag += 4; 54 mdiag += 4; 55 } 56 break; 57 case 3: 58 for (i=0; i<mbs; i++) { 59 odiag = v + 9*diag_offset[i]; 60 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 61 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 62 diag[8] = odiag[8]; 63 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 64 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 65 mdiag[8] = odiag[8]; 66 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 67 diag += 9; 68 mdiag += 9; 69 } 70 break; 71 case 4: 72 for (i=0; i<mbs; i++) { 73 odiag = v + 16*diag_offset[i]; 74 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 75 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 76 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 77 diag += 16; 78 mdiag += 16; 79 } 80 break; 81 case 5: 82 for (i=0; i<mbs; i++) { 83 odiag = v + 25*diag_offset[i]; 84 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 85 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 86 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 87 diag += 25; 88 mdiag += 25; 89 } 90 break; 91 case 6: 92 for (i=0; i<mbs; i++) { 93 odiag = v + 36*diag_offset[i]; 94 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 95 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 97 diag += 36; 98 mdiag += 36; 99 } 100 break; 101 case 7: 102 for (i=0; i<mbs; i++) { 103 odiag = v + 49*diag_offset[i]; 104 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 105 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 107 diag += 49; 108 mdiag += 49; 109 } 110 break; 111 default: 112 ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr); 113 for (i=0; i<mbs; i++) { 114 odiag = v + bs2*diag_offset[i]; 115 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 117 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 118 diag += bs2; 119 mdiag += bs2; 120 } 121 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 122 } 123 a->idiagvalid = PETSC_TRUE; 124 PetscFunctionReturn(0); 125 } 126 127 #undef __FUNCT__ 128 #define __FUNCT__ "MatSOR_SeqBAIJ_1" 129 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 130 { 131 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 132 PetscScalar *x,x1,s1; 133 const PetscScalar *b; 134 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 135 PetscErrorCode ierr; 136 PetscInt m = a->mbs,i,i2,nz,j; 137 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 138 139 PetscFunctionBegin; 140 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 141 its = its*lits; 142 if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 143 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 144 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 145 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 146 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 147 148 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 149 150 diag = a->diag; 151 idiag = a->idiag; 152 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 153 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 154 155 if (flag & SOR_ZERO_INITIAL_GUESS) { 156 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 157 x[0] = b[0]*idiag[0]; 158 i2 = 1; 159 idiag += 1; 160 for (i=1; i<m; i++) { 161 v = aa + ai[i]; 162 vi = aj + ai[i]; 163 nz = diag[i] - ai[i]; 164 s1 = b[i2]; 165 for (j=0; j<nz; j++) { 166 s1 -= v[j]*x[vi[j]]; 167 } 168 x[i2] = idiag[0]*s1; 169 idiag += 1; 170 i2 += 1; 171 } 172 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 173 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 174 } 175 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 176 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 177 i2 = 0; 178 mdiag = a->idiag+a->mbs; 179 for (i=0; i<m; i++) { 180 x1 = x[i2]; 181 x[i2] = mdiag[0]*x1; 182 mdiag += 1; 183 i2 += 1; 184 } 185 ierr = PetscLogFlops(m);CHKERRQ(ierr); 186 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 187 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 188 } 189 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 190 idiag = a->idiag+a->mbs - 1; 191 i2 = m - 1; 192 x1 = x[i2]; 193 x[i2] = idiag[0]*x1; 194 idiag -= 1; 195 i2 -= 1; 196 for (i=m-2; i>=0; i--) { 197 v = aa + (diag[i]+1); 198 vi = aj + diag[i] + 1; 199 nz = ai[i+1] - diag[i] - 1; 200 s1 = x[i2]; 201 for (j=0; j<nz; j++) { 202 s1 -= v[j]*x[vi[j]]; 203 } 204 x[i2] = idiag[0]*s1; 205 idiag -= 1; 206 i2 -= 1; 207 } 208 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 209 } 210 } else { 211 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 212 } 213 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 214 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 215 PetscFunctionReturn(0); 216 } 217 218 #undef __FUNCT__ 219 #define __FUNCT__ "MatSOR_SeqBAIJ_2" 220 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 221 { 222 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 223 PetscScalar *x,x1,x2,s1,s2; 224 const PetscScalar *b; 225 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 226 PetscErrorCode ierr; 227 PetscInt m = a->mbs,i,i2,nz,idx,j,it; 228 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 229 230 PetscFunctionBegin; 231 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 232 its = its*lits; 233 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 234 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 235 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 236 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 237 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 238 239 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 240 241 diag = a->diag; 242 idiag = a->idiag; 243 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 244 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 245 246 if (flag & SOR_ZERO_INITIAL_GUESS) { 247 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 248 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 249 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 250 i2 = 2; 251 idiag += 4; 252 for (i=1; i<m; i++) { 253 v = aa + 4*ai[i]; 254 vi = aj + ai[i]; 255 nz = diag[i] - ai[i]; 256 s1 = b[i2]; s2 = b[i2+1]; 257 for (j=0; j<nz; j++) { 258 idx = 2*vi[j]; 259 it = 4*j; 260 x1 = x[idx]; x2 = x[1+idx]; 261 s1 -= v[it]*x1 + v[it+2]*x2; 262 s2 -= v[it+1]*x1 + v[it+3]*x2; 263 } 264 x[i2] = idiag[0]*s1 + idiag[2]*s2; 265 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 266 idiag += 4; 267 i2 += 2; 268 } 269 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 270 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 271 } 272 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 273 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 274 i2 = 0; 275 mdiag = a->idiag+4*a->mbs; 276 for (i=0; i<m; i++) { 277 x1 = x[i2]; x2 = x[i2+1]; 278 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 279 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 280 mdiag += 4; 281 i2 += 2; 282 } 283 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 284 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 285 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 286 } 287 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 288 idiag = a->idiag+4*a->mbs - 4; 289 i2 = 2*m - 2; 290 x1 = x[i2]; x2 = x[i2+1]; 291 x[i2] = idiag[0]*x1 + idiag[2]*x2; 292 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 293 idiag -= 4; 294 i2 -= 2; 295 for (i=m-2; i>=0; i--) { 296 v = aa + 4*(diag[i]+1); 297 vi = aj + diag[i] + 1; 298 nz = ai[i+1] - diag[i] - 1; 299 s1 = x[i2]; s2 = x[i2+1]; 300 for (j=0; j<nz; j++) { 301 idx = 2*vi[j]; 302 it = 4*j; 303 x1 = x[idx]; x2 = x[1+idx]; 304 s1 -= v[it]*x1 + v[it+2]*x2; 305 s2 -= v[it+1]*x1 + v[it+3]*x2; 306 } 307 x[i2] = idiag[0]*s1 + idiag[2]*s2; 308 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 309 idiag -= 4; 310 i2 -= 2; 311 } 312 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 313 } 314 } else { 315 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 316 } 317 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 318 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 319 PetscFunctionReturn(0); 320 } 321 322 #undef __FUNCT__ 323 #define __FUNCT__ "MatSOR_SeqBAIJ_3" 324 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 325 { 326 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 327 PetscScalar *x,x1,x2,x3,s1,s2,s3; 328 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 329 const PetscScalar *b; 330 PetscErrorCode ierr; 331 PetscInt m = a->mbs,i,i2,nz,idx; 332 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 333 334 PetscFunctionBegin; 335 its = its*lits; 336 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 337 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 338 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 339 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 340 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 341 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 342 343 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 344 345 diag = a->diag; 346 idiag = a->idiag; 347 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 348 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 349 350 if (flag & SOR_ZERO_INITIAL_GUESS) { 351 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 352 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 353 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 354 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 355 i2 = 3; 356 idiag += 9; 357 for (i=1; i<m; i++) { 358 v = aa + 9*ai[i]; 359 vi = aj + ai[i]; 360 nz = diag[i] - ai[i]; 361 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 362 while (nz--) { 363 idx = 3*(*vi++); 364 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 365 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 366 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 367 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 368 v += 9; 369 } 370 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 371 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 372 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 373 idiag += 9; 374 i2 += 3; 375 } 376 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 377 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 378 } 379 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 380 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 381 i2 = 0; 382 mdiag = a->idiag+9*a->mbs; 383 for (i=0; i<m; i++) { 384 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 385 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 386 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 387 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 388 mdiag += 9; 389 i2 += 3; 390 } 391 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 392 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 393 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 394 } 395 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 396 idiag = a->idiag+9*a->mbs - 9; 397 i2 = 3*m - 3; 398 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 399 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 400 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 401 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 402 idiag -= 9; 403 i2 -= 3; 404 for (i=m-2; i>=0; i--) { 405 v = aa + 9*(diag[i]+1); 406 vi = aj + diag[i] + 1; 407 nz = ai[i+1] - diag[i] - 1; 408 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 409 while (nz--) { 410 idx = 3*(*vi++); 411 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 412 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 413 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 414 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 415 v += 9; 416 } 417 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 418 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 419 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 420 idiag -= 9; 421 i2 -= 3; 422 } 423 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 424 } 425 } else { 426 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 427 } 428 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 429 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 430 PetscFunctionReturn(0); 431 } 432 433 #undef __FUNCT__ 434 #define __FUNCT__ "MatSOR_SeqBAIJ_4" 435 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 436 { 437 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 438 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 439 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 440 const PetscScalar *b; 441 PetscErrorCode ierr; 442 PetscInt m = a->mbs,i,i2,nz,idx; 443 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 444 445 PetscFunctionBegin; 446 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 447 its = its*lits; 448 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 449 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 450 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 451 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 452 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 453 454 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 455 456 diag = a->diag; 457 idiag = a->idiag; 458 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 459 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 460 461 if (flag & SOR_ZERO_INITIAL_GUESS) { 462 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 463 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 464 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 465 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 466 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 467 i2 = 4; 468 idiag += 16; 469 for (i=1; i<m; i++) { 470 v = aa + 16*ai[i]; 471 vi = aj + ai[i]; 472 nz = diag[i] - ai[i]; 473 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 474 while (nz--) { 475 idx = 4*(*vi++); 476 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 477 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 478 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 479 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 480 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 481 v += 16; 482 } 483 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 484 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 485 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 486 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 487 idiag += 16; 488 i2 += 4; 489 } 490 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 491 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 492 } 493 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 494 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 495 i2 = 0; 496 mdiag = a->idiag+16*a->mbs; 497 for (i=0; i<m; i++) { 498 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 499 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 500 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 501 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 502 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 503 mdiag += 16; 504 i2 += 4; 505 } 506 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 507 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 508 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 509 } 510 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 511 idiag = a->idiag+16*a->mbs - 16; 512 i2 = 4*m - 4; 513 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 514 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 515 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 516 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 517 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 518 idiag -= 16; 519 i2 -= 4; 520 for (i=m-2; i>=0; i--) { 521 v = aa + 16*(diag[i]+1); 522 vi = aj + diag[i] + 1; 523 nz = ai[i+1] - diag[i] - 1; 524 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 525 while (nz--) { 526 idx = 4*(*vi++); 527 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 528 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 529 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 530 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 531 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 532 v += 16; 533 } 534 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 535 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 536 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 537 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 538 idiag -= 16; 539 i2 -= 4; 540 } 541 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 542 } 543 } else { 544 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 545 } 546 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 547 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 548 PetscFunctionReturn(0); 549 } 550 551 #undef __FUNCT__ 552 #define __FUNCT__ "MatSOR_SeqBAIJ_5" 553 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 554 { 555 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 556 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 557 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 558 const PetscScalar *b; 559 PetscErrorCode ierr; 560 PetscInt m = a->mbs,i,i2,nz,idx; 561 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 562 563 PetscFunctionBegin; 564 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 565 its = its*lits; 566 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 567 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 568 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 569 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 570 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 571 572 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 573 574 diag = a->diag; 575 idiag = a->idiag; 576 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 577 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 578 579 if (flag & SOR_ZERO_INITIAL_GUESS) { 580 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 581 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 582 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 583 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 584 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 585 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 586 i2 = 5; 587 idiag += 25; 588 for (i=1; i<m; i++) { 589 v = aa + 25*ai[i]; 590 vi = aj + ai[i]; 591 nz = diag[i] - ai[i]; 592 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 593 while (nz--) { 594 idx = 5*(*vi++); 595 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 596 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 597 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 598 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 599 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 600 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 601 v += 25; 602 } 603 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 604 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 605 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 606 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 607 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 608 idiag += 25; 609 i2 += 5; 610 } 611 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 612 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 613 } 614 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 615 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 616 i2 = 0; 617 mdiag = a->idiag+25*a->mbs; 618 for (i=0; i<m; i++) { 619 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 620 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 621 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 622 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 623 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 624 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 625 mdiag += 25; 626 i2 += 5; 627 } 628 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 629 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 630 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 631 } 632 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 633 idiag = a->idiag+25*a->mbs - 25; 634 i2 = 5*m - 5; 635 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 636 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 637 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 638 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 639 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 640 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 641 idiag -= 25; 642 i2 -= 5; 643 for (i=m-2; i>=0; i--) { 644 v = aa + 25*(diag[i]+1); 645 vi = aj + diag[i] + 1; 646 nz = ai[i+1] - diag[i] - 1; 647 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 648 while (nz--) { 649 idx = 5*(*vi++); 650 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 651 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 652 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 653 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 654 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 655 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 656 v += 25; 657 } 658 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 659 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 660 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 661 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 662 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 663 idiag -= 25; 664 i2 -= 5; 665 } 666 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 667 } 668 } else { 669 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 670 } 671 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 672 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 673 PetscFunctionReturn(0); 674 } 675 676 #undef __FUNCT__ 677 #define __FUNCT__ "MatSOR_SeqBAIJ_6" 678 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 679 { 680 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 681 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 682 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 683 const PetscScalar *b; 684 PetscErrorCode ierr; 685 PetscInt m = a->mbs,i,i2,nz,idx; 686 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 687 688 PetscFunctionBegin; 689 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 690 its = its*lits; 691 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 692 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 693 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 694 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 695 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 696 697 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 698 699 diag = a->diag; 700 idiag = a->idiag; 701 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 702 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 703 704 if (flag & SOR_ZERO_INITIAL_GUESS) { 705 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 706 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 707 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 708 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 709 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 710 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 711 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 712 i2 = 6; 713 idiag += 36; 714 for (i=1; i<m; i++) { 715 v = aa + 36*ai[i]; 716 vi = aj + ai[i]; 717 nz = diag[i] - ai[i]; 718 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 719 while (nz--) { 720 idx = 6*(*vi++); 721 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 722 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 723 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 724 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 725 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 726 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 727 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 728 v += 36; 729 } 730 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 731 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 732 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 733 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 734 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 735 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 736 idiag += 36; 737 i2 += 6; 738 } 739 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 740 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 741 } 742 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 743 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 744 i2 = 0; 745 mdiag = a->idiag+36*a->mbs; 746 for (i=0; i<m; i++) { 747 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 748 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 749 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 750 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 751 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 752 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 753 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 754 mdiag += 36; 755 i2 += 6; 756 } 757 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 758 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 759 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 760 } 761 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 762 idiag = a->idiag+36*a->mbs - 36; 763 i2 = 6*m - 6; 764 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 765 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 766 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 767 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 768 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 769 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 770 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 771 idiag -= 36; 772 i2 -= 6; 773 for (i=m-2; i>=0; i--) { 774 v = aa + 36*(diag[i]+1); 775 vi = aj + diag[i] + 1; 776 nz = ai[i+1] - diag[i] - 1; 777 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 778 while (nz--) { 779 idx = 6*(*vi++); 780 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 781 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 782 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 783 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 784 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 785 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 786 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 787 v += 36; 788 } 789 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 790 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 791 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 792 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 793 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 794 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 795 idiag -= 36; 796 i2 -= 6; 797 } 798 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 799 } 800 } else { 801 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 802 } 803 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 804 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 805 PetscFunctionReturn(0); 806 } 807 808 #undef __FUNCT__ 809 #define __FUNCT__ "MatSOR_SeqBAIJ_7" 810 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 811 { 812 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 813 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 814 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 815 const PetscScalar *b; 816 PetscErrorCode ierr; 817 PetscInt m = a->mbs,i,i2,nz,idx; 818 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 819 820 PetscFunctionBegin; 821 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 822 its = its*lits; 823 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 824 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 825 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 826 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 827 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 828 829 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 830 831 diag = a->diag; 832 idiag = a->idiag; 833 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 834 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 835 836 if (flag & SOR_ZERO_INITIAL_GUESS) { 837 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 838 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 839 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 840 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 841 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 842 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 843 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 844 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 845 i2 = 7; 846 idiag += 49; 847 for (i=1; i<m; i++) { 848 v = aa + 49*ai[i]; 849 vi = aj + ai[i]; 850 nz = diag[i] - ai[i]; 851 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 852 while (nz--) { 853 idx = 7*(*vi++); 854 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 855 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 856 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 857 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 858 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 859 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 860 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 861 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 862 v += 49; 863 } 864 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 865 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 866 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 867 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 868 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 869 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 870 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 871 idiag += 49; 872 i2 += 7; 873 } 874 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 875 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 876 } 877 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 878 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 879 i2 = 0; 880 mdiag = a->idiag+49*a->mbs; 881 for (i=0; i<m; i++) { 882 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 883 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7; 884 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7; 885 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7; 886 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7; 887 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7; 888 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7; 889 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7; 890 mdiag += 49; 891 i2 += 7; 892 } 893 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 894 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 895 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 896 } 897 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 898 idiag = a->idiag+49*a->mbs - 49; 899 i2 = 7*m - 7; 900 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 901 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 902 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 903 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 904 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 905 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 906 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 907 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 908 idiag -= 49; 909 i2 -= 7; 910 for (i=m-2; i>=0; i--) { 911 v = aa + 49*(diag[i]+1); 912 vi = aj + diag[i] + 1; 913 nz = ai[i+1] - diag[i] - 1; 914 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 915 while (nz--) { 916 idx = 7*(*vi++); 917 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 918 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 919 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 920 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 921 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 922 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 923 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 924 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 925 v += 49; 926 } 927 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 928 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 929 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 930 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 931 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 932 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 933 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 934 idiag -= 49; 935 i2 -= 7; 936 } 937 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 938 } 939 } else { 940 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 941 } 942 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 943 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 944 PetscFunctionReturn(0); 945 } 946 947 #undef __FUNCT__ 948 #define __FUNCT__ "MatSOR_SeqBAIJ_N" 949 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 950 { 951 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 952 PetscScalar *x,*work,*w,*workt; 953 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 954 const PetscScalar *b; 955 PetscErrorCode ierr; 956 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j; 957 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 958 959 PetscFunctionBegin; 960 its = its*lits; 961 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 962 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 963 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 964 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 965 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 966 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 967 968 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 969 970 diag = a->diag; 971 idiag = a->idiag; 972 if (!a->mult_work) { 973 k = PetscMax(A->rmap->n,A->cmap->n); 974 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 975 } 976 work = a->mult_work; 977 if (!a->sor_work) { 978 ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr); 979 } 980 w = a->sor_work; 981 982 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 983 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 984 985 if (flag & SOR_ZERO_INITIAL_GUESS) { 986 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 987 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 988 /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 989 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 990 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/ 991 i2 = bs; 992 idiag += bs2; 993 for (i=1; i<m; i++) { 994 v = aa + bs2*ai[i]; 995 vi = aj + ai[i]; 996 nz = diag[i] - ai[i]; 997 998 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 999 /* copy all rows of x that are needed into contiguous space */ 1000 workt = work; 1001 for (j=0; j<nz; j++) { 1002 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1003 workt += bs; 1004 } 1005 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1006 /*s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 1007 while (nz--) { 1008 idx = N*(*vi++); 1009 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 1010 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1011 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1012 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1013 v += N2; 1014 } */ 1015 1016 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1017 /* x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1018 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1019 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/ 1020 1021 idiag += bs2; 1022 i2 += bs; 1023 } 1024 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 1025 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1026 } 1027 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 1028 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1029 i2 = 0; 1030 mdiag = a->idiag+bs2*a->mbs; 1031 ierr = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr); 1032 for (i=0; i<m; i++) { 1033 PetscKernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2); 1034 /* x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1035 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 1036 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 1037 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */ 1038 1039 mdiag += bs2; 1040 i2 += bs; 1041 } 1042 ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr); 1043 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1044 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 1045 } 1046 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1047 idiag = a->idiag+bs2*a->mbs - bs2; 1048 i2 = bs*m - bs; 1049 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1050 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1051 /*x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1052 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 1053 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 1054 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/ 1055 idiag -= bs2; 1056 i2 -= bs; 1057 for (i=m-2; i>=0; i--) { 1058 v = aa + bs2*(diag[i]+1); 1059 vi = aj + diag[i] + 1; 1060 nz = ai[i+1] - diag[i] - 1; 1061 1062 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1063 /* copy all rows of x that are needed into contiguous space */ 1064 workt = work; 1065 for (j=0; j<nz; j++) { 1066 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1067 workt += bs; 1068 } 1069 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1070 /* s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 1071 while (nz--) { 1072 idx = N*(*vi++); 1073 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 1074 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1075 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1076 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1077 v += N2; 1078 } */ 1079 1080 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1081 /*x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1082 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1083 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */ 1084 idiag -= bs2; 1085 i2 -= bs; 1086 } 1087 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1088 } 1089 } else { 1090 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 1091 } 1092 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1093 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1094 PetscFunctionReturn(0); 1095 } 1096 1097 /* 1098 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1099 */ 1100 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1101 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1102 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1103 #define matsetvaluesblocked4_ matsetvaluesblocked4 1104 #endif 1105 1106 EXTERN_C_BEGIN 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "matsetvaluesblocked4_" 1109 void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 1110 { 1111 Mat A = *AA; 1112 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1113 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 1114 PetscInt *ai=a->i,*ailen=a->ilen; 1115 PetscInt *aj=a->j,stepval,lastcol = -1; 1116 const PetscScalar *value = v; 1117 MatScalar *ap,*aa = a->a,*bap; 1118 1119 PetscFunctionBegin; 1120 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 1121 stepval = (n-1)*4; 1122 for (k=0; k<m; k++) { /* loop over added rows */ 1123 row = im[k]; 1124 rp = aj + ai[row]; 1125 ap = aa + 16*ai[row]; 1126 nrow = ailen[row]; 1127 low = 0; 1128 high = nrow; 1129 for (l=0; l<n; l++) { /* loop over added columns */ 1130 col = in[l]; 1131 if (col <= lastcol) low = 0; 1132 else high = nrow; 1133 lastcol = col; 1134 value = v + k*(stepval+4 + l)*4; 1135 while (high-low > 7) { 1136 t = (low+high)/2; 1137 if (rp[t] > col) high = t; 1138 else low = t; 1139 } 1140 for (i=low; i<high; i++) { 1141 if (rp[i] > col) break; 1142 if (rp[i] == col) { 1143 bap = ap + 16*i; 1144 for (ii=0; ii<4; ii++,value+=stepval) { 1145 for (jj=ii; jj<16; jj+=4) { 1146 bap[jj] += *value++; 1147 } 1148 } 1149 goto noinsert2; 1150 } 1151 } 1152 N = nrow++ - 1; 1153 high++; /* added new column index thus must search to one higher than before */ 1154 /* shift up all the later entries in this row */ 1155 for (ii=N; ii>=i; ii--) { 1156 rp[ii+1] = rp[ii]; 1157 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1158 } 1159 if (N >= i) { 1160 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1161 } 1162 rp[i] = col; 1163 bap = ap + 16*i; 1164 for (ii=0; ii<4; ii++,value+=stepval) { 1165 for (jj=ii; jj<16; jj+=4) { 1166 bap[jj] = *value++; 1167 } 1168 } 1169 noinsert2:; 1170 low = i; 1171 } 1172 ailen[row] = nrow; 1173 } 1174 PetscFunctionReturnVoid(); 1175 } 1176 EXTERN_C_END 1177 1178 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1179 #define matsetvalues4_ MATSETVALUES4 1180 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1181 #define matsetvalues4_ matsetvalues4 1182 #endif 1183 1184 EXTERN_C_BEGIN 1185 #undef __FUNCT__ 1186 #define __FUNCT__ "MatSetValues4_" 1187 void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1188 { 1189 Mat A = *AA; 1190 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1191 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1192 PetscInt *ai=a->i,*ailen=a->ilen; 1193 PetscInt *aj=a->j,brow,bcol; 1194 PetscInt ridx,cidx,lastcol = -1; 1195 MatScalar *ap,value,*aa=a->a,*bap; 1196 1197 PetscFunctionBegin; 1198 for (k=0; k<m; k++) { /* loop over added rows */ 1199 row = im[k]; brow = row/4; 1200 rp = aj + ai[brow]; 1201 ap = aa + 16*ai[brow]; 1202 nrow = ailen[brow]; 1203 low = 0; 1204 high = nrow; 1205 for (l=0; l<n; l++) { /* loop over added columns */ 1206 col = in[l]; bcol = col/4; 1207 ridx = row % 4; cidx = col % 4; 1208 value = v[l + k*n]; 1209 if (col <= lastcol) low = 0; 1210 else high = nrow; 1211 lastcol = col; 1212 while (high-low > 7) { 1213 t = (low+high)/2; 1214 if (rp[t] > bcol) high = t; 1215 else low = t; 1216 } 1217 for (i=low; i<high; i++) { 1218 if (rp[i] > bcol) break; 1219 if (rp[i] == bcol) { 1220 bap = ap + 16*i + 4*cidx + ridx; 1221 *bap += value; 1222 goto noinsert1; 1223 } 1224 } 1225 N = nrow++ - 1; 1226 high++; /* added new column thus must search to one higher than before */ 1227 /* shift up all the later entries in this row */ 1228 for (ii=N; ii>=i; ii--) { 1229 rp[ii+1] = rp[ii]; 1230 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1231 } 1232 if (N>=i) { 1233 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1234 } 1235 rp[i] = bcol; 1236 ap[16*i + 4*cidx + ridx] = value; 1237 noinsert1:; 1238 low = i; 1239 } 1240 ailen[brow] = nrow; 1241 } 1242 PetscFunctionReturnVoid(); 1243 } 1244 EXTERN_C_END 1245 1246 /* 1247 Checks for missing diagonals 1248 */ 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1251 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1252 { 1253 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1254 PetscErrorCode ierr; 1255 PetscInt *diag,*jj = a->j,i; 1256 1257 PetscFunctionBegin; 1258 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1259 *missing = PETSC_FALSE; 1260 if (A->rmap->n > 0 && !jj) { 1261 *missing = PETSC_TRUE; 1262 if (d) *d = 0; 1263 PetscInfo(A,"Matrix has no entries therefore is missing diagonal"); 1264 } else { 1265 diag = a->diag; 1266 for (i=0; i<a->mbs; i++) { 1267 if (jj[diag[i]] != i) { 1268 *missing = PETSC_TRUE; 1269 if (d) *d = i; 1270 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1271 break; 1272 } 1273 } 1274 } 1275 PetscFunctionReturn(0); 1276 } 1277 1278 #undef __FUNCT__ 1279 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1280 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1281 { 1282 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1283 PetscErrorCode ierr; 1284 PetscInt i,j,m = a->mbs; 1285 1286 PetscFunctionBegin; 1287 if (!a->diag) { 1288 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1289 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1290 a->free_diag = PETSC_TRUE; 1291 } 1292 for (i=0; i<m; i++) { 1293 a->diag[i] = a->i[i+1]; 1294 for (j=a->i[i]; j<a->i[i+1]; j++) { 1295 if (a->j[j] == i) { 1296 a->diag[i] = j; 1297 break; 1298 } 1299 } 1300 } 1301 PetscFunctionReturn(0); 1302 } 1303 1304 1305 #undef __FUNCT__ 1306 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1307 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1308 { 1309 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1310 PetscErrorCode ierr; 1311 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1312 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1313 1314 PetscFunctionBegin; 1315 *nn = n; 1316 if (!ia) PetscFunctionReturn(0); 1317 if (symmetric) { 1318 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1319 nz = tia[n]; 1320 } else { 1321 tia = a->i; tja = a->j; 1322 } 1323 1324 if (!blockcompressed && bs > 1) { 1325 (*nn) *= bs; 1326 /* malloc & create the natural set of indices */ 1327 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1328 if (n) { 1329 (*ia)[0] = 0; 1330 for (j=1; j<bs; j++) { 1331 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1332 } 1333 } 1334 1335 for (i=1; i<n; i++) { 1336 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1337 for (j=1; j<bs; j++) { 1338 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1339 } 1340 } 1341 if (n) { 1342 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1343 } 1344 1345 if (inja) { 1346 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1347 cnt = 0; 1348 for (i=0; i<n; i++) { 1349 for (j=0; j<bs; j++) { 1350 for (k=tia[i]; k<tia[i+1]; k++) { 1351 for (l=0; l<bs; l++) { 1352 (*ja)[cnt++] = bs*tja[k] + l; 1353 } 1354 } 1355 } 1356 } 1357 } 1358 1359 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1360 ierr = PetscFree(tia);CHKERRQ(ierr); 1361 ierr = PetscFree(tja);CHKERRQ(ierr); 1362 } 1363 } else if (oshift == 1) { 1364 if (symmetric) { 1365 nz = tia[A->rmap->n/bs]; 1366 /* add 1 to i and j indices */ 1367 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1368 *ia = tia; 1369 if (ja) { 1370 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1371 *ja = tja; 1372 } 1373 } else { 1374 nz = a->i[A->rmap->n/bs]; 1375 /* malloc space and add 1 to i and j indices */ 1376 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1377 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1378 if (ja) { 1379 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1380 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1381 } 1382 } 1383 } else { 1384 *ia = tia; 1385 if (ja) *ja = tja; 1386 } 1387 PetscFunctionReturn(0); 1388 } 1389 1390 #undef __FUNCT__ 1391 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1392 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1393 { 1394 PetscErrorCode ierr; 1395 1396 PetscFunctionBegin; 1397 if (!ia) PetscFunctionReturn(0); 1398 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1399 ierr = PetscFree(*ia);CHKERRQ(ierr); 1400 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1401 } 1402 PetscFunctionReturn(0); 1403 } 1404 1405 #undef __FUNCT__ 1406 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1407 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1408 { 1409 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1410 PetscErrorCode ierr; 1411 1412 PetscFunctionBegin; 1413 #if defined(PETSC_USE_LOG) 1414 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1415 #endif 1416 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1417 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1418 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1419 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1420 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1421 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1422 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1423 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1424 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1425 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1426 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1427 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1428 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1429 1430 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1431 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1432 ierr = PetscFree(A->data);CHKERRQ(ierr); 1433 1434 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1435 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1436 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1437 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1438 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1439 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1440 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1441 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1442 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1443 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",PETSC_NULL);CHKERRQ(ierr); 1444 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1445 PetscFunctionReturn(0); 1446 } 1447 1448 #undef __FUNCT__ 1449 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1450 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1451 { 1452 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1453 PetscErrorCode ierr; 1454 1455 PetscFunctionBegin; 1456 switch (op) { 1457 case MAT_ROW_ORIENTED: 1458 a->roworiented = flg; 1459 break; 1460 case MAT_KEEP_NONZERO_PATTERN: 1461 a->keepnonzeropattern = flg; 1462 break; 1463 case MAT_NEW_NONZERO_LOCATIONS: 1464 a->nonew = (flg ? 0 : 1); 1465 break; 1466 case MAT_NEW_NONZERO_LOCATION_ERR: 1467 a->nonew = (flg ? -1 : 0); 1468 break; 1469 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1470 a->nonew = (flg ? -2 : 0); 1471 break; 1472 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1473 a->nounused = (flg ? -1 : 0); 1474 break; 1475 case MAT_CHECK_COMPRESSED_ROW: 1476 a->compressedrow.check = flg; 1477 break; 1478 case MAT_NEW_DIAGONALS: 1479 case MAT_IGNORE_OFF_PROC_ENTRIES: 1480 case MAT_USE_HASH_TABLE: 1481 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1482 break; 1483 case MAT_SPD: 1484 case MAT_SYMMETRIC: 1485 case MAT_STRUCTURALLY_SYMMETRIC: 1486 case MAT_HERMITIAN: 1487 case MAT_SYMMETRY_ETERNAL: 1488 /* These options are handled directly by MatSetOption() */ 1489 break; 1490 default: 1491 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1492 } 1493 PetscFunctionReturn(0); 1494 } 1495 1496 #undef __FUNCT__ 1497 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1498 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1499 { 1500 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1501 PetscErrorCode ierr; 1502 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1503 MatScalar *aa,*aa_i; 1504 PetscScalar *v_i; 1505 1506 PetscFunctionBegin; 1507 bs = A->rmap->bs; 1508 ai = a->i; 1509 aj = a->j; 1510 aa = a->a; 1511 bs2 = a->bs2; 1512 1513 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1514 1515 bn = row/bs; /* Block number */ 1516 bp = row % bs; /* Block Position */ 1517 M = ai[bn+1] - ai[bn]; 1518 *nz = bs*M; 1519 1520 if (v) { 1521 *v = 0; 1522 if (*nz) { 1523 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1524 for (i=0; i<M; i++) { /* for each block in the block row */ 1525 v_i = *v + i*bs; 1526 aa_i = aa + bs2*(ai[bn] + i); 1527 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1528 } 1529 } 1530 } 1531 1532 if (idx) { 1533 *idx = 0; 1534 if (*nz) { 1535 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1536 for (i=0; i<M; i++) { /* for each block in the block row */ 1537 idx_i = *idx + i*bs; 1538 itmp = bs*aj[ai[bn] + i]; 1539 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1540 } 1541 } 1542 } 1543 PetscFunctionReturn(0); 1544 } 1545 1546 #undef __FUNCT__ 1547 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1548 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1549 { 1550 PetscErrorCode ierr; 1551 1552 PetscFunctionBegin; 1553 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1554 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1555 PetscFunctionReturn(0); 1556 } 1557 1558 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1559 1560 #undef __FUNCT__ 1561 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1562 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1563 { 1564 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1565 Mat C; 1566 PetscErrorCode ierr; 1567 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1568 PetscInt *rows,*cols,bs2=a->bs2; 1569 MatScalar *array; 1570 1571 PetscFunctionBegin; 1572 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1573 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1574 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1575 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1576 1577 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1578 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1579 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1580 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1581 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr); 1582 ierr = PetscFree(col);CHKERRQ(ierr); 1583 } else { 1584 C = *B; 1585 } 1586 1587 array = a->a; 1588 ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 1589 for (i=0; i<mbs; i++) { 1590 cols[0] = i*bs; 1591 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1592 len = ai[i+1] - ai[i]; 1593 for (j=0; j<len; j++) { 1594 rows[0] = (*aj++)*bs; 1595 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1596 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1597 array += bs2; 1598 } 1599 } 1600 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1601 1602 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1603 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1604 1605 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1606 *B = C; 1607 } else { 1608 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1609 } 1610 PetscFunctionReturn(0); 1611 } 1612 1613 EXTERN_C_BEGIN 1614 #undef __FUNCT__ 1615 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1616 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1617 { 1618 PetscErrorCode ierr; 1619 Mat Btrans; 1620 1621 PetscFunctionBegin; 1622 *f = PETSC_FALSE; 1623 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1624 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1625 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1626 PetscFunctionReturn(0); 1627 } 1628 EXTERN_C_END 1629 1630 #undef __FUNCT__ 1631 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1632 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1633 { 1634 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1635 PetscErrorCode ierr; 1636 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1637 int fd; 1638 PetscScalar *aa; 1639 FILE *file; 1640 1641 PetscFunctionBegin; 1642 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1643 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1644 col_lens[0] = MAT_FILE_CLASSID; 1645 1646 col_lens[1] = A->rmap->N; 1647 col_lens[2] = A->cmap->n; 1648 col_lens[3] = a->nz*bs2; 1649 1650 /* store lengths of each row and write (including header) to file */ 1651 count = 0; 1652 for (i=0; i<a->mbs; i++) { 1653 for (j=0; j<bs; j++) { 1654 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1655 } 1656 } 1657 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1658 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1659 1660 /* store column indices (zero start index) */ 1661 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1662 count = 0; 1663 for (i=0; i<a->mbs; i++) { 1664 for (j=0; j<bs; j++) { 1665 for (k=a->i[i]; k<a->i[i+1]; k++) { 1666 for (l=0; l<bs; l++) { 1667 jj[count++] = bs*a->j[k] + l; 1668 } 1669 } 1670 } 1671 } 1672 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1673 ierr = PetscFree(jj);CHKERRQ(ierr); 1674 1675 /* store nonzero values */ 1676 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1677 count = 0; 1678 for (i=0; i<a->mbs; i++) { 1679 for (j=0; j<bs; j++) { 1680 for (k=a->i[i]; k<a->i[i+1]; k++) { 1681 for (l=0; l<bs; l++) { 1682 aa[count++] = a->a[bs2*k + l*bs + j]; 1683 } 1684 } 1685 } 1686 } 1687 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1688 ierr = PetscFree(aa);CHKERRQ(ierr); 1689 1690 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1691 if (file) { 1692 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1693 } 1694 PetscFunctionReturn(0); 1695 } 1696 1697 #undef __FUNCT__ 1698 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1699 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1700 { 1701 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1702 PetscErrorCode ierr; 1703 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1704 PetscViewerFormat format; 1705 1706 PetscFunctionBegin; 1707 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1708 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1709 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1710 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1711 Mat aij; 1712 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1713 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1714 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1715 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1716 PetscFunctionReturn(0); 1717 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1718 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1719 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1720 for (i=0; i<a->mbs; i++) { 1721 for (j=0; j<bs; j++) { 1722 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1723 for (k=a->i[i]; k<a->i[i+1]; k++) { 1724 for (l=0; l<bs; l++) { 1725 #if defined(PETSC_USE_COMPLEX) 1726 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1727 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1728 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1729 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1730 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1731 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1732 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1733 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1734 } 1735 #else 1736 if (a->a[bs2*k + l*bs + j] != 0.0) { 1737 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1738 } 1739 #endif 1740 } 1741 } 1742 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1743 } 1744 } 1745 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1746 } else { 1747 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1748 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1749 for (i=0; i<a->mbs; i++) { 1750 for (j=0; j<bs; j++) { 1751 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1752 for (k=a->i[i]; k<a->i[i+1]; k++) { 1753 for (l=0; l<bs; l++) { 1754 #if defined(PETSC_USE_COMPLEX) 1755 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1756 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1757 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1758 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1759 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1760 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1761 } else { 1762 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1763 } 1764 #else 1765 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1766 #endif 1767 } 1768 } 1769 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1770 } 1771 } 1772 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1773 } 1774 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1775 PetscFunctionReturn(0); 1776 } 1777 1778 #undef __FUNCT__ 1779 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1780 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1781 { 1782 Mat A = (Mat) Aa; 1783 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1784 PetscErrorCode ierr; 1785 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1786 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1787 MatScalar *aa; 1788 PetscViewer viewer; 1789 PetscViewerFormat format; 1790 1791 PetscFunctionBegin; 1792 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1793 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1794 1795 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1796 1797 /* loop over matrix elements drawing boxes */ 1798 1799 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1800 color = PETSC_DRAW_BLUE; 1801 for (i=0,row=0; i<mbs; i++,row+=bs) { 1802 for (j=a->i[i]; j<a->i[i+1]; j++) { 1803 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1804 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1805 aa = a->a + j*bs2; 1806 for (k=0; k<bs; k++) { 1807 for (l=0; l<bs; l++) { 1808 if (PetscRealPart(*aa++) >= 0.) continue; 1809 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1810 } 1811 } 1812 } 1813 } 1814 color = PETSC_DRAW_CYAN; 1815 for (i=0,row=0; i<mbs; i++,row+=bs) { 1816 for (j=a->i[i]; j<a->i[i+1]; j++) { 1817 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1818 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1819 aa = a->a + j*bs2; 1820 for (k=0; k<bs; k++) { 1821 for (l=0; l<bs; l++) { 1822 if (PetscRealPart(*aa++) != 0.) continue; 1823 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1824 } 1825 } 1826 } 1827 } 1828 color = PETSC_DRAW_RED; 1829 for (i=0,row=0; i<mbs; i++,row+=bs) { 1830 for (j=a->i[i]; j<a->i[i+1]; j++) { 1831 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1832 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1833 aa = a->a + j*bs2; 1834 for (k=0; k<bs; k++) { 1835 for (l=0; l<bs; l++) { 1836 if (PetscRealPart(*aa++) <= 0.) continue; 1837 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1838 } 1839 } 1840 } 1841 } 1842 } else { 1843 /* use contour shading to indicate magnitude of values */ 1844 /* first determine max of all nonzero values */ 1845 PetscDraw popup; 1846 PetscReal scale,maxv = 0.0; 1847 1848 for (i=0; i<a->nz*a->bs2; i++) { 1849 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1850 } 1851 scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv; 1852 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1853 if (popup) {ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);} 1854 for (i=0,row=0; i<mbs; i++,row+=bs) { 1855 for (j=a->i[i]; j<a->i[i+1]; j++) { 1856 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1857 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1858 aa = a->a + j*bs2; 1859 for (k=0; k<bs; k++) { 1860 for (l=0; l<bs; l++) { 1861 color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++)); 1862 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1863 } 1864 } 1865 } 1866 } 1867 } 1868 PetscFunctionReturn(0); 1869 } 1870 1871 #undef __FUNCT__ 1872 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1873 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1874 { 1875 PetscErrorCode ierr; 1876 PetscReal xl,yl,xr,yr,w,h; 1877 PetscDraw draw; 1878 PetscBool isnull; 1879 1880 PetscFunctionBegin; 1881 1882 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1883 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1884 1885 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1886 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1887 xr += w; yr += h; xl = -w; yl = -h; 1888 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1889 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1890 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1891 PetscFunctionReturn(0); 1892 } 1893 1894 #undef __FUNCT__ 1895 #define __FUNCT__ "MatView_SeqBAIJ" 1896 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1897 { 1898 PetscErrorCode ierr; 1899 PetscBool iascii,isbinary,isdraw; 1900 1901 PetscFunctionBegin; 1902 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1903 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1904 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1905 if (iascii) { 1906 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1907 } else if (isbinary) { 1908 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1909 } else if (isdraw) { 1910 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1911 } else { 1912 Mat B; 1913 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1914 ierr = MatView(B,viewer);CHKERRQ(ierr); 1915 ierr = MatDestroy(&B);CHKERRQ(ierr); 1916 } 1917 PetscFunctionReturn(0); 1918 } 1919 1920 1921 #undef __FUNCT__ 1922 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1923 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1924 { 1925 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1926 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1927 PetscInt *ai = a->i,*ailen = a->ilen; 1928 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1929 MatScalar *ap,*aa = a->a; 1930 1931 PetscFunctionBegin; 1932 for (k=0; k<m; k++) { /* loop over rows */ 1933 row = im[k]; brow = row/bs; 1934 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1935 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1936 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1937 nrow = ailen[brow]; 1938 for (l=0; l<n; l++) { /* loop over columns */ 1939 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1940 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1941 col = in[l] ; 1942 bcol = col/bs; 1943 cidx = col%bs; 1944 ridx = row%bs; 1945 high = nrow; 1946 low = 0; /* assume unsorted */ 1947 while (high-low > 5) { 1948 t = (low+high)/2; 1949 if (rp[t] > bcol) high = t; 1950 else low = t; 1951 } 1952 for (i=low; i<high; i++) { 1953 if (rp[i] > bcol) break; 1954 if (rp[i] == bcol) { 1955 *v++ = ap[bs2*i+bs*cidx+ridx]; 1956 goto finished; 1957 } 1958 } 1959 *v++ = 0.0; 1960 finished:; 1961 } 1962 } 1963 PetscFunctionReturn(0); 1964 } 1965 1966 #undef __FUNCT__ 1967 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1968 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1969 { 1970 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1971 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1972 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1973 PetscErrorCode ierr; 1974 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1975 PetscBool roworiented=a->roworiented; 1976 const PetscScalar *value = v; 1977 MatScalar *ap,*aa = a->a,*bap; 1978 1979 PetscFunctionBegin; 1980 if (roworiented) { 1981 stepval = (n-1)*bs; 1982 } else { 1983 stepval = (m-1)*bs; 1984 } 1985 for (k=0; k<m; k++) { /* loop over added rows */ 1986 row = im[k]; 1987 if (row < 0) continue; 1988 #if defined(PETSC_USE_DEBUG) 1989 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1990 #endif 1991 rp = aj + ai[row]; 1992 ap = aa + bs2*ai[row]; 1993 rmax = imax[row]; 1994 nrow = ailen[row]; 1995 low = 0; 1996 high = nrow; 1997 for (l=0; l<n; l++) { /* loop over added columns */ 1998 if (in[l] < 0) continue; 1999 #if defined(PETSC_USE_DEBUG) 2000 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 2001 #endif 2002 col = in[l]; 2003 if (roworiented) { 2004 value = v + (k*(stepval+bs) + l)*bs; 2005 } else { 2006 value = v + (l*(stepval+bs) + k)*bs; 2007 } 2008 if (col <= lastcol) low = 0; else high = nrow; 2009 lastcol = col; 2010 while (high-low > 7) { 2011 t = (low+high)/2; 2012 if (rp[t] > col) high = t; 2013 else low = t; 2014 } 2015 for (i=low; i<high; i++) { 2016 if (rp[i] > col) break; 2017 if (rp[i] == col) { 2018 bap = ap + bs2*i; 2019 if (roworiented) { 2020 if (is == ADD_VALUES) { 2021 for (ii=0; ii<bs; ii++,value+=stepval) { 2022 for (jj=ii; jj<bs2; jj+=bs) { 2023 bap[jj] += *value++; 2024 } 2025 } 2026 } else { 2027 for (ii=0; ii<bs; ii++,value+=stepval) { 2028 for (jj=ii; jj<bs2; jj+=bs) { 2029 bap[jj] = *value++; 2030 } 2031 } 2032 } 2033 } else { 2034 if (is == ADD_VALUES) { 2035 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2036 for (jj=0; jj<bs; jj++) { 2037 bap[jj] += value[jj]; 2038 } 2039 bap += bs; 2040 } 2041 } else { 2042 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2043 for (jj=0; jj<bs; jj++) { 2044 bap[jj] = value[jj]; 2045 } 2046 bap += bs; 2047 } 2048 } 2049 } 2050 goto noinsert2; 2051 } 2052 } 2053 if (nonew == 1) goto noinsert2; 2054 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2055 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2056 N = nrow++ - 1; high++; 2057 /* shift up all the later entries in this row */ 2058 for (ii=N; ii>=i; ii--) { 2059 rp[ii+1] = rp[ii]; 2060 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2061 } 2062 if (N >= i) { 2063 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2064 } 2065 rp[i] = col; 2066 bap = ap + bs2*i; 2067 if (roworiented) { 2068 for (ii=0; ii<bs; ii++,value+=stepval) { 2069 for (jj=ii; jj<bs2; jj+=bs) { 2070 bap[jj] = *value++; 2071 } 2072 } 2073 } else { 2074 for (ii=0; ii<bs; ii++,value+=stepval) { 2075 for (jj=0; jj<bs; jj++) { 2076 *bap++ = *value++; 2077 } 2078 } 2079 } 2080 noinsert2:; 2081 low = i; 2082 } 2083 ailen[row] = nrow; 2084 } 2085 PetscFunctionReturn(0); 2086 } 2087 2088 #undef __FUNCT__ 2089 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 2090 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 2091 { 2092 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2093 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 2094 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 2095 PetscErrorCode ierr; 2096 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 2097 MatScalar *aa = a->a,*ap; 2098 PetscReal ratio=0.6; 2099 2100 PetscFunctionBegin; 2101 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 2102 2103 if (m) rmax = ailen[0]; 2104 for (i=1; i<mbs; i++) { 2105 /* move each row back by the amount of empty slots (fshift) before it*/ 2106 fshift += imax[i-1] - ailen[i-1]; 2107 rmax = PetscMax(rmax,ailen[i]); 2108 if (fshift) { 2109 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 2110 N = ailen[i]; 2111 for (j=0; j<N; j++) { 2112 ip[j-fshift] = ip[j]; 2113 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2114 } 2115 } 2116 ai[i] = ai[i-1] + ailen[i-1]; 2117 } 2118 if (mbs) { 2119 fshift += imax[mbs-1] - ailen[mbs-1]; 2120 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 2121 } 2122 /* reset ilen and imax for each row */ 2123 for (i=0; i<mbs; i++) { 2124 ailen[i] = imax[i] = ai[i+1] - ai[i]; 2125 } 2126 a->nz = ai[mbs]; 2127 2128 /* diagonals may have moved, so kill the diagonal pointers */ 2129 a->idiagvalid = PETSC_FALSE; 2130 if (fshift && a->diag) { 2131 ierr = PetscFree(a->diag);CHKERRQ(ierr); 2132 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2133 a->diag = 0; 2134 } 2135 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 2136 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 2137 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 2138 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 2139 A->info.mallocs += a->reallocs; 2140 a->reallocs = 0; 2141 A->info.nz_unneeded = (PetscReal)fshift*bs2; 2142 2143 ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 2144 A->same_nonzero = PETSC_TRUE; 2145 PetscFunctionReturn(0); 2146 } 2147 2148 /* 2149 This function returns an array of flags which indicate the locations of contiguous 2150 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2151 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2152 Assume: sizes should be long enough to hold all the values. 2153 */ 2154 #undef __FUNCT__ 2155 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 2156 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2157 { 2158 PetscInt i,j,k,row; 2159 PetscBool flg; 2160 2161 PetscFunctionBegin; 2162 for (i=0,j=0; i<n; j++) { 2163 row = idx[i]; 2164 if (row%bs!=0) { /* Not the begining of a block */ 2165 sizes[j] = 1; 2166 i++; 2167 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2168 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 2169 i++; 2170 } else { /* Begining of the block, so check if the complete block exists */ 2171 flg = PETSC_TRUE; 2172 for (k=1; k<bs; k++) { 2173 if (row+k != idx[i+k]) { /* break in the block */ 2174 flg = PETSC_FALSE; 2175 break; 2176 } 2177 } 2178 if (flg) { /* No break in the bs */ 2179 sizes[j] = bs; 2180 i+= bs; 2181 } else { 2182 sizes[j] = 1; 2183 i++; 2184 } 2185 } 2186 } 2187 *bs_max = j; 2188 PetscFunctionReturn(0); 2189 } 2190 2191 #undef __FUNCT__ 2192 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2193 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2194 { 2195 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2196 PetscErrorCode ierr; 2197 PetscInt i,j,k,count,*rows; 2198 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2199 PetscScalar zero = 0.0; 2200 MatScalar *aa; 2201 const PetscScalar *xx; 2202 PetscScalar *bb; 2203 2204 PetscFunctionBegin; 2205 /* fix right hand side if needed */ 2206 if (x && b) { 2207 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2208 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2209 for (i=0; i<is_n; i++) { 2210 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2211 } 2212 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2213 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2214 } 2215 2216 /* Make a copy of the IS and sort it */ 2217 /* allocate memory for rows,sizes */ 2218 ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2219 2220 /* copy IS values to rows, and sort them */ 2221 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2222 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2223 2224 if (baij->keepnonzeropattern) { 2225 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2226 bs_max = is_n; 2227 A->same_nonzero = PETSC_TRUE; 2228 } else { 2229 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2230 A->same_nonzero = PETSC_FALSE; 2231 } 2232 2233 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2234 row = rows[j]; 2235 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2236 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2237 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2238 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2239 if (diag != (PetscScalar)0.0) { 2240 if (baij->ilen[row/bs] > 0) { 2241 baij->ilen[row/bs] = 1; 2242 baij->j[baij->i[row/bs]] = row/bs; 2243 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2244 } 2245 /* Now insert all the diagonal values for this bs */ 2246 for (k=0; k<bs; k++) { 2247 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2248 } 2249 } else { /* (diag == 0.0) */ 2250 baij->ilen[row/bs] = 0; 2251 } /* end (diag == 0.0) */ 2252 } else { /* (sizes[i] != bs) */ 2253 #if defined (PETSC_USE_DEBUG) 2254 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2255 #endif 2256 for (k=0; k<count; k++) { 2257 aa[0] = zero; 2258 aa += bs; 2259 } 2260 if (diag != (PetscScalar)0.0) { 2261 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2262 } 2263 } 2264 } 2265 2266 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2267 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2268 PetscFunctionReturn(0); 2269 } 2270 2271 #undef __FUNCT__ 2272 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2273 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2274 { 2275 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2276 PetscErrorCode ierr; 2277 PetscInt i,j,k,count; 2278 PetscInt bs=A->rmap->bs,bs2=baij->bs2,row,col; 2279 PetscScalar zero = 0.0; 2280 MatScalar *aa; 2281 const PetscScalar *xx; 2282 PetscScalar *bb; 2283 PetscBool *zeroed,vecs = PETSC_FALSE; 2284 2285 PetscFunctionBegin; 2286 /* fix right hand side if needed */ 2287 if (x && b) { 2288 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2289 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2290 vecs = PETSC_TRUE; 2291 } 2292 A->same_nonzero = PETSC_TRUE; 2293 2294 /* zero the columns */ 2295 ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr); 2296 ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 2297 for (i=0; i<is_n; i++) { 2298 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2299 zeroed[is_idx[i]] = PETSC_TRUE; 2300 } 2301 for (i=0; i<A->rmap->N; i++) { 2302 if (!zeroed[i]) { 2303 row = i/bs; 2304 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2305 for (k=0; k<bs; k++) { 2306 col = bs*baij->j[j] + k; 2307 if (zeroed[col]) { 2308 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2309 if (vecs) bb[i] -= aa[0]*xx[col]; 2310 aa[0] = 0.0; 2311 } 2312 } 2313 } 2314 } else if (vecs) bb[i] = diag*xx[i]; 2315 } 2316 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2317 if (vecs) { 2318 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2319 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2320 } 2321 2322 /* zero the rows */ 2323 for (i=0; i<is_n; i++) { 2324 row = is_idx[i]; 2325 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2326 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2327 for (k=0; k<count; k++) { 2328 aa[0] = zero; 2329 aa += bs; 2330 } 2331 if (diag != (PetscScalar)0.0) { 2332 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2333 } 2334 } 2335 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2336 PetscFunctionReturn(0); 2337 } 2338 2339 #undef __FUNCT__ 2340 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2341 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2342 { 2343 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2344 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2345 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2346 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2347 PetscErrorCode ierr; 2348 PetscInt ridx,cidx,bs2=a->bs2; 2349 PetscBool roworiented=a->roworiented; 2350 MatScalar *ap,value,*aa=a->a,*bap; 2351 2352 PetscFunctionBegin; 2353 if (v) PetscValidScalarPointer(v,6); 2354 for (k=0; k<m; k++) { /* loop over added rows */ 2355 row = im[k]; 2356 brow = row/bs; 2357 if (row < 0) continue; 2358 #if defined(PETSC_USE_DEBUG) 2359 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2360 #endif 2361 rp = aj + ai[brow]; 2362 ap = aa + bs2*ai[brow]; 2363 rmax = imax[brow]; 2364 nrow = ailen[brow]; 2365 low = 0; 2366 high = nrow; 2367 for (l=0; l<n; l++) { /* loop over added columns */ 2368 if (in[l] < 0) continue; 2369 #if defined(PETSC_USE_DEBUG) 2370 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2371 #endif 2372 col = in[l]; bcol = col/bs; 2373 ridx = row % bs; cidx = col % bs; 2374 if (roworiented) { 2375 value = v[l + k*n]; 2376 } else { 2377 value = v[k + l*m]; 2378 } 2379 if (col <= lastcol) low = 0; else high = nrow; 2380 lastcol = col; 2381 while (high-low > 7) { 2382 t = (low+high)/2; 2383 if (rp[t] > bcol) high = t; 2384 else low = t; 2385 } 2386 for (i=low; i<high; i++) { 2387 if (rp[i] > bcol) break; 2388 if (rp[i] == bcol) { 2389 bap = ap + bs2*i + bs*cidx + ridx; 2390 if (is == ADD_VALUES) *bap += value; 2391 else *bap = value; 2392 goto noinsert1; 2393 } 2394 } 2395 if (nonew == 1) goto noinsert1; 2396 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2397 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2398 N = nrow++ - 1; high++; 2399 /* shift up all the later entries in this row */ 2400 for (ii=N; ii>=i; ii--) { 2401 rp[ii+1] = rp[ii]; 2402 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2403 } 2404 if (N>=i) { 2405 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2406 } 2407 rp[i] = bcol; 2408 ap[bs2*i + bs*cidx + ridx] = value; 2409 a->nz++; 2410 noinsert1:; 2411 low = i; 2412 } 2413 ailen[brow] = nrow; 2414 } 2415 A->same_nonzero = PETSC_FALSE; 2416 PetscFunctionReturn(0); 2417 } 2418 2419 #undef __FUNCT__ 2420 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2421 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2422 { 2423 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2424 Mat outA; 2425 PetscErrorCode ierr; 2426 PetscBool row_identity,col_identity; 2427 2428 PetscFunctionBegin; 2429 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2430 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2431 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2432 if (!row_identity || !col_identity) { 2433 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2434 } 2435 2436 outA = inA; 2437 inA->factortype = MAT_FACTOR_LU; 2438 2439 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2440 2441 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2442 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2443 a->row = row; 2444 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2445 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2446 a->col = col; 2447 2448 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2449 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2450 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2451 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2452 2453 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2454 if (!a->solve_work) { 2455 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2456 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2457 } 2458 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2459 2460 PetscFunctionReturn(0); 2461 } 2462 2463 EXTERN_C_BEGIN 2464 #undef __FUNCT__ 2465 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2466 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2467 { 2468 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2469 PetscInt i,nz,mbs; 2470 2471 PetscFunctionBegin; 2472 nz = baij->maxnz; 2473 mbs = baij->mbs; 2474 for (i=0; i<nz; i++) { 2475 baij->j[i] = indices[i]; 2476 } 2477 baij->nz = nz; 2478 for (i=0; i<mbs; i++) { 2479 baij->ilen[i] = baij->imax[i]; 2480 } 2481 PetscFunctionReturn(0); 2482 } 2483 EXTERN_C_END 2484 2485 #undef __FUNCT__ 2486 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2487 /*@ 2488 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2489 in the matrix. 2490 2491 Input Parameters: 2492 + mat - the SeqBAIJ matrix 2493 - indices - the column indices 2494 2495 Level: advanced 2496 2497 Notes: 2498 This can be called if you have precomputed the nonzero structure of the 2499 matrix and want to provide it to the matrix object to improve the performance 2500 of the MatSetValues() operation. 2501 2502 You MUST have set the correct numbers of nonzeros per row in the call to 2503 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2504 2505 MUST be called before any calls to MatSetValues(); 2506 2507 @*/ 2508 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2509 { 2510 PetscErrorCode ierr; 2511 2512 PetscFunctionBegin; 2513 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2514 PetscValidPointer(indices,2); 2515 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr); 2516 PetscFunctionReturn(0); 2517 } 2518 2519 #undef __FUNCT__ 2520 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2521 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2522 { 2523 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2524 PetscErrorCode ierr; 2525 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2526 PetscReal atmp; 2527 PetscScalar *x,zero = 0.0; 2528 MatScalar *aa; 2529 PetscInt ncols,brow,krow,kcol; 2530 2531 PetscFunctionBegin; 2532 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2533 bs = A->rmap->bs; 2534 aa = a->a; 2535 ai = a->i; 2536 aj = a->j; 2537 mbs = a->mbs; 2538 2539 ierr = VecSet(v,zero);CHKERRQ(ierr); 2540 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2541 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2542 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2543 for (i=0; i<mbs; i++) { 2544 ncols = ai[1] - ai[0]; ai++; 2545 brow = bs*i; 2546 for (j=0; j<ncols; j++) { 2547 for (kcol=0; kcol<bs; kcol++) { 2548 for (krow=0; krow<bs; krow++) { 2549 atmp = PetscAbsScalar(*aa);aa++; 2550 row = brow + krow; /* row index */ 2551 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2552 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2553 } 2554 } 2555 aj++; 2556 } 2557 } 2558 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2559 PetscFunctionReturn(0); 2560 } 2561 2562 #undef __FUNCT__ 2563 #define __FUNCT__ "MatCopy_SeqBAIJ" 2564 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2565 { 2566 PetscErrorCode ierr; 2567 2568 PetscFunctionBegin; 2569 /* If the two matrices have the same copy implementation, use fast copy. */ 2570 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2571 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2572 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2573 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2574 2575 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2576 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2577 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2578 } else { 2579 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2580 } 2581 PetscFunctionReturn(0); 2582 } 2583 2584 #undef __FUNCT__ 2585 #define __FUNCT__ "MatSetUp_SeqBAIJ" 2586 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2587 { 2588 PetscErrorCode ierr; 2589 2590 PetscFunctionBegin; 2591 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2592 PetscFunctionReturn(0); 2593 } 2594 2595 #undef __FUNCT__ 2596 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ" 2597 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2598 { 2599 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2600 PetscFunctionBegin; 2601 *array = a->a; 2602 PetscFunctionReturn(0); 2603 } 2604 2605 #undef __FUNCT__ 2606 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ" 2607 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2608 { 2609 PetscFunctionBegin; 2610 PetscFunctionReturn(0); 2611 } 2612 2613 #undef __FUNCT__ 2614 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2615 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2616 { 2617 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2618 PetscErrorCode ierr; 2619 PetscInt i,bs=Y->rmap->bs,j,bs2=bs*bs; 2620 PetscBLASInt one=1; 2621 2622 PetscFunctionBegin; 2623 if (str == SAME_NONZERO_PATTERN) { 2624 PetscScalar alpha = a; 2625 PetscBLASInt bnz = PetscBLASIntCast(x->nz*bs2); 2626 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2627 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2628 if (y->xtoy && y->XtoY != X) { 2629 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2630 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2631 } 2632 if (!y->xtoy) { /* get xtoy */ 2633 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2634 y->XtoY = X; 2635 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2636 } 2637 for (i=0; i<x->nz; i++) { 2638 j = 0; 2639 while (j < bs2) { 2640 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2641 j++; 2642 } 2643 } 2644 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2645 } else { 2646 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2647 } 2648 PetscFunctionReturn(0); 2649 } 2650 2651 #undef __FUNCT__ 2652 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2653 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2654 { 2655 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2656 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2657 MatScalar *aa = a->a; 2658 2659 PetscFunctionBegin; 2660 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2661 PetscFunctionReturn(0); 2662 } 2663 2664 #undef __FUNCT__ 2665 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2666 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2667 { 2668 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2669 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2670 MatScalar *aa = a->a; 2671 2672 PetscFunctionBegin; 2673 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2674 PetscFunctionReturn(0); 2675 } 2676 2677 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2678 2679 #undef __FUNCT__ 2680 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2681 /* 2682 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2683 */ 2684 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2685 { 2686 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2687 PetscErrorCode ierr; 2688 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2689 PetscInt nz = a->i[m],row,*jj,mr,col; 2690 2691 PetscFunctionBegin; 2692 *nn = n; 2693 if (!ia) PetscFunctionReturn(0); 2694 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2695 else { 2696 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2697 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2698 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2699 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2700 jj = a->j; 2701 for (i=0; i<nz; i++) { 2702 collengths[jj[i]]++; 2703 } 2704 cia[0] = oshift; 2705 for (i=0; i<n; i++) { 2706 cia[i+1] = cia[i] + collengths[i]; 2707 } 2708 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2709 jj = a->j; 2710 for (row=0; row<m; row++) { 2711 mr = a->i[row+1] - a->i[row]; 2712 for (i=0; i<mr; i++) { 2713 col = *jj++; 2714 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2715 } 2716 } 2717 ierr = PetscFree(collengths);CHKERRQ(ierr); 2718 *ia = cia; *ja = cja; 2719 } 2720 PetscFunctionReturn(0); 2721 } 2722 2723 #undef __FUNCT__ 2724 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2725 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2726 { 2727 PetscErrorCode ierr; 2728 2729 PetscFunctionBegin; 2730 if (!ia) PetscFunctionReturn(0); 2731 ierr = PetscFree(*ia);CHKERRQ(ierr); 2732 ierr = PetscFree(*ja);CHKERRQ(ierr); 2733 PetscFunctionReturn(0); 2734 } 2735 2736 #undef __FUNCT__ 2737 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2738 PetscErrorCode MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2739 { 2740 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f; 2741 PetscErrorCode ierr; 2742 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow; 2743 PetscScalar dx,*y,*xx,*w3_array; 2744 PetscScalar *vscale_array; 2745 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2746 Vec w1=coloring->w1,w2=coloring->w2,w3; 2747 void *fctx = coloring->fctx; 2748 PetscBool flg = PETSC_FALSE; 2749 PetscInt ctype=coloring->ctype,N,col_start=0,col_end=0; 2750 Vec x1_tmp; 2751 2752 PetscFunctionBegin; 2753 PetscValidHeaderSpecific(J,MAT_CLASSID,1); 2754 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2); 2755 PetscValidHeaderSpecific(x1,VEC_CLASSID,3); 2756 if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2757 2758 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2759 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2760 ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr); 2761 if (flg) { 2762 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2763 } else { 2764 PetscBool assembled; 2765 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2766 if (assembled) { 2767 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2768 } 2769 } 2770 2771 x1_tmp = x1; 2772 if (!coloring->vscale) { 2773 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2774 } 2775 2776 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2777 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2778 } 2779 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2780 2781 /* Set w1 = F(x1) */ 2782 if (!coloring->fset) { 2783 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2784 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2785 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2786 } else { 2787 coloring->fset = PETSC_FALSE; 2788 } 2789 2790 if (!coloring->w3) { 2791 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2792 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2793 } 2794 w3 = coloring->w3; 2795 2796 CHKMEMQ; 2797 /* Compute all the local scale factors, including ghost points */ 2798 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2799 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2800 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2801 if (ctype == IS_COLORING_GHOSTED) { 2802 col_start = 0; col_end = N; 2803 } else if (ctype == IS_COLORING_GLOBAL) { 2804 xx = xx - start; 2805 vscale_array = vscale_array - start; 2806 col_start = start; col_end = N + start; 2807 } CHKMEMQ; 2808 for (col=col_start; col<col_end; col++) { 2809 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2810 if (coloring->htype[0] == 'w') { 2811 dx = 1.0 + unorm; 2812 } else { 2813 dx = xx[col]; 2814 } 2815 if (dx == (PetscScalar)0.0) dx = 1.0; 2816 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2817 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2818 dx *= epsilon; 2819 vscale_array[col] = (PetscScalar)1.0/dx; 2820 } CHKMEMQ; 2821 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2822 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2823 if (ctype == IS_COLORING_GLOBAL) { 2824 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2825 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2826 } 2827 CHKMEMQ; 2828 if (coloring->vscaleforrow) { 2829 vscaleforrow = coloring->vscaleforrow; 2830 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2831 2832 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2833 /* 2834 Loop over each color 2835 */ 2836 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2837 for (k=0; k<coloring->ncolors; k++) { 2838 coloring->currentcolor = k; 2839 for (i=0; i<bs; i++) { 2840 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2841 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2842 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2843 /* 2844 Loop over each column associated with color 2845 adding the perturbation to the vector w3. 2846 */ 2847 for (l=0; l<coloring->ncolumns[k]; l++) { 2848 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2849 if (coloring->htype[0] == 'w') { 2850 dx = 1.0 + unorm; 2851 } else { 2852 dx = xx[col]; 2853 } 2854 if (dx == (PetscScalar)0.0) dx = 1.0; 2855 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2856 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2857 dx *= epsilon; 2858 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2859 w3_array[col] += dx; 2860 } 2861 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2862 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2863 2864 /* 2865 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2866 w2 = F(x1 + dx) - F(x1) 2867 */ 2868 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2869 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2870 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2871 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2872 2873 /* 2874 Loop over rows of vector, putting results into Jacobian matrix 2875 */ 2876 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2877 for (l=0; l<coloring->nrows[k]; l++) { 2878 row = bs*coloring->rows[k][l]; /* local row index */ 2879 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2880 for (j=0; j<bs; j++) { 2881 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2882 srows[j] = row + start + j; 2883 } 2884 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2885 } 2886 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2887 } 2888 } /* endof for each color */ 2889 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2890 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2891 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2892 ierr = PetscFree(srows);CHKERRQ(ierr); 2893 2894 coloring->currentcolor = -1; 2895 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2896 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2897 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2898 PetscFunctionReturn(0); 2899 } 2900 2901 /* -------------------------------------------------------------------*/ 2902 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2903 MatGetRow_SeqBAIJ, 2904 MatRestoreRow_SeqBAIJ, 2905 MatMult_SeqBAIJ_N, 2906 /* 4*/ MatMultAdd_SeqBAIJ_N, 2907 MatMultTranspose_SeqBAIJ, 2908 MatMultTransposeAdd_SeqBAIJ, 2909 0, 2910 0, 2911 0, 2912 /*10*/ 0, 2913 MatLUFactor_SeqBAIJ, 2914 0, 2915 0, 2916 MatTranspose_SeqBAIJ, 2917 /*15*/ MatGetInfo_SeqBAIJ, 2918 MatEqual_SeqBAIJ, 2919 MatGetDiagonal_SeqBAIJ, 2920 MatDiagonalScale_SeqBAIJ, 2921 MatNorm_SeqBAIJ, 2922 /*20*/ 0, 2923 MatAssemblyEnd_SeqBAIJ, 2924 MatSetOption_SeqBAIJ, 2925 MatZeroEntries_SeqBAIJ, 2926 /*24*/ MatZeroRows_SeqBAIJ, 2927 0, 2928 0, 2929 0, 2930 0, 2931 /*29*/ MatSetUp_SeqBAIJ, 2932 0, 2933 0, 2934 0, 2935 0, 2936 /*34*/ MatDuplicate_SeqBAIJ, 2937 0, 2938 0, 2939 MatILUFactor_SeqBAIJ, 2940 0, 2941 /*39*/ MatAXPY_SeqBAIJ, 2942 MatGetSubMatrices_SeqBAIJ, 2943 MatIncreaseOverlap_SeqBAIJ, 2944 MatGetValues_SeqBAIJ, 2945 MatCopy_SeqBAIJ, 2946 /*44*/ 0, 2947 MatScale_SeqBAIJ, 2948 0, 2949 0, 2950 MatZeroRowsColumns_SeqBAIJ, 2951 /*49*/ 0, 2952 MatGetRowIJ_SeqBAIJ, 2953 MatRestoreRowIJ_SeqBAIJ, 2954 MatGetColumnIJ_SeqBAIJ, 2955 MatRestoreColumnIJ_SeqBAIJ, 2956 /*54*/ MatFDColoringCreate_SeqAIJ, 2957 0, 2958 0, 2959 0, 2960 MatSetValuesBlocked_SeqBAIJ, 2961 /*59*/ MatGetSubMatrix_SeqBAIJ, 2962 MatDestroy_SeqBAIJ, 2963 MatView_SeqBAIJ, 2964 0, 2965 0, 2966 /*64*/ 0, 2967 0, 2968 0, 2969 0, 2970 0, 2971 /*69*/ MatGetRowMaxAbs_SeqBAIJ, 2972 0, 2973 MatConvert_Basic, 2974 0, 2975 0, 2976 /*74*/ 0, 2977 MatFDColoringApply_BAIJ, 2978 0, 2979 0, 2980 0, 2981 /*79*/ 0, 2982 0, 2983 0, 2984 0, 2985 MatLoad_SeqBAIJ, 2986 /*84*/ 0, 2987 0, 2988 0, 2989 0, 2990 0, 2991 /*89*/ 0, 2992 0, 2993 0, 2994 0, 2995 0, 2996 /*94*/ 0, 2997 0, 2998 0, 2999 0, 3000 0, 3001 /*99*/0, 3002 0, 3003 0, 3004 0, 3005 0, 3006 /*104*/0, 3007 MatRealPart_SeqBAIJ, 3008 MatImaginaryPart_SeqBAIJ, 3009 0, 3010 0, 3011 /*109*/0, 3012 0, 3013 0, 3014 0, 3015 MatMissingDiagonal_SeqBAIJ, 3016 /*114*/0, 3017 0, 3018 0, 3019 0, 3020 0, 3021 /*119*/0, 3022 0, 3023 MatMultHermitianTranspose_SeqBAIJ, 3024 MatMultHermitianTransposeAdd_SeqBAIJ, 3025 0, 3026 /*124*/0, 3027 0, 3028 MatInvertBlockDiagonal_SeqBAIJ 3029 }; 3030 3031 EXTERN_C_BEGIN 3032 #undef __FUNCT__ 3033 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 3034 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3035 { 3036 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3037 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 3038 PetscErrorCode ierr; 3039 3040 PetscFunctionBegin; 3041 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3042 3043 /* allocate space for values if not already there */ 3044 if (!aij->saved_values) { 3045 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 3046 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 3047 } 3048 3049 /* copy values over */ 3050 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3051 PetscFunctionReturn(0); 3052 } 3053 EXTERN_C_END 3054 3055 EXTERN_C_BEGIN 3056 #undef __FUNCT__ 3057 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 3058 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3059 { 3060 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3061 PetscErrorCode ierr; 3062 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 3063 3064 PetscFunctionBegin; 3065 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3066 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 3067 3068 /* copy values over */ 3069 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3070 PetscFunctionReturn(0); 3071 } 3072 EXTERN_C_END 3073 3074 EXTERN_C_BEGIN 3075 extern PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 3076 extern PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 3077 EXTERN_C_END 3078 3079 EXTERN_C_BEGIN 3080 #undef __FUNCT__ 3081 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 3082 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 3083 { 3084 Mat_SeqBAIJ *b; 3085 PetscErrorCode ierr; 3086 PetscInt i,mbs,nbs,bs2; 3087 PetscBool flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 3088 3089 PetscFunctionBegin; 3090 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 3091 if (nz == MAT_SKIP_ALLOCATION) { 3092 skipallocation = PETSC_TRUE; 3093 nz = 0; 3094 } 3095 3096 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3097 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3098 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3099 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3100 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3101 3102 B->preallocated = PETSC_TRUE; 3103 3104 mbs = B->rmap->n/bs; 3105 nbs = B->cmap->n/bs; 3106 bs2 = bs*bs; 3107 3108 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 3109 3110 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3111 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 3112 if (nnz) { 3113 for (i=0; i<mbs; i++) { 3114 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 3115 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 3116 } 3117 } 3118 3119 b = (Mat_SeqBAIJ*)B->data; 3120 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 3121 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3122 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3123 3124 if (!flg) { 3125 switch (bs) { 3126 case 1: 3127 B->ops->mult = MatMult_SeqBAIJ_1; 3128 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3129 B->ops->sor = MatSOR_SeqBAIJ_1; 3130 break; 3131 case 2: 3132 B->ops->mult = MatMult_SeqBAIJ_2; 3133 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3134 B->ops->sor = MatSOR_SeqBAIJ_2; 3135 break; 3136 case 3: 3137 B->ops->mult = MatMult_SeqBAIJ_3; 3138 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3139 B->ops->sor = MatSOR_SeqBAIJ_3; 3140 break; 3141 case 4: 3142 B->ops->mult = MatMult_SeqBAIJ_4; 3143 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3144 B->ops->sor = MatSOR_SeqBAIJ_4; 3145 break; 3146 case 5: 3147 B->ops->mult = MatMult_SeqBAIJ_5; 3148 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3149 B->ops->sor = MatSOR_SeqBAIJ_5; 3150 break; 3151 case 6: 3152 B->ops->mult = MatMult_SeqBAIJ_6; 3153 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3154 B->ops->sor = MatSOR_SeqBAIJ_6; 3155 break; 3156 case 7: 3157 B->ops->mult = MatMult_SeqBAIJ_7; 3158 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3159 B->ops->sor = MatSOR_SeqBAIJ_7; 3160 break; 3161 case 15: 3162 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3163 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3164 B->ops->sor = MatSOR_SeqBAIJ_N; 3165 break; 3166 default: 3167 B->ops->mult = MatMult_SeqBAIJ_N; 3168 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3169 B->ops->sor = MatSOR_SeqBAIJ_N; 3170 break; 3171 } 3172 } 3173 b->mbs = mbs; 3174 b->nbs = nbs; 3175 if (!skipallocation) { 3176 if (!b->imax) { 3177 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 3178 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3179 b->free_imax_ilen = PETSC_TRUE; 3180 } 3181 /* b->ilen will count nonzeros in each block row so far. */ 3182 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 3183 if (!nnz) { 3184 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3185 else if (nz < 0) nz = 1; 3186 for (i=0; i<mbs; i++) b->imax[i] = nz; 3187 nz = nz*mbs; 3188 } else { 3189 nz = 0; 3190 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 3191 } 3192 3193 /* allocate the matrix space */ 3194 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 3195 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 3196 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 3197 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 3198 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3199 b->singlemalloc = PETSC_TRUE; 3200 b->i[0] = 0; 3201 for (i=1; i<mbs+1; i++) { 3202 b->i[i] = b->i[i-1] + b->imax[i-1]; 3203 } 3204 b->free_a = PETSC_TRUE; 3205 b->free_ij = PETSC_TRUE; 3206 } else { 3207 b->free_a = PETSC_FALSE; 3208 b->free_ij = PETSC_FALSE; 3209 } 3210 3211 b->bs2 = bs2; 3212 b->mbs = mbs; 3213 b->nz = 0; 3214 b->maxnz = nz; 3215 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3216 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 3217 PetscFunctionReturn(0); 3218 } 3219 EXTERN_C_END 3220 3221 EXTERN_C_BEGIN 3222 #undef __FUNCT__ 3223 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3224 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3225 { 3226 PetscInt i,m,nz,nz_max=0,*nnz; 3227 PetscScalar *values=0; 3228 PetscErrorCode ierr; 3229 3230 PetscFunctionBegin; 3231 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3232 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3233 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3234 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3235 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3236 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3237 m = B->rmap->n/bs; 3238 3239 if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 3240 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3241 for (i=0; i<m; i++) { 3242 nz = ii[i+1]- ii[i]; 3243 if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 3244 nz_max = PetscMax(nz_max, nz); 3245 nnz[i] = nz; 3246 } 3247 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3248 ierr = PetscFree(nnz);CHKERRQ(ierr); 3249 3250 values = (PetscScalar*)V; 3251 if (!values) { 3252 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3253 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3254 } 3255 for (i=0; i<m; i++) { 3256 PetscInt ncols = ii[i+1] - ii[i]; 3257 const PetscInt *icols = jj + ii[i]; 3258 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3259 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3260 } 3261 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3262 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3263 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3264 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3265 PetscFunctionReturn(0); 3266 } 3267 EXTERN_C_END 3268 3269 3270 EXTERN_C_BEGIN 3271 extern PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3272 extern PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*); 3273 #if defined(PETSC_HAVE_MUMPS) 3274 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3275 #endif 3276 extern PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 3277 EXTERN_C_END 3278 3279 /*MC 3280 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3281 block sparse compressed row format. 3282 3283 Options Database Keys: 3284 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3285 3286 Level: beginner 3287 3288 .seealso: MatCreateSeqBAIJ() 3289 M*/ 3290 3291 EXTERN_C_BEGIN 3292 extern PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3293 EXTERN_C_END 3294 3295 EXTERN_C_BEGIN 3296 #undef __FUNCT__ 3297 #define __FUNCT__ "MatCreate_SeqBAIJ" 3298 PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3299 { 3300 PetscErrorCode ierr; 3301 PetscMPIInt size; 3302 Mat_SeqBAIJ *b; 3303 3304 PetscFunctionBegin; 3305 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 3306 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3307 3308 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3309 B->data = (void*)b; 3310 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3311 b->row = 0; 3312 b->col = 0; 3313 b->icol = 0; 3314 b->reallocs = 0; 3315 b->saved_values = 0; 3316 3317 b->roworiented = PETSC_TRUE; 3318 b->nonew = 0; 3319 b->diag = 0; 3320 b->solve_work = 0; 3321 b->mult_work = 0; 3322 B->spptr = 0; 3323 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3324 b->keepnonzeropattern = PETSC_FALSE; 3325 b->xtoy = 0; 3326 b->XtoY = 0; 3327 B->same_nonzero = PETSC_FALSE; 3328 3329 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C", 3330 "MatGetFactorAvailable_seqbaij_petsc", 3331 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3332 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C", 3333 "MatGetFactor_seqbaij_petsc", 3334 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3335 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_bstrm_C", 3336 "MatGetFactor_seqbaij_bstrm", 3337 MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr); 3338 #if defined(PETSC_HAVE_MUMPS) 3339 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr); 3340 #endif 3341 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatInvertBlockDiagonal_C", 3342 "MatInvertBlockDiagonal_SeqBAIJ", 3343 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3344 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3345 "MatStoreValues_SeqBAIJ", 3346 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3347 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3348 "MatRetrieveValues_SeqBAIJ", 3349 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3350 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 3351 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 3352 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3353 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 3354 "MatConvert_SeqBAIJ_SeqAIJ", 3355 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3356 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 3357 "MatConvert_SeqBAIJ_SeqSBAIJ", 3358 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3359 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 3360 "MatSeqBAIJSetPreallocation_SeqBAIJ", 3361 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3362 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 3363 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 3364 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3365 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C", 3366 "MatConvert_SeqBAIJ_SeqBSTRM", 3367 MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3368 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3369 "MatIsTranspose_SeqBAIJ", 3370 MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3371 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3372 PetscFunctionReturn(0); 3373 } 3374 EXTERN_C_END 3375 3376 #undef __FUNCT__ 3377 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3378 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3379 { 3380 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3381 PetscErrorCode ierr; 3382 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3383 3384 PetscFunctionBegin; 3385 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3386 3387 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3388 c->imax = a->imax; 3389 c->ilen = a->ilen; 3390 c->free_imax_ilen = PETSC_FALSE; 3391 } else { 3392 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3393 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3394 for (i=0; i<mbs; i++) { 3395 c->imax[i] = a->imax[i]; 3396 c->ilen[i] = a->ilen[i]; 3397 } 3398 c->free_imax_ilen = PETSC_TRUE; 3399 } 3400 3401 /* allocate the matrix space */ 3402 if (mallocmatspace) { 3403 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3404 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3405 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3406 ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr); 3407 c->i = a->i; 3408 c->j = a->j; 3409 c->singlemalloc = PETSC_FALSE; 3410 c->free_a = PETSC_TRUE; 3411 c->free_ij = PETSC_FALSE; 3412 c->parent = A; 3413 C->preallocated = PETSC_TRUE; 3414 C->assembled = PETSC_TRUE; 3415 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3416 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3417 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3418 } else { 3419 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3420 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3421 c->singlemalloc = PETSC_TRUE; 3422 c->free_a = PETSC_TRUE; 3423 c->free_ij = PETSC_TRUE; 3424 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3425 if (mbs > 0) { 3426 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3427 if (cpvalues == MAT_COPY_VALUES) { 3428 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3429 } else { 3430 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3431 } 3432 } 3433 C->preallocated = PETSC_TRUE; 3434 C->assembled = PETSC_TRUE; 3435 } 3436 } 3437 3438 c->roworiented = a->roworiented; 3439 c->nonew = a->nonew; 3440 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3441 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3442 c->bs2 = a->bs2; 3443 c->mbs = a->mbs; 3444 c->nbs = a->nbs; 3445 3446 if (a->diag) { 3447 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3448 c->diag = a->diag; 3449 c->free_diag = PETSC_FALSE; 3450 } else { 3451 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3452 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3453 for (i=0; i<mbs; i++) { 3454 c->diag[i] = a->diag[i]; 3455 } 3456 c->free_diag = PETSC_TRUE; 3457 } 3458 } else c->diag = 0; 3459 c->nz = a->nz; 3460 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3461 c->solve_work = 0; 3462 c->mult_work = 0; 3463 3464 c->compressedrow.use = a->compressedrow.use; 3465 c->compressedrow.nrows = a->compressedrow.nrows; 3466 c->compressedrow.check = a->compressedrow.check; 3467 if (a->compressedrow.use) { 3468 i = a->compressedrow.nrows; 3469 ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 3470 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3471 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3472 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3473 } else { 3474 c->compressedrow.use = PETSC_FALSE; 3475 c->compressedrow.i = PETSC_NULL; 3476 c->compressedrow.rindex = PETSC_NULL; 3477 } 3478 C->same_nonzero = A->same_nonzero; 3479 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3480 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3481 PetscFunctionReturn(0); 3482 } 3483 3484 #undef __FUNCT__ 3485 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3486 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3487 { 3488 PetscErrorCode ierr; 3489 3490 PetscFunctionBegin; 3491 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 3492 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3493 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3494 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3495 PetscFunctionReturn(0); 3496 } 3497 3498 #undef __FUNCT__ 3499 #define __FUNCT__ "MatLoad_SeqBAIJ" 3500 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3501 { 3502 Mat_SeqBAIJ *a; 3503 PetscErrorCode ierr; 3504 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3505 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3506 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3507 PetscInt *masked,nmask,tmp,bs2,ishift; 3508 PetscMPIInt size; 3509 int fd; 3510 PetscScalar *aa; 3511 MPI_Comm comm = ((PetscObject)viewer)->comm; 3512 3513 PetscFunctionBegin; 3514 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3515 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3516 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3517 bs2 = bs*bs; 3518 3519 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3520 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3521 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3522 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3523 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3524 M = header[1]; N = header[2]; nz = header[3]; 3525 3526 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3527 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3528 3529 /* 3530 This code adds extra rows to make sure the number of rows is 3531 divisible by the blocksize 3532 */ 3533 mbs = M/bs; 3534 extra_rows = bs - M + bs*(mbs); 3535 if (extra_rows == bs) extra_rows = 0; 3536 else mbs++; 3537 if (extra_rows) { 3538 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3539 } 3540 3541 /* Set global sizes if not already set */ 3542 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3543 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3544 } else { /* Check if the matrix global sizes are correct */ 3545 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3546 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3547 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3548 } 3549 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3550 } 3551 3552 /* read in row lengths */ 3553 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3554 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3555 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3556 3557 /* read in column indices */ 3558 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3559 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3560 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3561 3562 /* loop over row lengths determining block row lengths */ 3563 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3564 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3565 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3566 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3567 rowcount = 0; 3568 nzcount = 0; 3569 for (i=0; i<mbs; i++) { 3570 nmask = 0; 3571 for (j=0; j<bs; j++) { 3572 kmax = rowlengths[rowcount]; 3573 for (k=0; k<kmax; k++) { 3574 tmp = jj[nzcount++]/bs; 3575 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3576 } 3577 rowcount++; 3578 } 3579 browlengths[i] += nmask; 3580 /* zero out the mask elements we set */ 3581 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3582 } 3583 3584 /* Do preallocation */ 3585 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3586 a = (Mat_SeqBAIJ*)newmat->data; 3587 3588 /* set matrix "i" values */ 3589 a->i[0] = 0; 3590 for (i=1; i<= mbs; i++) { 3591 a->i[i] = a->i[i-1] + browlengths[i-1]; 3592 a->ilen[i-1] = browlengths[i-1]; 3593 } 3594 a->nz = 0; 3595 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3596 3597 /* read in nonzero values */ 3598 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3599 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3600 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3601 3602 /* set "a" and "j" values into matrix */ 3603 nzcount = 0; jcount = 0; 3604 for (i=0; i<mbs; i++) { 3605 nzcountb = nzcount; 3606 nmask = 0; 3607 for (j=0; j<bs; j++) { 3608 kmax = rowlengths[i*bs+j]; 3609 for (k=0; k<kmax; k++) { 3610 tmp = jj[nzcount++]/bs; 3611 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3612 } 3613 } 3614 /* sort the masked values */ 3615 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3616 3617 /* set "j" values into matrix */ 3618 maskcount = 1; 3619 for (j=0; j<nmask; j++) { 3620 a->j[jcount++] = masked[j]; 3621 mask[masked[j]] = maskcount++; 3622 } 3623 /* set "a" values into matrix */ 3624 ishift = bs2*a->i[i]; 3625 for (j=0; j<bs; j++) { 3626 kmax = rowlengths[i*bs+j]; 3627 for (k=0; k<kmax; k++) { 3628 tmp = jj[nzcountb]/bs ; 3629 block = mask[tmp] - 1; 3630 point = jj[nzcountb] - bs*tmp; 3631 idx = ishift + bs2*block + j + bs*point; 3632 a->a[idx] = (MatScalar)aa[nzcountb++]; 3633 } 3634 } 3635 /* zero out the mask elements we set */ 3636 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3637 } 3638 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3639 3640 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3641 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3642 ierr = PetscFree(aa);CHKERRQ(ierr); 3643 ierr = PetscFree(jj);CHKERRQ(ierr); 3644 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3645 3646 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3647 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3648 PetscFunctionReturn(0); 3649 } 3650 3651 #undef __FUNCT__ 3652 #define __FUNCT__ "MatCreateSeqBAIJ" 3653 /*@C 3654 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3655 compressed row) format. For good matrix assembly performance the 3656 user should preallocate the matrix storage by setting the parameter nz 3657 (or the array nnz). By setting these parameters accurately, performance 3658 during matrix assembly can be increased by more than a factor of 50. 3659 3660 Collective on MPI_Comm 3661 3662 Input Parameters: 3663 + comm - MPI communicator, set to PETSC_COMM_SELF 3664 . bs - size of block 3665 . m - number of rows 3666 . n - number of columns 3667 . nz - number of nonzero blocks per block row (same for all rows) 3668 - nnz - array containing the number of nonzero blocks in the various block rows 3669 (possibly different for each block row) or PETSC_NULL 3670 3671 Output Parameter: 3672 . A - the matrix 3673 3674 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3675 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3676 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3677 3678 Options Database Keys: 3679 . -mat_no_unroll - uses code that does not unroll the loops in the 3680 block calculations (much slower) 3681 . -mat_block_size - size of the blocks to use 3682 3683 Level: intermediate 3684 3685 Notes: 3686 The number of rows and columns must be divisible by blocksize. 3687 3688 If the nnz parameter is given then the nz parameter is ignored 3689 3690 A nonzero block is any block that as 1 or more nonzeros in it 3691 3692 The block AIJ format is fully compatible with standard Fortran 77 3693 storage. That is, the stored row and column indices can begin at 3694 either one (as in Fortran) or zero. See the users' manual for details. 3695 3696 Specify the preallocated storage with either nz or nnz (not both). 3697 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3698 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3699 matrices. 3700 3701 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3702 @*/ 3703 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3704 { 3705 PetscErrorCode ierr; 3706 3707 PetscFunctionBegin; 3708 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3709 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3710 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3711 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3712 PetscFunctionReturn(0); 3713 } 3714 3715 #undef __FUNCT__ 3716 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3717 /*@C 3718 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3719 per row in the matrix. For good matrix assembly performance the 3720 user should preallocate the matrix storage by setting the parameter nz 3721 (or the array nnz). By setting these parameters accurately, performance 3722 during matrix assembly can be increased by more than a factor of 50. 3723 3724 Collective on MPI_Comm 3725 3726 Input Parameters: 3727 + A - the matrix 3728 . bs - size of block 3729 . nz - number of block nonzeros per block row (same for all rows) 3730 - nnz - array containing the number of block nonzeros in the various block rows 3731 (possibly different for each block row) or PETSC_NULL 3732 3733 Options Database Keys: 3734 . -mat_no_unroll - uses code that does not unroll the loops in the 3735 block calculations (much slower) 3736 . -mat_block_size - size of the blocks to use 3737 3738 Level: intermediate 3739 3740 Notes: 3741 If the nnz parameter is given then the nz parameter is ignored 3742 3743 You can call MatGetInfo() to get information on how effective the preallocation was; 3744 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3745 You can also run with the option -info and look for messages with the string 3746 malloc in them to see if additional memory allocation was needed. 3747 3748 The block AIJ format is fully compatible with standard Fortran 77 3749 storage. That is, the stored row and column indices can begin at 3750 either one (as in Fortran) or zero. See the users' manual for details. 3751 3752 Specify the preallocated storage with either nz or nnz (not both). 3753 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3754 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3755 3756 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3757 @*/ 3758 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3759 { 3760 PetscErrorCode ierr; 3761 3762 PetscFunctionBegin; 3763 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3764 PetscValidType(B,1); 3765 PetscValidLogicalCollectiveInt(B,bs,2); 3766 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3767 PetscFunctionReturn(0); 3768 } 3769 3770 #undef __FUNCT__ 3771 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3772 /*@C 3773 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3774 (the default sequential PETSc format). 3775 3776 Collective on MPI_Comm 3777 3778 Input Parameters: 3779 + A - the matrix 3780 . i - the indices into j for the start of each local row (starts with zero) 3781 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3782 - v - optional values in the matrix 3783 3784 Level: developer 3785 3786 .keywords: matrix, aij, compressed row, sparse 3787 3788 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3789 @*/ 3790 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3791 { 3792 PetscErrorCode ierr; 3793 3794 PetscFunctionBegin; 3795 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3796 PetscValidType(B,1); 3797 PetscValidLogicalCollectiveInt(B,bs,2); 3798 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3799 PetscFunctionReturn(0); 3800 } 3801 3802 3803 #undef __FUNCT__ 3804 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3805 /*@ 3806 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3807 3808 Collective on MPI_Comm 3809 3810 Input Parameters: 3811 + comm - must be an MPI communicator of size 1 3812 . bs - size of block 3813 . m - number of rows 3814 . n - number of columns 3815 . i - row indices 3816 . j - column indices 3817 - a - matrix values 3818 3819 Output Parameter: 3820 . mat - the matrix 3821 3822 Level: advanced 3823 3824 Notes: 3825 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3826 once the matrix is destroyed 3827 3828 You cannot set new nonzero locations into this matrix, that will generate an error. 3829 3830 The i and j indices are 0 based 3831 3832 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3833 3834 3835 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3836 3837 @*/ 3838 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3839 { 3840 PetscErrorCode ierr; 3841 PetscInt ii; 3842 Mat_SeqBAIJ *baij; 3843 3844 PetscFunctionBegin; 3845 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3846 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3847 3848 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3849 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3850 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3851 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3852 baij = (Mat_SeqBAIJ*)(*mat)->data; 3853 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3854 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3855 3856 baij->i = i; 3857 baij->j = j; 3858 baij->a = a; 3859 baij->singlemalloc = PETSC_FALSE; 3860 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3861 baij->free_a = PETSC_FALSE; 3862 baij->free_ij = PETSC_FALSE; 3863 3864 for (ii=0; ii<m; ii++) { 3865 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3866 #if defined(PETSC_USE_DEBUG) 3867 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3868 #endif 3869 } 3870 #if defined(PETSC_USE_DEBUG) 3871 for (ii=0; ii<baij->i[m]; ii++) { 3872 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3873 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3874 } 3875 #endif 3876 3877 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3878 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3879 PetscFunctionReturn(0); 3880 } 3881