1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <../src/mat/blockinvert.h> 9 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 19 PetscReal shift = 0.0; 20 21 PetscFunctionBegin; 22 if (a->idiagvalid) { 23 if (values) *values = a->idiag; 24 PetscFunctionReturn(0); 25 } 26 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 27 diag_offset = a->diag; 28 if (!a->idiag) { 29 ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 30 ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 31 } 32 diag = a->idiag; 33 mdiag = a->idiag+bs2*mbs; 34 if (values) *values = a->idiag; 35 /* factor and invert each block */ 36 switch (bs) { 37 case 1: 38 for (i=0; i<mbs; i++) { 39 odiag = v + 1*diag_offset[i]; 40 diag[0] = odiag[0]; 41 mdiag[0] = odiag[0]; 42 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 43 diag += 1; 44 mdiag += 1; 45 } 46 break; 47 case 2: 48 for (i=0; i<mbs; i++) { 49 odiag = v + 4*diag_offset[i]; 50 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 51 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 52 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 53 diag += 4; 54 mdiag += 4; 55 } 56 break; 57 case 3: 58 for (i=0; i<mbs; i++) { 59 odiag = v + 9*diag_offset[i]; 60 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 61 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 62 diag[8] = odiag[8]; 63 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 64 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 65 mdiag[8] = odiag[8]; 66 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 67 diag += 9; 68 mdiag += 9; 69 } 70 break; 71 case 4: 72 for (i=0; i<mbs; i++) { 73 odiag = v + 16*diag_offset[i]; 74 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 75 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 76 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 77 diag += 16; 78 mdiag += 16; 79 } 80 break; 81 case 5: 82 for (i=0; i<mbs; i++) { 83 odiag = v + 25*diag_offset[i]; 84 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 85 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 86 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 87 diag += 25; 88 mdiag += 25; 89 } 90 break; 91 case 6: 92 for (i=0; i<mbs; i++) { 93 odiag = v + 36*diag_offset[i]; 94 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 95 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 97 diag += 36; 98 mdiag += 36; 99 } 100 break; 101 case 7: 102 for (i=0; i<mbs; i++) { 103 odiag = v + 49*diag_offset[i]; 104 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 105 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 107 diag += 49; 108 mdiag += 49; 109 } 110 break; 111 default: 112 ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr); 113 for (i=0; i<mbs; i++) { 114 odiag = v + bs2*diag_offset[i]; 115 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 117 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 118 diag += bs2; 119 mdiag += bs2; 120 } 121 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 122 } 123 a->idiagvalid = PETSC_TRUE; 124 PetscFunctionReturn(0); 125 } 126 127 #undef __FUNCT__ 128 #define __FUNCT__ "MatSOR_SeqBAIJ_1" 129 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 130 { 131 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 132 PetscScalar *x,x1,s1; 133 const PetscScalar *b; 134 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 135 PetscErrorCode ierr; 136 PetscInt m = a->mbs,i,i2,nz,j; 137 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 138 139 PetscFunctionBegin; 140 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 141 its = its*lits; 142 if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 143 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 144 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 145 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 146 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 147 148 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 149 150 diag = a->diag; 151 idiag = a->idiag; 152 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 153 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 154 155 if (flag & SOR_ZERO_INITIAL_GUESS) { 156 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 157 x[0] = b[0]*idiag[0]; 158 i2 = 1; 159 idiag += 1; 160 for (i=1; i<m; i++) { 161 v = aa + ai[i]; 162 vi = aj + ai[i]; 163 nz = diag[i] - ai[i]; 164 s1 = b[i2]; 165 for (j=0; j<nz; j++) s1 -= v[j]*x[vi[j]]; 166 x[i2] = idiag[0]*s1; 167 idiag += 1; 168 i2 += 1; 169 } 170 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 171 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 172 } 173 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 174 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 175 i2 = 0; 176 mdiag = a->idiag+a->mbs; 177 for (i=0; i<m; i++) { 178 x1 = x[i2]; 179 x[i2] = mdiag[0]*x1; 180 mdiag += 1; 181 i2 += 1; 182 } 183 ierr = PetscLogFlops(m);CHKERRQ(ierr); 184 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 185 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 186 } 187 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 188 idiag = a->idiag+a->mbs - 1; 189 i2 = m - 1; 190 x1 = x[i2]; 191 x[i2] = idiag[0]*x1; 192 idiag -= 1; 193 i2 -= 1; 194 for (i=m-2; i>=0; i--) { 195 v = aa + (diag[i]+1); 196 vi = aj + diag[i] + 1; 197 nz = ai[i+1] - diag[i] - 1; 198 s1 = x[i2]; 199 for (j=0; j<nz; j++) s1 -= v[j]*x[vi[j]]; 200 x[i2] = idiag[0]*s1; 201 idiag -= 1; 202 i2 -= 1; 203 } 204 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 205 } 206 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 207 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 208 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 209 PetscFunctionReturn(0); 210 } 211 212 #undef __FUNCT__ 213 #define __FUNCT__ "MatSOR_SeqBAIJ_2" 214 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 215 { 216 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 217 PetscScalar *x,x1,x2,s1,s2; 218 const PetscScalar *b; 219 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 220 PetscErrorCode ierr; 221 PetscInt m = a->mbs,i,i2,nz,idx,j,it; 222 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 223 224 PetscFunctionBegin; 225 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 226 its = its*lits; 227 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 228 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 229 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 230 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 231 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 232 233 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 234 235 diag = a->diag; 236 idiag = a->idiag; 237 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 238 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 239 240 if (flag & SOR_ZERO_INITIAL_GUESS) { 241 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 242 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 243 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 244 i2 = 2; 245 idiag += 4; 246 for (i=1; i<m; i++) { 247 v = aa + 4*ai[i]; 248 vi = aj + ai[i]; 249 nz = diag[i] - ai[i]; 250 s1 = b[i2]; s2 = b[i2+1]; 251 for (j=0; j<nz; j++) { 252 idx = 2*vi[j]; 253 it = 4*j; 254 x1 = x[idx]; x2 = x[1+idx]; 255 s1 -= v[it]*x1 + v[it+2]*x2; 256 s2 -= v[it+1]*x1 + v[it+3]*x2; 257 } 258 x[i2] = idiag[0]*s1 + idiag[2]*s2; 259 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 260 idiag += 4; 261 i2 += 2; 262 } 263 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 264 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 265 } 266 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 267 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 268 i2 = 0; 269 mdiag = a->idiag+4*a->mbs; 270 for (i=0; i<m; i++) { 271 x1 = x[i2]; x2 = x[i2+1]; 272 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 273 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 274 mdiag += 4; 275 i2 += 2; 276 } 277 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 278 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 279 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 280 } 281 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 282 idiag = a->idiag+4*a->mbs - 4; 283 i2 = 2*m - 2; 284 x1 = x[i2]; x2 = x[i2+1]; 285 x[i2] = idiag[0]*x1 + idiag[2]*x2; 286 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 287 idiag -= 4; 288 i2 -= 2; 289 for (i=m-2; i>=0; i--) { 290 v = aa + 4*(diag[i]+1); 291 vi = aj + diag[i] + 1; 292 nz = ai[i+1] - diag[i] - 1; 293 s1 = x[i2]; s2 = x[i2+1]; 294 for (j=0; j<nz; j++) { 295 idx = 2*vi[j]; 296 it = 4*j; 297 x1 = x[idx]; x2 = x[1+idx]; 298 s1 -= v[it]*x1 + v[it+2]*x2; 299 s2 -= v[it+1]*x1 + v[it+3]*x2; 300 } 301 x[i2] = idiag[0]*s1 + idiag[2]*s2; 302 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 303 idiag -= 4; 304 i2 -= 2; 305 } 306 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 307 } 308 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 309 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 310 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 311 PetscFunctionReturn(0); 312 } 313 314 #undef __FUNCT__ 315 #define __FUNCT__ "MatSOR_SeqBAIJ_3" 316 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 317 { 318 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 319 PetscScalar *x,x1,x2,x3,s1,s2,s3; 320 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 321 const PetscScalar *b; 322 PetscErrorCode ierr; 323 PetscInt m = a->mbs,i,i2,nz,idx; 324 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 325 326 PetscFunctionBegin; 327 its = its*lits; 328 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 329 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 330 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 331 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 332 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 333 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 334 335 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 336 337 diag = a->diag; 338 idiag = a->idiag; 339 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 340 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 341 342 if (flag & SOR_ZERO_INITIAL_GUESS) { 343 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 344 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 345 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 346 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 347 i2 = 3; 348 idiag += 9; 349 for (i=1; i<m; i++) { 350 v = aa + 9*ai[i]; 351 vi = aj + ai[i]; 352 nz = diag[i] - ai[i]; 353 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 354 while (nz--) { 355 idx = 3*(*vi++); 356 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 357 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 358 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 359 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 360 v += 9; 361 } 362 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 363 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 364 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 365 idiag += 9; 366 i2 += 3; 367 } 368 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 369 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 370 } 371 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 372 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 373 i2 = 0; 374 mdiag = a->idiag+9*a->mbs; 375 for (i=0; i<m; i++) { 376 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 377 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 378 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 379 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 380 mdiag += 9; 381 i2 += 3; 382 } 383 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 384 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 385 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 386 } 387 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 388 idiag = a->idiag+9*a->mbs - 9; 389 i2 = 3*m - 3; 390 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 391 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 392 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 393 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 394 idiag -= 9; 395 i2 -= 3; 396 for (i=m-2; i>=0; i--) { 397 v = aa + 9*(diag[i]+1); 398 vi = aj + diag[i] + 1; 399 nz = ai[i+1] - diag[i] - 1; 400 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 401 while (nz--) { 402 idx = 3*(*vi++); 403 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 404 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 405 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 406 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 407 v += 9; 408 } 409 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 410 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 411 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 412 idiag -= 9; 413 i2 -= 3; 414 } 415 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 416 } 417 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 418 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 419 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 420 PetscFunctionReturn(0); 421 } 422 423 #undef __FUNCT__ 424 #define __FUNCT__ "MatSOR_SeqBAIJ_4" 425 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 426 { 427 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 428 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 429 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 430 const PetscScalar *b; 431 PetscErrorCode ierr; 432 PetscInt m = a->mbs,i,i2,nz,idx; 433 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 434 435 PetscFunctionBegin; 436 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 437 its = its*lits; 438 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 439 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 440 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 441 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 442 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 443 444 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 445 446 diag = a->diag; 447 idiag = a->idiag; 448 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 449 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 450 451 if (flag & SOR_ZERO_INITIAL_GUESS) { 452 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 453 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 454 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 455 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 456 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 457 i2 = 4; 458 idiag += 16; 459 for (i=1; i<m; i++) { 460 v = aa + 16*ai[i]; 461 vi = aj + ai[i]; 462 nz = diag[i] - ai[i]; 463 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 464 while (nz--) { 465 idx = 4*(*vi++); 466 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 467 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 468 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 469 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 470 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 471 v += 16; 472 } 473 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 474 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 475 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 476 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 477 idiag += 16; 478 i2 += 4; 479 } 480 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 481 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 482 } 483 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 484 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 485 i2 = 0; 486 mdiag = a->idiag+16*a->mbs; 487 for (i=0; i<m; i++) { 488 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 489 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 490 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 491 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 492 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 493 mdiag += 16; 494 i2 += 4; 495 } 496 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 497 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 498 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 499 } 500 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 501 idiag = a->idiag+16*a->mbs - 16; 502 i2 = 4*m - 4; 503 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 504 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 505 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 506 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 507 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 508 idiag -= 16; 509 i2 -= 4; 510 for (i=m-2; i>=0; i--) { 511 v = aa + 16*(diag[i]+1); 512 vi = aj + diag[i] + 1; 513 nz = ai[i+1] - diag[i] - 1; 514 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 515 while (nz--) { 516 idx = 4*(*vi++); 517 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 518 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 519 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 520 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 521 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 522 v += 16; 523 } 524 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 525 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 526 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 527 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 528 idiag -= 16; 529 i2 -= 4; 530 } 531 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 532 } 533 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 534 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 535 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 536 PetscFunctionReturn(0); 537 } 538 539 #undef __FUNCT__ 540 #define __FUNCT__ "MatSOR_SeqBAIJ_5" 541 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 542 { 543 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 544 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 545 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 546 const PetscScalar *b; 547 PetscErrorCode ierr; 548 PetscInt m = a->mbs,i,i2,nz,idx; 549 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 550 551 PetscFunctionBegin; 552 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 553 its = its*lits; 554 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 555 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 556 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 557 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 558 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 559 560 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 561 562 diag = a->diag; 563 idiag = a->idiag; 564 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 565 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 566 567 if (flag & SOR_ZERO_INITIAL_GUESS) { 568 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 569 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 570 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 571 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 572 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 573 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 574 i2 = 5; 575 idiag += 25; 576 for (i=1; i<m; i++) { 577 v = aa + 25*ai[i]; 578 vi = aj + ai[i]; 579 nz = diag[i] - ai[i]; 580 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 581 while (nz--) { 582 idx = 5*(*vi++); 583 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 584 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 585 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 586 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 587 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 588 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 589 v += 25; 590 } 591 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 592 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 593 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 594 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 595 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 596 idiag += 25; 597 i2 += 5; 598 } 599 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 600 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 601 } 602 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 603 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 604 i2 = 0; 605 mdiag = a->idiag+25*a->mbs; 606 for (i=0; i<m; i++) { 607 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 608 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 609 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 610 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 611 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 612 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 613 mdiag += 25; 614 i2 += 5; 615 } 616 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 617 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 618 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 619 } 620 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 621 idiag = a->idiag+25*a->mbs - 25; 622 i2 = 5*m - 5; 623 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 624 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 625 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 626 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 627 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 628 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 629 idiag -= 25; 630 i2 -= 5; 631 for (i=m-2; i>=0; i--) { 632 v = aa + 25*(diag[i]+1); 633 vi = aj + diag[i] + 1; 634 nz = ai[i+1] - diag[i] - 1; 635 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 636 while (nz--) { 637 idx = 5*(*vi++); 638 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 639 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 640 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 641 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 642 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 643 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 644 v += 25; 645 } 646 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 647 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 648 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 649 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 650 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 651 idiag -= 25; 652 i2 -= 5; 653 } 654 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 655 } 656 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 657 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 658 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 659 PetscFunctionReturn(0); 660 } 661 662 #undef __FUNCT__ 663 #define __FUNCT__ "MatSOR_SeqBAIJ_6" 664 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 665 { 666 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 667 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 668 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 669 const PetscScalar *b; 670 PetscErrorCode ierr; 671 PetscInt m = a->mbs,i,i2,nz,idx; 672 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 673 674 PetscFunctionBegin; 675 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 676 its = its*lits; 677 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 678 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 679 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 680 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 681 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 682 683 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 684 685 diag = a->diag; 686 idiag = a->idiag; 687 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 688 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 689 690 if (flag & SOR_ZERO_INITIAL_GUESS) { 691 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 692 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 693 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 694 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 695 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 696 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 697 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 698 i2 = 6; 699 idiag += 36; 700 for (i=1; i<m; i++) { 701 v = aa + 36*ai[i]; 702 vi = aj + ai[i]; 703 nz = diag[i] - ai[i]; 704 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 705 while (nz--) { 706 idx = 6*(*vi++); 707 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 708 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 709 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 710 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 711 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 712 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 713 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 714 v += 36; 715 } 716 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 717 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 718 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 719 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 720 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 721 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 722 idiag += 36; 723 i2 += 6; 724 } 725 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 726 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 727 } 728 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 729 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 730 i2 = 0; 731 mdiag = a->idiag+36*a->mbs; 732 for (i=0; i<m; i++) { 733 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 734 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 735 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 736 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 737 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 738 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 739 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 740 mdiag += 36; 741 i2 += 6; 742 } 743 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 744 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 745 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 746 } 747 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 748 idiag = a->idiag+36*a->mbs - 36; 749 i2 = 6*m - 6; 750 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 751 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 752 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 753 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 754 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 755 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 756 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 757 idiag -= 36; 758 i2 -= 6; 759 for (i=m-2; i>=0; i--) { 760 v = aa + 36*(diag[i]+1); 761 vi = aj + diag[i] + 1; 762 nz = ai[i+1] - diag[i] - 1; 763 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 764 while (nz--) { 765 idx = 6*(*vi++); 766 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 767 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 768 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 769 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 770 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 771 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 772 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 773 v += 36; 774 } 775 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 776 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 777 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 778 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 779 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 780 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 781 idiag -= 36; 782 i2 -= 6; 783 } 784 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 785 } 786 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 787 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 788 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 789 PetscFunctionReturn(0); 790 } 791 792 #undef __FUNCT__ 793 #define __FUNCT__ "MatSOR_SeqBAIJ_7" 794 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 795 { 796 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 797 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 798 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 799 const PetscScalar *b; 800 PetscErrorCode ierr; 801 PetscInt m = a->mbs,i,i2,nz,idx; 802 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 803 804 PetscFunctionBegin; 805 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 806 its = its*lits; 807 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 808 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 809 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 810 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 811 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 812 813 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 814 815 diag = a->diag; 816 idiag = a->idiag; 817 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 818 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 819 820 if (flag & SOR_ZERO_INITIAL_GUESS) { 821 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 822 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 823 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 824 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 825 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 826 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 827 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 828 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 829 i2 = 7; 830 idiag += 49; 831 for (i=1; i<m; i++) { 832 v = aa + 49*ai[i]; 833 vi = aj + ai[i]; 834 nz = diag[i] - ai[i]; 835 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 836 while (nz--) { 837 idx = 7*(*vi++); 838 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 839 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 840 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 841 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 842 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 843 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 844 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 845 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 846 v += 49; 847 } 848 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 849 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 850 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 851 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 852 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 853 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 854 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 855 idiag += 49; 856 i2 += 7; 857 } 858 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 859 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 860 } 861 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 862 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 863 i2 = 0; 864 mdiag = a->idiag+49*a->mbs; 865 for (i=0; i<m; i++) { 866 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 867 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7; 868 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7; 869 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7; 870 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7; 871 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7; 872 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7; 873 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7; 874 mdiag += 49; 875 i2 += 7; 876 } 877 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 878 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 879 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 880 } 881 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 882 idiag = a->idiag+49*a->mbs - 49; 883 i2 = 7*m - 7; 884 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 885 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 886 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 887 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 888 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 889 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 890 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 891 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 892 idiag -= 49; 893 i2 -= 7; 894 for (i=m-2; i>=0; i--) { 895 v = aa + 49*(diag[i]+1); 896 vi = aj + diag[i] + 1; 897 nz = ai[i+1] - diag[i] - 1; 898 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 899 while (nz--) { 900 idx = 7*(*vi++); 901 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 902 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 903 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 904 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 905 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 906 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 907 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 908 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 909 v += 49; 910 } 911 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 912 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 913 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 914 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 915 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 916 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 917 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 918 idiag -= 49; 919 i2 -= 7; 920 } 921 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 922 } 923 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 924 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 925 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 926 PetscFunctionReturn(0); 927 } 928 929 #undef __FUNCT__ 930 #define __FUNCT__ "MatSOR_SeqBAIJ_N" 931 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 932 { 933 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 934 PetscScalar *x,*work,*w,*workt; 935 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 936 const PetscScalar *b; 937 PetscErrorCode ierr; 938 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j; 939 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 940 941 PetscFunctionBegin; 942 its = its*lits; 943 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 944 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 945 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 946 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 947 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 948 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 949 950 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 951 952 diag = a->diag; 953 idiag = a->idiag; 954 if (!a->mult_work) { 955 k = PetscMax(A->rmap->n,A->cmap->n); 956 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 957 } 958 work = a->mult_work; 959 if (!a->sor_work) { 960 ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr); 961 } 962 w = a->sor_work; 963 964 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 965 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 966 967 if (flag & SOR_ZERO_INITIAL_GUESS) { 968 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 969 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 970 /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 971 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 972 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/ 973 i2 = bs; 974 idiag += bs2; 975 for (i=1; i<m; i++) { 976 v = aa + bs2*ai[i]; 977 vi = aj + ai[i]; 978 nz = diag[i] - ai[i]; 979 980 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 981 /* copy all rows of x that are needed into contiguous space */ 982 workt = work; 983 for (j=0; j<nz; j++) { 984 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 985 workt += bs; 986 } 987 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 988 /*s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 989 while (nz--) { 990 idx = N*(*vi++); 991 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 992 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 993 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 994 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 995 v += N2; 996 } */ 997 998 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 999 /* x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1000 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1001 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/ 1002 1003 idiag += bs2; 1004 i2 += bs; 1005 } 1006 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 1007 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1008 } 1009 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 1010 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1011 i2 = 0; 1012 mdiag = a->idiag+bs2*a->mbs; 1013 ierr = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr); 1014 for (i=0; i<m; i++) { 1015 PetscKernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2); 1016 /* x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1017 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 1018 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 1019 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */ 1020 1021 mdiag += bs2; 1022 i2 += bs; 1023 } 1024 ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr); 1025 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1026 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 1027 } 1028 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1029 idiag = a->idiag+bs2*a->mbs - bs2; 1030 i2 = bs*m - bs; 1031 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1032 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1033 /*x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1034 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 1035 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 1036 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/ 1037 idiag -= bs2; 1038 i2 -= bs; 1039 for (i=m-2; i>=0; i--) { 1040 v = aa + bs2*(diag[i]+1); 1041 vi = aj + diag[i] + 1; 1042 nz = ai[i+1] - diag[i] - 1; 1043 1044 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1045 /* copy all rows of x that are needed into contiguous space */ 1046 workt = work; 1047 for (j=0; j<nz; j++) { 1048 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1049 workt += bs; 1050 } 1051 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1052 /* s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 1053 while (nz--) { 1054 idx = N*(*vi++); 1055 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 1056 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1057 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1058 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1059 v += N2; 1060 } */ 1061 1062 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1063 /*x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1064 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1065 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */ 1066 idiag -= bs2; 1067 i2 -= bs; 1068 } 1069 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1070 } 1071 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 1072 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1073 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1074 PetscFunctionReturn(0); 1075 } 1076 1077 /* 1078 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1079 */ 1080 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1081 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1082 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1083 #define matsetvaluesblocked4_ matsetvaluesblocked4 1084 #endif 1085 1086 EXTERN_C_BEGIN 1087 #undef __FUNCT__ 1088 #define __FUNCT__ "matsetvaluesblocked4_" 1089 void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 1090 { 1091 Mat A = *AA; 1092 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1093 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 1094 PetscInt *ai =a->i,*ailen=a->ilen; 1095 PetscInt *aj =a->j,stepval,lastcol = -1; 1096 const PetscScalar *value = v; 1097 MatScalar *ap,*aa = a->a,*bap; 1098 1099 PetscFunctionBegin; 1100 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 1101 stepval = (n-1)*4; 1102 for (k=0; k<m; k++) { /* loop over added rows */ 1103 row = im[k]; 1104 rp = aj + ai[row]; 1105 ap = aa + 16*ai[row]; 1106 nrow = ailen[row]; 1107 low = 0; 1108 high = nrow; 1109 for (l=0; l<n; l++) { /* loop over added columns */ 1110 col = in[l]; 1111 if (col <= lastcol) low = 0; 1112 else high = nrow; 1113 lastcol = col; 1114 value = v + k*(stepval+4 + l)*4; 1115 while (high-low > 7) { 1116 t = (low+high)/2; 1117 if (rp[t] > col) high = t; 1118 else low = t; 1119 } 1120 for (i=low; i<high; i++) { 1121 if (rp[i] > col) break; 1122 if (rp[i] == col) { 1123 bap = ap + 16*i; 1124 for (ii=0; ii<4; ii++,value+=stepval) { 1125 for (jj=ii; jj<16; jj+=4) { 1126 bap[jj] += *value++; 1127 } 1128 } 1129 goto noinsert2; 1130 } 1131 } 1132 N = nrow++ - 1; 1133 high++; /* added new column index thus must search to one higher than before */ 1134 /* shift up all the later entries in this row */ 1135 for (ii=N; ii>=i; ii--) { 1136 rp[ii+1] = rp[ii]; 1137 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1138 } 1139 if (N >= i) { 1140 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1141 } 1142 rp[i] = col; 1143 bap = ap + 16*i; 1144 for (ii=0; ii<4; ii++,value+=stepval) { 1145 for (jj=ii; jj<16; jj+=4) { 1146 bap[jj] = *value++; 1147 } 1148 } 1149 noinsert2:; 1150 low = i; 1151 } 1152 ailen[row] = nrow; 1153 } 1154 PetscFunctionReturnVoid(); 1155 } 1156 EXTERN_C_END 1157 1158 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1159 #define matsetvalues4_ MATSETVALUES4 1160 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1161 #define matsetvalues4_ matsetvalues4 1162 #endif 1163 1164 EXTERN_C_BEGIN 1165 #undef __FUNCT__ 1166 #define __FUNCT__ "MatSetValues4_" 1167 void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1168 { 1169 Mat A = *AA; 1170 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1171 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1172 PetscInt *ai=a->i,*ailen=a->ilen; 1173 PetscInt *aj=a->j,brow,bcol; 1174 PetscInt ridx,cidx,lastcol = -1; 1175 MatScalar *ap,value,*aa=a->a,*bap; 1176 1177 PetscFunctionBegin; 1178 for (k=0; k<m; k++) { /* loop over added rows */ 1179 row = im[k]; brow = row/4; 1180 rp = aj + ai[brow]; 1181 ap = aa + 16*ai[brow]; 1182 nrow = ailen[brow]; 1183 low = 0; 1184 high = nrow; 1185 for (l=0; l<n; l++) { /* loop over added columns */ 1186 col = in[l]; bcol = col/4; 1187 ridx = row % 4; cidx = col % 4; 1188 value = v[l + k*n]; 1189 if (col <= lastcol) low = 0; 1190 else high = nrow; 1191 lastcol = col; 1192 while (high-low > 7) { 1193 t = (low+high)/2; 1194 if (rp[t] > bcol) high = t; 1195 else low = t; 1196 } 1197 for (i=low; i<high; i++) { 1198 if (rp[i] > bcol) break; 1199 if (rp[i] == bcol) { 1200 bap = ap + 16*i + 4*cidx + ridx; 1201 *bap += value; 1202 goto noinsert1; 1203 } 1204 } 1205 N = nrow++ - 1; 1206 high++; /* added new column thus must search to one higher than before */ 1207 /* shift up all the later entries in this row */ 1208 for (ii=N; ii>=i; ii--) { 1209 rp[ii+1] = rp[ii]; 1210 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1211 } 1212 if (N>=i) { 1213 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1214 } 1215 rp[i] = bcol; 1216 ap[16*i + 4*cidx + ridx] = value; 1217 noinsert1:; 1218 low = i; 1219 } 1220 ailen[brow] = nrow; 1221 } 1222 PetscFunctionReturnVoid(); 1223 } 1224 EXTERN_C_END 1225 1226 /* 1227 Checks for missing diagonals 1228 */ 1229 #undef __FUNCT__ 1230 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1231 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1232 { 1233 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1234 PetscErrorCode ierr; 1235 PetscInt *diag,*jj = a->j,i; 1236 1237 PetscFunctionBegin; 1238 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1239 *missing = PETSC_FALSE; 1240 if (A->rmap->n > 0 && !jj) { 1241 *missing = PETSC_TRUE; 1242 if (d) *d = 0; 1243 PetscInfo(A,"Matrix has no entries therefore is missing diagonal"); 1244 } else { 1245 diag = a->diag; 1246 for (i=0; i<a->mbs; i++) { 1247 if (jj[diag[i]] != i) { 1248 *missing = PETSC_TRUE; 1249 if (d) *d = i; 1250 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1251 break; 1252 } 1253 } 1254 } 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNCT__ 1259 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1260 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1261 { 1262 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1263 PetscErrorCode ierr; 1264 PetscInt i,j,m = a->mbs; 1265 1266 PetscFunctionBegin; 1267 if (!a->diag) { 1268 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1269 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1270 a->free_diag = PETSC_TRUE; 1271 } 1272 for (i=0; i<m; i++) { 1273 a->diag[i] = a->i[i+1]; 1274 for (j=a->i[i]; j<a->i[i+1]; j++) { 1275 if (a->j[j] == i) { 1276 a->diag[i] = j; 1277 break; 1278 } 1279 } 1280 } 1281 PetscFunctionReturn(0); 1282 } 1283 1284 1285 #undef __FUNCT__ 1286 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1287 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1288 { 1289 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1290 PetscErrorCode ierr; 1291 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1292 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1293 1294 PetscFunctionBegin; 1295 *nn = n; 1296 if (!ia) PetscFunctionReturn(0); 1297 if (symmetric) { 1298 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1299 nz = tia[n]; 1300 } else { 1301 tia = a->i; tja = a->j; 1302 } 1303 1304 if (!blockcompressed && bs > 1) { 1305 (*nn) *= bs; 1306 /* malloc & create the natural set of indices */ 1307 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1308 if (n) { 1309 (*ia)[0] = 0; 1310 for (j=1; j<bs; j++) { 1311 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1312 } 1313 } 1314 1315 for (i=1; i<n; i++) { 1316 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1317 for (j=1; j<bs; j++) { 1318 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1319 } 1320 } 1321 if (n) { 1322 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1323 } 1324 1325 if (inja) { 1326 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1327 cnt = 0; 1328 for (i=0; i<n; i++) { 1329 for (j=0; j<bs; j++) { 1330 for (k=tia[i]; k<tia[i+1]; k++) { 1331 for (l=0; l<bs; l++) { 1332 (*ja)[cnt++] = bs*tja[k] + l; 1333 } 1334 } 1335 } 1336 } 1337 } 1338 1339 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1340 ierr = PetscFree(tia);CHKERRQ(ierr); 1341 ierr = PetscFree(tja);CHKERRQ(ierr); 1342 } 1343 } else if (oshift == 1) { 1344 if (symmetric) { 1345 nz = tia[A->rmap->n/bs]; 1346 /* add 1 to i and j indices */ 1347 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1348 *ia = tia; 1349 if (ja) { 1350 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1351 *ja = tja; 1352 } 1353 } else { 1354 nz = a->i[A->rmap->n/bs]; 1355 /* malloc space and add 1 to i and j indices */ 1356 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1357 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1358 if (ja) { 1359 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1360 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1361 } 1362 } 1363 } else { 1364 *ia = tia; 1365 if (ja) *ja = tja; 1366 } 1367 PetscFunctionReturn(0); 1368 } 1369 1370 #undef __FUNCT__ 1371 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1372 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1373 { 1374 PetscErrorCode ierr; 1375 1376 PetscFunctionBegin; 1377 if (!ia) PetscFunctionReturn(0); 1378 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1379 ierr = PetscFree(*ia);CHKERRQ(ierr); 1380 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1381 } 1382 PetscFunctionReturn(0); 1383 } 1384 1385 #undef __FUNCT__ 1386 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1387 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1388 { 1389 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1390 PetscErrorCode ierr; 1391 1392 PetscFunctionBegin; 1393 #if defined(PETSC_USE_LOG) 1394 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1395 #endif 1396 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1397 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1398 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1399 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1400 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1401 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1402 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1403 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1404 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1405 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1406 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1407 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1408 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1409 1410 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1411 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1412 ierr = PetscFree(A->data);CHKERRQ(ierr); 1413 1414 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1415 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1416 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1417 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1418 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1419 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1420 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1421 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1422 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1423 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",PETSC_NULL);CHKERRQ(ierr); 1424 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1425 PetscFunctionReturn(0); 1426 } 1427 1428 #undef __FUNCT__ 1429 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1430 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1431 { 1432 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1433 PetscErrorCode ierr; 1434 1435 PetscFunctionBegin; 1436 switch (op) { 1437 case MAT_ROW_ORIENTED: 1438 a->roworiented = flg; 1439 break; 1440 case MAT_KEEP_NONZERO_PATTERN: 1441 a->keepnonzeropattern = flg; 1442 break; 1443 case MAT_NEW_NONZERO_LOCATIONS: 1444 a->nonew = (flg ? 0 : 1); 1445 break; 1446 case MAT_NEW_NONZERO_LOCATION_ERR: 1447 a->nonew = (flg ? -1 : 0); 1448 break; 1449 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1450 a->nonew = (flg ? -2 : 0); 1451 break; 1452 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1453 a->nounused = (flg ? -1 : 0); 1454 break; 1455 case MAT_CHECK_COMPRESSED_ROW: 1456 a->compressedrow.check = flg; 1457 break; 1458 case MAT_NEW_DIAGONALS: 1459 case MAT_IGNORE_OFF_PROC_ENTRIES: 1460 case MAT_USE_HASH_TABLE: 1461 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1462 break; 1463 case MAT_SPD: 1464 case MAT_SYMMETRIC: 1465 case MAT_STRUCTURALLY_SYMMETRIC: 1466 case MAT_HERMITIAN: 1467 case MAT_SYMMETRY_ETERNAL: 1468 /* These options are handled directly by MatSetOption() */ 1469 break; 1470 default: 1471 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1472 } 1473 PetscFunctionReturn(0); 1474 } 1475 1476 #undef __FUNCT__ 1477 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1478 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1479 { 1480 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1481 PetscErrorCode ierr; 1482 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1483 MatScalar *aa,*aa_i; 1484 PetscScalar *v_i; 1485 1486 PetscFunctionBegin; 1487 bs = A->rmap->bs; 1488 ai = a->i; 1489 aj = a->j; 1490 aa = a->a; 1491 bs2 = a->bs2; 1492 1493 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1494 1495 bn = row/bs; /* Block number */ 1496 bp = row % bs; /* Block Position */ 1497 M = ai[bn+1] - ai[bn]; 1498 *nz = bs*M; 1499 1500 if (v) { 1501 *v = 0; 1502 if (*nz) { 1503 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1504 for (i=0; i<M; i++) { /* for each block in the block row */ 1505 v_i = *v + i*bs; 1506 aa_i = aa + bs2*(ai[bn] + i); 1507 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1508 } 1509 } 1510 } 1511 1512 if (idx) { 1513 *idx = 0; 1514 if (*nz) { 1515 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1516 for (i=0; i<M; i++) { /* for each block in the block row */ 1517 idx_i = *idx + i*bs; 1518 itmp = bs*aj[ai[bn] + i]; 1519 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1520 } 1521 } 1522 } 1523 PetscFunctionReturn(0); 1524 } 1525 1526 #undef __FUNCT__ 1527 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1528 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1529 { 1530 PetscErrorCode ierr; 1531 1532 PetscFunctionBegin; 1533 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1534 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1535 PetscFunctionReturn(0); 1536 } 1537 1538 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1539 1540 #undef __FUNCT__ 1541 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1542 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1543 { 1544 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1545 Mat C; 1546 PetscErrorCode ierr; 1547 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1548 PetscInt *rows,*cols,bs2=a->bs2; 1549 MatScalar *array; 1550 1551 PetscFunctionBegin; 1552 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1553 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1554 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1555 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1556 1557 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1558 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1559 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1560 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1561 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr); 1562 ierr = PetscFree(col);CHKERRQ(ierr); 1563 } else { 1564 C = *B; 1565 } 1566 1567 array = a->a; 1568 ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 1569 for (i=0; i<mbs; i++) { 1570 cols[0] = i*bs; 1571 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1572 len = ai[i+1] - ai[i]; 1573 for (j=0; j<len; j++) { 1574 rows[0] = (*aj++)*bs; 1575 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1576 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1577 array += bs2; 1578 } 1579 } 1580 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1581 1582 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1583 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1584 1585 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1586 *B = C; 1587 } else { 1588 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1589 } 1590 PetscFunctionReturn(0); 1591 } 1592 1593 EXTERN_C_BEGIN 1594 #undef __FUNCT__ 1595 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1596 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1597 { 1598 PetscErrorCode ierr; 1599 Mat Btrans; 1600 1601 PetscFunctionBegin; 1602 *f = PETSC_FALSE; 1603 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1604 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1605 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1606 PetscFunctionReturn(0); 1607 } 1608 EXTERN_C_END 1609 1610 #undef __FUNCT__ 1611 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1612 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1613 { 1614 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1615 PetscErrorCode ierr; 1616 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1617 int fd; 1618 PetscScalar *aa; 1619 FILE *file; 1620 1621 PetscFunctionBegin; 1622 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1623 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1624 col_lens[0] = MAT_FILE_CLASSID; 1625 1626 col_lens[1] = A->rmap->N; 1627 col_lens[2] = A->cmap->n; 1628 col_lens[3] = a->nz*bs2; 1629 1630 /* store lengths of each row and write (including header) to file */ 1631 count = 0; 1632 for (i=0; i<a->mbs; i++) { 1633 for (j=0; j<bs; j++) { 1634 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1635 } 1636 } 1637 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1638 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1639 1640 /* store column indices (zero start index) */ 1641 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1642 count = 0; 1643 for (i=0; i<a->mbs; i++) { 1644 for (j=0; j<bs; j++) { 1645 for (k=a->i[i]; k<a->i[i+1]; k++) { 1646 for (l=0; l<bs; l++) { 1647 jj[count++] = bs*a->j[k] + l; 1648 } 1649 } 1650 } 1651 } 1652 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1653 ierr = PetscFree(jj);CHKERRQ(ierr); 1654 1655 /* store nonzero values */ 1656 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1657 count = 0; 1658 for (i=0; i<a->mbs; i++) { 1659 for (j=0; j<bs; j++) { 1660 for (k=a->i[i]; k<a->i[i+1]; k++) { 1661 for (l=0; l<bs; l++) { 1662 aa[count++] = a->a[bs2*k + l*bs + j]; 1663 } 1664 } 1665 } 1666 } 1667 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1668 ierr = PetscFree(aa);CHKERRQ(ierr); 1669 1670 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1671 if (file) { 1672 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1673 } 1674 PetscFunctionReturn(0); 1675 } 1676 1677 #undef __FUNCT__ 1678 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1679 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1680 { 1681 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1682 PetscErrorCode ierr; 1683 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1684 PetscViewerFormat format; 1685 1686 PetscFunctionBegin; 1687 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1688 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1689 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1690 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1691 Mat aij; 1692 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1693 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1694 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1695 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1696 PetscFunctionReturn(0); 1697 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1698 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1699 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1700 for (i=0; i<a->mbs; i++) { 1701 for (j=0; j<bs; j++) { 1702 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1703 for (k=a->i[i]; k<a->i[i+1]; k++) { 1704 for (l=0; l<bs; l++) { 1705 #if defined(PETSC_USE_COMPLEX) 1706 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1707 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1708 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1709 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1710 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1711 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1712 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1713 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1714 } 1715 #else 1716 if (a->a[bs2*k + l*bs + j] != 0.0) { 1717 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1718 } 1719 #endif 1720 } 1721 } 1722 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1723 } 1724 } 1725 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1726 } else { 1727 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1728 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1729 for (i=0; i<a->mbs; i++) { 1730 for (j=0; j<bs; j++) { 1731 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1732 for (k=a->i[i]; k<a->i[i+1]; k++) { 1733 for (l=0; l<bs; l++) { 1734 #if defined(PETSC_USE_COMPLEX) 1735 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1736 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1737 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1738 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1739 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1740 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1741 } else { 1742 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1743 } 1744 #else 1745 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1746 #endif 1747 } 1748 } 1749 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1750 } 1751 } 1752 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1753 } 1754 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1755 PetscFunctionReturn(0); 1756 } 1757 1758 #undef __FUNCT__ 1759 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1760 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1761 { 1762 Mat A = (Mat) Aa; 1763 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1764 PetscErrorCode ierr; 1765 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1766 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1767 MatScalar *aa; 1768 PetscViewer viewer; 1769 PetscViewerFormat format; 1770 1771 PetscFunctionBegin; 1772 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1773 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1774 1775 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1776 1777 /* loop over matrix elements drawing boxes */ 1778 1779 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1780 color = PETSC_DRAW_BLUE; 1781 for (i=0,row=0; i<mbs; i++,row+=bs) { 1782 for (j=a->i[i]; j<a->i[i+1]; j++) { 1783 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1784 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1785 aa = a->a + j*bs2; 1786 for (k=0; k<bs; k++) { 1787 for (l=0; l<bs; l++) { 1788 if (PetscRealPart(*aa++) >= 0.) continue; 1789 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1790 } 1791 } 1792 } 1793 } 1794 color = PETSC_DRAW_CYAN; 1795 for (i=0,row=0; i<mbs; i++,row+=bs) { 1796 for (j=a->i[i]; j<a->i[i+1]; j++) { 1797 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1798 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1799 aa = a->a + j*bs2; 1800 for (k=0; k<bs; k++) { 1801 for (l=0; l<bs; l++) { 1802 if (PetscRealPart(*aa++) != 0.) continue; 1803 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1804 } 1805 } 1806 } 1807 } 1808 color = PETSC_DRAW_RED; 1809 for (i=0,row=0; i<mbs; i++,row+=bs) { 1810 for (j=a->i[i]; j<a->i[i+1]; j++) { 1811 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1812 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1813 aa = a->a + j*bs2; 1814 for (k=0; k<bs; k++) { 1815 for (l=0; l<bs; l++) { 1816 if (PetscRealPart(*aa++) <= 0.) continue; 1817 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1818 } 1819 } 1820 } 1821 } 1822 } else { 1823 /* use contour shading to indicate magnitude of values */ 1824 /* first determine max of all nonzero values */ 1825 PetscDraw popup; 1826 PetscReal scale,maxv = 0.0; 1827 1828 for (i=0; i<a->nz*a->bs2; i++) { 1829 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1830 } 1831 scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv; 1832 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1833 if (popup) { 1834 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1835 } 1836 for (i=0,row=0; i<mbs; i++,row+=bs) { 1837 for (j=a->i[i]; j<a->i[i+1]; j++) { 1838 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1839 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1840 aa = a->a + j*bs2; 1841 for (k=0; k<bs; k++) { 1842 for (l=0; l<bs; l++) { 1843 color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++)); 1844 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1845 } 1846 } 1847 } 1848 } 1849 } 1850 PetscFunctionReturn(0); 1851 } 1852 1853 #undef __FUNCT__ 1854 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1855 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1856 { 1857 PetscErrorCode ierr; 1858 PetscReal xl,yl,xr,yr,w,h; 1859 PetscDraw draw; 1860 PetscBool isnull; 1861 1862 PetscFunctionBegin; 1863 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1864 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1865 1866 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1867 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1868 xr += w; yr += h; xl = -w; yl = -h; 1869 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1870 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1871 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1872 PetscFunctionReturn(0); 1873 } 1874 1875 #undef __FUNCT__ 1876 #define __FUNCT__ "MatView_SeqBAIJ" 1877 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1878 { 1879 PetscErrorCode ierr; 1880 PetscBool iascii,isbinary,isdraw; 1881 1882 PetscFunctionBegin; 1883 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1884 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1885 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1886 if (iascii) { 1887 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1888 } else if (isbinary) { 1889 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1890 } else if (isdraw) { 1891 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1892 } else { 1893 Mat B; 1894 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1895 ierr = MatView(B,viewer);CHKERRQ(ierr); 1896 ierr = MatDestroy(&B);CHKERRQ(ierr); 1897 } 1898 PetscFunctionReturn(0); 1899 } 1900 1901 1902 #undef __FUNCT__ 1903 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1904 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1905 { 1906 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1907 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1908 PetscInt *ai = a->i,*ailen = a->ilen; 1909 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1910 MatScalar *ap,*aa = a->a; 1911 1912 PetscFunctionBegin; 1913 for (k=0; k<m; k++) { /* loop over rows */ 1914 row = im[k]; brow = row/bs; 1915 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1916 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1917 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1918 nrow = ailen[brow]; 1919 for (l=0; l<n; l++) { /* loop over columns */ 1920 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1921 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1922 col = in[l]; 1923 bcol = col/bs; 1924 cidx = col%bs; 1925 ridx = row%bs; 1926 high = nrow; 1927 low = 0; /* assume unsorted */ 1928 while (high-low > 5) { 1929 t = (low+high)/2; 1930 if (rp[t] > bcol) high = t; 1931 else low = t; 1932 } 1933 for (i=low; i<high; i++) { 1934 if (rp[i] > bcol) break; 1935 if (rp[i] == bcol) { 1936 *v++ = ap[bs2*i+bs*cidx+ridx]; 1937 goto finished; 1938 } 1939 } 1940 *v++ = 0.0; 1941 finished:; 1942 } 1943 } 1944 PetscFunctionReturn(0); 1945 } 1946 1947 #undef __FUNCT__ 1948 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1949 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1950 { 1951 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1952 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1953 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1954 PetscErrorCode ierr; 1955 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1956 PetscBool roworiented=a->roworiented; 1957 const PetscScalar *value = v; 1958 MatScalar *ap,*aa = a->a,*bap; 1959 1960 PetscFunctionBegin; 1961 if (roworiented) { 1962 stepval = (n-1)*bs; 1963 } else { 1964 stepval = (m-1)*bs; 1965 } 1966 for (k=0; k<m; k++) { /* loop over added rows */ 1967 row = im[k]; 1968 if (row < 0) continue; 1969 #if defined(PETSC_USE_DEBUG) 1970 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1971 #endif 1972 rp = aj + ai[row]; 1973 ap = aa + bs2*ai[row]; 1974 rmax = imax[row]; 1975 nrow = ailen[row]; 1976 low = 0; 1977 high = nrow; 1978 for (l=0; l<n; l++) { /* loop over added columns */ 1979 if (in[l] < 0) continue; 1980 #if defined(PETSC_USE_DEBUG) 1981 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1982 #endif 1983 col = in[l]; 1984 if (roworiented) { 1985 value = v + (k*(stepval+bs) + l)*bs; 1986 } else { 1987 value = v + (l*(stepval+bs) + k)*bs; 1988 } 1989 if (col <= lastcol) low = 0; 1990 else high = nrow; 1991 lastcol = col; 1992 while (high-low > 7) { 1993 t = (low+high)/2; 1994 if (rp[t] > col) high = t; 1995 else low = t; 1996 } 1997 for (i=low; i<high; i++) { 1998 if (rp[i] > col) break; 1999 if (rp[i] == col) { 2000 bap = ap + bs2*i; 2001 if (roworiented) { 2002 if (is == ADD_VALUES) { 2003 for (ii=0; ii<bs; ii++,value+=stepval) { 2004 for (jj=ii; jj<bs2; jj+=bs) { 2005 bap[jj] += *value++; 2006 } 2007 } 2008 } else { 2009 for (ii=0; ii<bs; ii++,value+=stepval) { 2010 for (jj=ii; jj<bs2; jj+=bs) { 2011 bap[jj] = *value++; 2012 } 2013 } 2014 } 2015 } else { 2016 if (is == ADD_VALUES) { 2017 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2018 for (jj=0; jj<bs; jj++) { 2019 bap[jj] += value[jj]; 2020 } 2021 bap += bs; 2022 } 2023 } else { 2024 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2025 for (jj=0; jj<bs; jj++) { 2026 bap[jj] = value[jj]; 2027 } 2028 bap += bs; 2029 } 2030 } 2031 } 2032 goto noinsert2; 2033 } 2034 } 2035 if (nonew == 1) goto noinsert2; 2036 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2037 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2038 N = nrow++ - 1; high++; 2039 /* shift up all the later entries in this row */ 2040 for (ii=N; ii>=i; ii--) { 2041 rp[ii+1] = rp[ii]; 2042 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2043 } 2044 if (N >= i) { 2045 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2046 } 2047 rp[i] = col; 2048 bap = ap + bs2*i; 2049 if (roworiented) { 2050 for (ii=0; ii<bs; ii++,value+=stepval) { 2051 for (jj=ii; jj<bs2; jj+=bs) { 2052 bap[jj] = *value++; 2053 } 2054 } 2055 } else { 2056 for (ii=0; ii<bs; ii++,value+=stepval) { 2057 for (jj=0; jj<bs; jj++) { 2058 *bap++ = *value++; 2059 } 2060 } 2061 } 2062 noinsert2:; 2063 low = i; 2064 } 2065 ailen[row] = nrow; 2066 } 2067 PetscFunctionReturn(0); 2068 } 2069 2070 #undef __FUNCT__ 2071 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 2072 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 2073 { 2074 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2075 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 2076 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 2077 PetscErrorCode ierr; 2078 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 2079 MatScalar *aa = a->a,*ap; 2080 PetscReal ratio=0.6; 2081 2082 PetscFunctionBegin; 2083 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 2084 2085 if (m) rmax = ailen[0]; 2086 for (i=1; i<mbs; i++) { 2087 /* move each row back by the amount of empty slots (fshift) before it*/ 2088 fshift += imax[i-1] - ailen[i-1]; 2089 rmax = PetscMax(rmax,ailen[i]); 2090 if (fshift) { 2091 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 2092 N = ailen[i]; 2093 for (j=0; j<N; j++) { 2094 ip[j-fshift] = ip[j]; 2095 2096 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2097 } 2098 } 2099 ai[i] = ai[i-1] + ailen[i-1]; 2100 } 2101 if (mbs) { 2102 fshift += imax[mbs-1] - ailen[mbs-1]; 2103 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 2104 } 2105 /* reset ilen and imax for each row */ 2106 for (i=0; i<mbs; i++) { 2107 ailen[i] = imax[i] = ai[i+1] - ai[i]; 2108 } 2109 a->nz = ai[mbs]; 2110 2111 /* diagonals may have moved, so kill the diagonal pointers */ 2112 a->idiagvalid = PETSC_FALSE; 2113 if (fshift && a->diag) { 2114 ierr = PetscFree(a->diag);CHKERRQ(ierr); 2115 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2116 a->diag = 0; 2117 } 2118 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 2119 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 2120 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 2121 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 2122 2123 A->info.mallocs += a->reallocs; 2124 a->reallocs = 0; 2125 A->info.nz_unneeded = (PetscReal)fshift*bs2; 2126 2127 ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 2128 2129 A->same_nonzero = PETSC_TRUE; 2130 PetscFunctionReturn(0); 2131 } 2132 2133 /* 2134 This function returns an array of flags which indicate the locations of contiguous 2135 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2136 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2137 Assume: sizes should be long enough to hold all the values. 2138 */ 2139 #undef __FUNCT__ 2140 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 2141 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2142 { 2143 PetscInt i,j,k,row; 2144 PetscBool flg; 2145 2146 PetscFunctionBegin; 2147 for (i=0,j=0; i<n; j++) { 2148 row = idx[i]; 2149 if (row%bs!=0) { /* Not the begining of a block */ 2150 sizes[j] = 1; 2151 i++; 2152 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2153 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 2154 i++; 2155 } else { /* Begining of the block, so check if the complete block exists */ 2156 flg = PETSC_TRUE; 2157 for (k=1; k<bs; k++) { 2158 if (row+k != idx[i+k]) { /* break in the block */ 2159 flg = PETSC_FALSE; 2160 break; 2161 } 2162 } 2163 if (flg) { /* No break in the bs */ 2164 sizes[j] = bs; 2165 i += bs; 2166 } else { 2167 sizes[j] = 1; 2168 i++; 2169 } 2170 } 2171 } 2172 *bs_max = j; 2173 PetscFunctionReturn(0); 2174 } 2175 2176 #undef __FUNCT__ 2177 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2178 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2179 { 2180 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2181 PetscErrorCode ierr; 2182 PetscInt i,j,k,count,*rows; 2183 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2184 PetscScalar zero = 0.0; 2185 MatScalar *aa; 2186 const PetscScalar *xx; 2187 PetscScalar *bb; 2188 2189 PetscFunctionBegin; 2190 /* fix right hand side if needed */ 2191 if (x && b) { 2192 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2193 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2194 for (i=0; i<is_n; i++) { 2195 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2196 } 2197 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2198 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2199 } 2200 2201 /* Make a copy of the IS and sort it */ 2202 /* allocate memory for rows,sizes */ 2203 ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2204 2205 /* copy IS values to rows, and sort them */ 2206 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2207 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2208 2209 if (baij->keepnonzeropattern) { 2210 for (i=0; i<is_n; i++) sizes[i] = 1; 2211 bs_max = is_n; 2212 A->same_nonzero = PETSC_TRUE; 2213 } else { 2214 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2215 2216 A->same_nonzero = PETSC_FALSE; 2217 } 2218 2219 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2220 row = rows[j]; 2221 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2222 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2223 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2224 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2225 if (diag != (PetscScalar)0.0) { 2226 if (baij->ilen[row/bs] > 0) { 2227 baij->ilen[row/bs] = 1; 2228 baij->j[baij->i[row/bs]] = row/bs; 2229 2230 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2231 } 2232 /* Now insert all the diagonal values for this bs */ 2233 for (k=0; k<bs; k++) { 2234 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2235 } 2236 } else { /* (diag == 0.0) */ 2237 baij->ilen[row/bs] = 0; 2238 } /* end (diag == 0.0) */ 2239 } else { /* (sizes[i] != bs) */ 2240 #if defined(PETSC_USE_DEBUG) 2241 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2242 #endif 2243 for (k=0; k<count; k++) { 2244 aa[0] = zero; 2245 aa += bs; 2246 } 2247 if (diag != (PetscScalar)0.0) { 2248 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2249 } 2250 } 2251 } 2252 2253 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2254 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2255 PetscFunctionReturn(0); 2256 } 2257 2258 #undef __FUNCT__ 2259 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2260 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2261 { 2262 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2263 PetscErrorCode ierr; 2264 PetscInt i,j,k,count; 2265 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2266 PetscScalar zero = 0.0; 2267 MatScalar *aa; 2268 const PetscScalar *xx; 2269 PetscScalar *bb; 2270 PetscBool *zeroed,vecs = PETSC_FALSE; 2271 2272 PetscFunctionBegin; 2273 /* fix right hand side if needed */ 2274 if (x && b) { 2275 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2276 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2277 vecs = PETSC_TRUE; 2278 } 2279 A->same_nonzero = PETSC_TRUE; 2280 2281 /* zero the columns */ 2282 ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr); 2283 ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 2284 for (i=0; i<is_n; i++) { 2285 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2286 zeroed[is_idx[i]] = PETSC_TRUE; 2287 } 2288 for (i=0; i<A->rmap->N; i++) { 2289 if (!zeroed[i]) { 2290 row = i/bs; 2291 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2292 for (k=0; k<bs; k++) { 2293 col = bs*baij->j[j] + k; 2294 if (zeroed[col]) { 2295 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2296 if (vecs) bb[i] -= aa[0]*xx[col]; 2297 aa[0] = 0.0; 2298 } 2299 } 2300 } 2301 } else if (vecs) bb[i] = diag*xx[i]; 2302 } 2303 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2304 if (vecs) { 2305 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2306 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2307 } 2308 2309 /* zero the rows */ 2310 for (i=0; i<is_n; i++) { 2311 row = is_idx[i]; 2312 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2313 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2314 for (k=0; k<count; k++) { 2315 aa[0] = zero; 2316 aa += bs; 2317 } 2318 if (diag != (PetscScalar)0.0) { 2319 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2320 } 2321 } 2322 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2323 PetscFunctionReturn(0); 2324 } 2325 2326 #undef __FUNCT__ 2327 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2328 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2329 { 2330 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2331 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2332 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2333 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2334 PetscErrorCode ierr; 2335 PetscInt ridx,cidx,bs2=a->bs2; 2336 PetscBool roworiented=a->roworiented; 2337 MatScalar *ap,value,*aa=a->a,*bap; 2338 2339 PetscFunctionBegin; 2340 if (v) PetscValidScalarPointer(v,6); 2341 for (k=0; k<m; k++) { /* loop over added rows */ 2342 row = im[k]; 2343 brow = row/bs; 2344 if (row < 0) continue; 2345 #if defined(PETSC_USE_DEBUG) 2346 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2347 #endif 2348 rp = aj + ai[brow]; 2349 ap = aa + bs2*ai[brow]; 2350 rmax = imax[brow]; 2351 nrow = ailen[brow]; 2352 low = 0; 2353 high = nrow; 2354 for (l=0; l<n; l++) { /* loop over added columns */ 2355 if (in[l] < 0) continue; 2356 #if defined(PETSC_USE_DEBUG) 2357 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2358 #endif 2359 col = in[l]; bcol = col/bs; 2360 ridx = row % bs; cidx = col % bs; 2361 if (roworiented) { 2362 value = v[l + k*n]; 2363 } else { 2364 value = v[k + l*m]; 2365 } 2366 if (col <= lastcol) low = 0; else high = nrow; 2367 lastcol = col; 2368 while (high-low > 7) { 2369 t = (low+high)/2; 2370 if (rp[t] > bcol) high = t; 2371 else low = t; 2372 } 2373 for (i=low; i<high; i++) { 2374 if (rp[i] > bcol) break; 2375 if (rp[i] == bcol) { 2376 bap = ap + bs2*i + bs*cidx + ridx; 2377 if (is == ADD_VALUES) *bap += value; 2378 else *bap = value; 2379 goto noinsert1; 2380 } 2381 } 2382 if (nonew == 1) goto noinsert1; 2383 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2384 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2385 N = nrow++ - 1; high++; 2386 /* shift up all the later entries in this row */ 2387 for (ii=N; ii>=i; ii--) { 2388 rp[ii+1] = rp[ii]; 2389 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2390 } 2391 if (N>=i) { 2392 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2393 } 2394 rp[i] = bcol; 2395 ap[bs2*i + bs*cidx + ridx] = value; 2396 a->nz++; 2397 noinsert1:; 2398 low = i; 2399 } 2400 ailen[brow] = nrow; 2401 } 2402 A->same_nonzero = PETSC_FALSE; 2403 PetscFunctionReturn(0); 2404 } 2405 2406 #undef __FUNCT__ 2407 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2408 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2409 { 2410 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2411 Mat outA; 2412 PetscErrorCode ierr; 2413 PetscBool row_identity,col_identity; 2414 2415 PetscFunctionBegin; 2416 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2417 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2418 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2419 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2420 2421 outA = inA; 2422 inA->factortype = MAT_FACTOR_LU; 2423 2424 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2425 2426 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2427 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2428 a->row = row; 2429 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2430 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2431 a->col = col; 2432 2433 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2434 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2435 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2436 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2437 2438 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2439 if (!a->solve_work) { 2440 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2441 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2442 } 2443 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2444 PetscFunctionReturn(0); 2445 } 2446 2447 EXTERN_C_BEGIN 2448 #undef __FUNCT__ 2449 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2450 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2451 { 2452 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2453 PetscInt i,nz,mbs; 2454 2455 PetscFunctionBegin; 2456 nz = baij->maxnz; 2457 mbs = baij->mbs; 2458 for (i=0; i<nz; i++) { 2459 baij->j[i] = indices[i]; 2460 } 2461 baij->nz = nz; 2462 for (i=0; i<mbs; i++) { 2463 baij->ilen[i] = baij->imax[i]; 2464 } 2465 PetscFunctionReturn(0); 2466 } 2467 EXTERN_C_END 2468 2469 #undef __FUNCT__ 2470 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2471 /*@ 2472 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2473 in the matrix. 2474 2475 Input Parameters: 2476 + mat - the SeqBAIJ matrix 2477 - indices - the column indices 2478 2479 Level: advanced 2480 2481 Notes: 2482 This can be called if you have precomputed the nonzero structure of the 2483 matrix and want to provide it to the matrix object to improve the performance 2484 of the MatSetValues() operation. 2485 2486 You MUST have set the correct numbers of nonzeros per row in the call to 2487 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2488 2489 MUST be called before any calls to MatSetValues(); 2490 2491 @*/ 2492 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2493 { 2494 PetscErrorCode ierr; 2495 2496 PetscFunctionBegin; 2497 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2498 PetscValidPointer(indices,2); 2499 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2500 PetscFunctionReturn(0); 2501 } 2502 2503 #undef __FUNCT__ 2504 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2505 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2506 { 2507 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2508 PetscErrorCode ierr; 2509 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2510 PetscReal atmp; 2511 PetscScalar *x,zero = 0.0; 2512 MatScalar *aa; 2513 PetscInt ncols,brow,krow,kcol; 2514 2515 PetscFunctionBegin; 2516 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2517 bs = A->rmap->bs; 2518 aa = a->a; 2519 ai = a->i; 2520 aj = a->j; 2521 mbs = a->mbs; 2522 2523 ierr = VecSet(v,zero);CHKERRQ(ierr); 2524 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2525 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2526 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2527 for (i=0; i<mbs; i++) { 2528 ncols = ai[1] - ai[0]; ai++; 2529 brow = bs*i; 2530 for (j=0; j<ncols; j++) { 2531 for (kcol=0; kcol<bs; kcol++) { 2532 for (krow=0; krow<bs; krow++) { 2533 atmp = PetscAbsScalar(*aa);aa++; 2534 row = brow + krow; /* row index */ 2535 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2536 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2537 } 2538 } 2539 aj++; 2540 } 2541 } 2542 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2543 PetscFunctionReturn(0); 2544 } 2545 2546 #undef __FUNCT__ 2547 #define __FUNCT__ "MatCopy_SeqBAIJ" 2548 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2549 { 2550 PetscErrorCode ierr; 2551 2552 PetscFunctionBegin; 2553 /* If the two matrices have the same copy implementation, use fast copy. */ 2554 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2555 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2556 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2557 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2558 2559 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2560 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2561 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2562 } else { 2563 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2564 } 2565 PetscFunctionReturn(0); 2566 } 2567 2568 #undef __FUNCT__ 2569 #define __FUNCT__ "MatSetUp_SeqBAIJ" 2570 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2571 { 2572 PetscErrorCode ierr; 2573 2574 PetscFunctionBegin; 2575 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2576 PetscFunctionReturn(0); 2577 } 2578 2579 #undef __FUNCT__ 2580 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ" 2581 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2582 { 2583 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2584 2585 PetscFunctionBegin; 2586 *array = a->a; 2587 PetscFunctionReturn(0); 2588 } 2589 2590 #undef __FUNCT__ 2591 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ" 2592 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2593 { 2594 PetscFunctionBegin; 2595 PetscFunctionReturn(0); 2596 } 2597 2598 #undef __FUNCT__ 2599 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2600 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2601 { 2602 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2603 PetscErrorCode ierr; 2604 PetscInt i,bs=Y->rmap->bs,j,bs2=bs*bs; 2605 PetscBLASInt one=1; 2606 2607 PetscFunctionBegin; 2608 if (str == SAME_NONZERO_PATTERN) { 2609 PetscScalar alpha = a; 2610 PetscBLASInt bnz; 2611 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2612 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2613 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2614 if (y->xtoy && y->XtoY != X) { 2615 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2616 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2617 } 2618 if (!y->xtoy) { /* get xtoy */ 2619 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2620 y->XtoY = X; 2621 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2622 } 2623 for (i=0; i<x->nz; i++) { 2624 j = 0; 2625 while (j < bs2) { 2626 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2627 j++; 2628 } 2629 } 2630 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2631 } else { 2632 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2633 } 2634 PetscFunctionReturn(0); 2635 } 2636 2637 #undef __FUNCT__ 2638 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2639 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2640 { 2641 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2642 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2643 MatScalar *aa = a->a; 2644 2645 PetscFunctionBegin; 2646 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2647 PetscFunctionReturn(0); 2648 } 2649 2650 #undef __FUNCT__ 2651 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2652 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2653 { 2654 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2655 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2656 MatScalar *aa = a->a; 2657 2658 PetscFunctionBegin; 2659 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2660 PetscFunctionReturn(0); 2661 } 2662 2663 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2664 2665 #undef __FUNCT__ 2666 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2667 /* 2668 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2669 */ 2670 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2671 { 2672 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2673 PetscErrorCode ierr; 2674 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2675 PetscInt nz = a->i[m],row,*jj,mr,col; 2676 2677 PetscFunctionBegin; 2678 *nn = n; 2679 if (!ia) PetscFunctionReturn(0); 2680 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2681 else { 2682 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2683 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2684 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2685 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2686 jj = a->j; 2687 for (i=0; i<nz; i++) { 2688 collengths[jj[i]]++; 2689 } 2690 cia[0] = oshift; 2691 for (i=0; i<n; i++) { 2692 cia[i+1] = cia[i] + collengths[i]; 2693 } 2694 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2695 jj = a->j; 2696 for (row=0; row<m; row++) { 2697 mr = a->i[row+1] - a->i[row]; 2698 for (i=0; i<mr; i++) { 2699 col = *jj++; 2700 2701 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2702 } 2703 } 2704 ierr = PetscFree(collengths);CHKERRQ(ierr); 2705 *ia = cia; *ja = cja; 2706 } 2707 PetscFunctionReturn(0); 2708 } 2709 2710 #undef __FUNCT__ 2711 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2712 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2713 { 2714 PetscErrorCode ierr; 2715 2716 PetscFunctionBegin; 2717 if (!ia) PetscFunctionReturn(0); 2718 ierr = PetscFree(*ia);CHKERRQ(ierr); 2719 ierr = PetscFree(*ja);CHKERRQ(ierr); 2720 PetscFunctionReturn(0); 2721 } 2722 2723 #undef __FUNCT__ 2724 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2725 PetscErrorCode MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2726 { 2727 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 2728 PetscErrorCode ierr; 2729 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow; 2730 PetscScalar dx,*y,*xx,*w3_array; 2731 PetscScalar *vscale_array; 2732 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2733 Vec w1 = coloring->w1,w2=coloring->w2,w3; 2734 void *fctx = coloring->fctx; 2735 PetscBool flg = PETSC_FALSE; 2736 PetscInt ctype = coloring->ctype,N,col_start=0,col_end=0; 2737 Vec x1_tmp; 2738 2739 PetscFunctionBegin; 2740 PetscValidHeaderSpecific(J,MAT_CLASSID,1); 2741 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2); 2742 PetscValidHeaderSpecific(x1,VEC_CLASSID,3); 2743 if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2744 2745 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2746 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2747 ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr); 2748 if (flg) { 2749 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2750 } else { 2751 PetscBool assembled; 2752 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2753 if (assembled) { 2754 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2755 } 2756 } 2757 2758 x1_tmp = x1; 2759 if (!coloring->vscale) { 2760 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2761 } 2762 2763 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2764 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2765 } 2766 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2767 2768 /* Set w1 = F(x1) */ 2769 if (!coloring->fset) { 2770 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2771 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2772 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2773 } else { 2774 coloring->fset = PETSC_FALSE; 2775 } 2776 2777 if (!coloring->w3) { 2778 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2779 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2780 } 2781 w3 = coloring->w3; 2782 2783 CHKMEMQ; 2784 /* Compute all the local scale factors, including ghost points */ 2785 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2786 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2787 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2788 if (ctype == IS_COLORING_GHOSTED) { 2789 col_start = 0; col_end = N; 2790 } else if (ctype == IS_COLORING_GLOBAL) { 2791 xx = xx - start; 2792 vscale_array = vscale_array - start; 2793 col_start = start; col_end = N + start; 2794 } 2795 CHKMEMQ; 2796 for (col=col_start; col<col_end; col++) { 2797 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2798 if (coloring->htype[0] == 'w') { 2799 dx = 1.0 + unorm; 2800 } else { 2801 dx = xx[col]; 2802 } 2803 if (dx == (PetscScalar)0.0) dx = 1.0; 2804 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2805 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2806 dx *= epsilon; 2807 vscale_array[col] = (PetscScalar)1.0/dx; 2808 } CHKMEMQ; 2809 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2810 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2811 if (ctype == IS_COLORING_GLOBAL) { 2812 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2813 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2814 } 2815 CHKMEMQ; 2816 if (coloring->vscaleforrow) { 2817 vscaleforrow = coloring->vscaleforrow; 2818 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2819 2820 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2821 /* 2822 Loop over each color 2823 */ 2824 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2825 for (k=0; k<coloring->ncolors; k++) { 2826 coloring->currentcolor = k; 2827 for (i=0; i<bs; i++) { 2828 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2829 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2830 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2831 /* 2832 Loop over each column associated with color 2833 adding the perturbation to the vector w3. 2834 */ 2835 for (l=0; l<coloring->ncolumns[k]; l++) { 2836 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2837 if (coloring->htype[0] == 'w') { 2838 dx = 1.0 + unorm; 2839 } else { 2840 dx = xx[col]; 2841 } 2842 if (dx == (PetscScalar)0.0) dx = 1.0; 2843 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2844 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2845 dx *= epsilon; 2846 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2847 w3_array[col] += dx; 2848 } 2849 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2850 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2851 2852 /* 2853 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2854 w2 = F(x1 + dx) - F(x1) 2855 */ 2856 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2857 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2858 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2859 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2860 2861 /* 2862 Loop over rows of vector, putting results into Jacobian matrix 2863 */ 2864 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2865 for (l=0; l<coloring->nrows[k]; l++) { 2866 row = bs*coloring->rows[k][l]; /* local row index */ 2867 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2868 for (j=0; j<bs; j++) { 2869 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2870 srows[j] = row + start + j; 2871 } 2872 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2873 } 2874 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2875 } 2876 } /* endof for each color */ 2877 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2878 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2879 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2880 ierr = PetscFree(srows);CHKERRQ(ierr); 2881 2882 coloring->currentcolor = -1; 2883 2884 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2885 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2886 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2887 PetscFunctionReturn(0); 2888 } 2889 2890 /* -------------------------------------------------------------------*/ 2891 static struct _MatOps MatOps_Values = { 2892 MatSetValues_SeqBAIJ, 2893 MatGetRow_SeqBAIJ, 2894 MatRestoreRow_SeqBAIJ, 2895 MatMult_SeqBAIJ_N, 2896 /* 4*/ MatMultAdd_SeqBAIJ_N, 2897 MatMultTranspose_SeqBAIJ, 2898 MatMultTransposeAdd_SeqBAIJ, 2899 0, 2900 0, 2901 0, 2902 /* 10*/ 0, 2903 MatLUFactor_SeqBAIJ, 2904 0, 2905 0, 2906 MatTranspose_SeqBAIJ, 2907 /* 15*/ MatGetInfo_SeqBAIJ, 2908 MatEqual_SeqBAIJ, 2909 MatGetDiagonal_SeqBAIJ, 2910 MatDiagonalScale_SeqBAIJ, 2911 MatNorm_SeqBAIJ, 2912 /* 20*/ 0, 2913 MatAssemblyEnd_SeqBAIJ, 2914 MatSetOption_SeqBAIJ, 2915 MatZeroEntries_SeqBAIJ, 2916 /* 24*/ MatZeroRows_SeqBAIJ, 2917 0, 2918 0, 2919 0, 2920 0, 2921 /* 29*/ MatSetUp_SeqBAIJ, 2922 0, 2923 0, 2924 0, 2925 0, 2926 /* 34*/ MatDuplicate_SeqBAIJ, 2927 0, 2928 0, 2929 MatILUFactor_SeqBAIJ, 2930 0, 2931 /* 39*/ MatAXPY_SeqBAIJ, 2932 MatGetSubMatrices_SeqBAIJ, 2933 MatIncreaseOverlap_SeqBAIJ, 2934 MatGetValues_SeqBAIJ, 2935 MatCopy_SeqBAIJ, 2936 /* 44*/ 0, 2937 MatScale_SeqBAIJ, 2938 0, 2939 0, 2940 MatZeroRowsColumns_SeqBAIJ, 2941 /* 49*/ 0, 2942 MatGetRowIJ_SeqBAIJ, 2943 MatRestoreRowIJ_SeqBAIJ, 2944 MatGetColumnIJ_SeqBAIJ, 2945 MatRestoreColumnIJ_SeqBAIJ, 2946 /* 54*/ MatFDColoringCreate_SeqAIJ, 2947 0, 2948 0, 2949 0, 2950 MatSetValuesBlocked_SeqBAIJ, 2951 /* 59*/ MatGetSubMatrix_SeqBAIJ, 2952 MatDestroy_SeqBAIJ, 2953 MatView_SeqBAIJ, 2954 0, 2955 0, 2956 /* 64*/ 0, 2957 0, 2958 0, 2959 0, 2960 0, 2961 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2962 0, 2963 MatConvert_Basic, 2964 0, 2965 0, 2966 /* 74*/ 0, 2967 MatFDColoringApply_BAIJ, 2968 0, 2969 0, 2970 0, 2971 /* 79*/ 0, 2972 0, 2973 0, 2974 0, 2975 MatLoad_SeqBAIJ, 2976 /* 84*/ 0, 2977 0, 2978 0, 2979 0, 2980 0, 2981 /* 89*/ 0, 2982 0, 2983 0, 2984 0, 2985 0, 2986 /* 94*/ 0, 2987 0, 2988 0, 2989 0, 2990 0, 2991 /* 99*/ 0, 2992 0, 2993 0, 2994 0, 2995 0, 2996 /*104*/ 0, 2997 MatRealPart_SeqBAIJ, 2998 MatImaginaryPart_SeqBAIJ, 2999 0, 3000 0, 3001 /*109*/ 0, 3002 0, 3003 0, 3004 0, 3005 MatMissingDiagonal_SeqBAIJ, 3006 /*114*/ 0, 3007 0, 3008 0, 3009 0, 3010 0, 3011 /*119*/ 0, 3012 0, 3013 MatMultHermitianTranspose_SeqBAIJ, 3014 MatMultHermitianTransposeAdd_SeqBAIJ, 3015 0, 3016 /*124*/ 0, 3017 0, 3018 MatInvertBlockDiagonal_SeqBAIJ 3019 }; 3020 3021 EXTERN_C_BEGIN 3022 #undef __FUNCT__ 3023 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 3024 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3025 { 3026 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 3027 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 3028 PetscErrorCode ierr; 3029 3030 PetscFunctionBegin; 3031 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3032 3033 /* allocate space for values if not already there */ 3034 if (!aij->saved_values) { 3035 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 3036 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 3037 } 3038 3039 /* copy values over */ 3040 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3041 PetscFunctionReturn(0); 3042 } 3043 EXTERN_C_END 3044 3045 EXTERN_C_BEGIN 3046 #undef __FUNCT__ 3047 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 3048 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3049 { 3050 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 3051 PetscErrorCode ierr; 3052 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 3053 3054 PetscFunctionBegin; 3055 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3056 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 3057 3058 /* copy values over */ 3059 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3060 PetscFunctionReturn(0); 3061 } 3062 EXTERN_C_END 3063 3064 EXTERN_C_BEGIN 3065 extern PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 3066 extern PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 3067 EXTERN_C_END 3068 3069 EXTERN_C_BEGIN 3070 #undef __FUNCT__ 3071 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 3072 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 3073 { 3074 Mat_SeqBAIJ *b; 3075 PetscErrorCode ierr; 3076 PetscInt i,mbs,nbs,bs2; 3077 PetscBool flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 3078 3079 PetscFunctionBegin; 3080 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 3081 if (nz == MAT_SKIP_ALLOCATION) { 3082 skipallocation = PETSC_TRUE; 3083 nz = 0; 3084 } 3085 3086 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3087 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3088 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3089 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3090 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3091 3092 B->preallocated = PETSC_TRUE; 3093 3094 mbs = B->rmap->n/bs; 3095 nbs = B->cmap->n/bs; 3096 bs2 = bs*bs; 3097 3098 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 3099 3100 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3101 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 3102 if (nnz) { 3103 for (i=0; i<mbs; i++) { 3104 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 3105 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 3106 } 3107 } 3108 3109 b = (Mat_SeqBAIJ*)B->data; 3110 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 3111 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3112 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3113 3114 if (!flg) { 3115 switch (bs) { 3116 case 1: 3117 B->ops->mult = MatMult_SeqBAIJ_1; 3118 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3119 B->ops->sor = MatSOR_SeqBAIJ_1; 3120 break; 3121 case 2: 3122 B->ops->mult = MatMult_SeqBAIJ_2; 3123 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3124 B->ops->sor = MatSOR_SeqBAIJ_2; 3125 break; 3126 case 3: 3127 B->ops->mult = MatMult_SeqBAIJ_3; 3128 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3129 B->ops->sor = MatSOR_SeqBAIJ_3; 3130 break; 3131 case 4: 3132 B->ops->mult = MatMult_SeqBAIJ_4; 3133 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3134 B->ops->sor = MatSOR_SeqBAIJ_4; 3135 break; 3136 case 5: 3137 B->ops->mult = MatMult_SeqBAIJ_5; 3138 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3139 B->ops->sor = MatSOR_SeqBAIJ_5; 3140 break; 3141 case 6: 3142 B->ops->mult = MatMult_SeqBAIJ_6; 3143 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3144 B->ops->sor = MatSOR_SeqBAIJ_6; 3145 break; 3146 case 7: 3147 B->ops->mult = MatMult_SeqBAIJ_7; 3148 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3149 B->ops->sor = MatSOR_SeqBAIJ_7; 3150 break; 3151 case 15: 3152 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3153 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3154 B->ops->sor = MatSOR_SeqBAIJ_N; 3155 break; 3156 default: 3157 B->ops->mult = MatMult_SeqBAIJ_N; 3158 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3159 B->ops->sor = MatSOR_SeqBAIJ_N; 3160 break; 3161 } 3162 } 3163 b->mbs = mbs; 3164 b->nbs = nbs; 3165 if (!skipallocation) { 3166 if (!b->imax) { 3167 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 3168 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3169 3170 b->free_imax_ilen = PETSC_TRUE; 3171 } 3172 /* b->ilen will count nonzeros in each block row so far. */ 3173 for (i=0; i<mbs; i++) b->ilen[i] = 0; 3174 if (!nnz) { 3175 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3176 else if (nz < 0) nz = 1; 3177 for (i=0; i<mbs; i++) b->imax[i] = nz; 3178 nz = nz*mbs; 3179 } else { 3180 nz = 0; 3181 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 3182 } 3183 3184 /* allocate the matrix space */ 3185 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 3186 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 3187 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 3188 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 3189 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3190 3191 b->singlemalloc = PETSC_TRUE; 3192 b->i[0] = 0; 3193 for (i=1; i<mbs+1; i++) { 3194 b->i[i] = b->i[i-1] + b->imax[i-1]; 3195 } 3196 b->free_a = PETSC_TRUE; 3197 b->free_ij = PETSC_TRUE; 3198 } else { 3199 b->free_a = PETSC_FALSE; 3200 b->free_ij = PETSC_FALSE; 3201 } 3202 3203 b->bs2 = bs2; 3204 b->mbs = mbs; 3205 b->nz = 0; 3206 b->maxnz = nz; 3207 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3208 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 3209 PetscFunctionReturn(0); 3210 } 3211 EXTERN_C_END 3212 3213 EXTERN_C_BEGIN 3214 #undef __FUNCT__ 3215 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3216 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3217 { 3218 PetscInt i,m,nz,nz_max=0,*nnz; 3219 PetscScalar *values=0; 3220 PetscErrorCode ierr; 3221 3222 PetscFunctionBegin; 3223 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3224 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3225 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3226 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3227 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3228 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3229 m = B->rmap->n/bs; 3230 3231 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 3232 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3233 for (i=0; i<m; i++) { 3234 nz = ii[i+1]- ii[i]; 3235 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 3236 nz_max = PetscMax(nz_max, nz); 3237 nnz[i] = nz; 3238 } 3239 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3240 ierr = PetscFree(nnz);CHKERRQ(ierr); 3241 3242 values = (PetscScalar*)V; 3243 if (!values) { 3244 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3245 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3246 } 3247 for (i=0; i<m; i++) { 3248 PetscInt ncols = ii[i+1] - ii[i]; 3249 const PetscInt *icols = jj + ii[i]; 3250 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3251 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3252 } 3253 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3254 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3255 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3256 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3257 PetscFunctionReturn(0); 3258 } 3259 EXTERN_C_END 3260 3261 3262 EXTERN_C_BEGIN 3263 extern PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3264 extern PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*); 3265 #if defined(PETSC_HAVE_MUMPS) 3266 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3267 #endif 3268 extern PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 3269 EXTERN_C_END 3270 3271 /*MC 3272 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3273 block sparse compressed row format. 3274 3275 Options Database Keys: 3276 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3277 3278 Level: beginner 3279 3280 .seealso: MatCreateSeqBAIJ() 3281 M*/ 3282 3283 EXTERN_C_BEGIN 3284 extern PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3285 EXTERN_C_END 3286 3287 EXTERN_C_BEGIN 3288 #undef __FUNCT__ 3289 #define __FUNCT__ "MatCreate_SeqBAIJ" 3290 PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3291 { 3292 PetscErrorCode ierr; 3293 PetscMPIInt size; 3294 Mat_SeqBAIJ *b; 3295 3296 PetscFunctionBegin; 3297 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 3298 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3299 3300 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3301 B->data = (void*)b; 3302 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3303 3304 b->row = 0; 3305 b->col = 0; 3306 b->icol = 0; 3307 b->reallocs = 0; 3308 b->saved_values = 0; 3309 3310 b->roworiented = PETSC_TRUE; 3311 b->nonew = 0; 3312 b->diag = 0; 3313 b->solve_work = 0; 3314 b->mult_work = 0; 3315 B->spptr = 0; 3316 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3317 b->keepnonzeropattern = PETSC_FALSE; 3318 b->xtoy = 0; 3319 b->XtoY = 0; 3320 B->same_nonzero = PETSC_FALSE; 3321 3322 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C", 3323 "MatGetFactorAvailable_seqbaij_petsc", 3324 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3325 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C", 3326 "MatGetFactor_seqbaij_petsc", 3327 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3328 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_bstrm_C", 3329 "MatGetFactor_seqbaij_bstrm", 3330 MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr); 3331 #if defined(PETSC_HAVE_MUMPS) 3332 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr); 3333 #endif 3334 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatInvertBlockDiagonal_C", 3335 "MatInvertBlockDiagonal_SeqBAIJ", 3336 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3337 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3338 "MatStoreValues_SeqBAIJ", 3339 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3340 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3341 "MatRetrieveValues_SeqBAIJ", 3342 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3343 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 3344 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 3345 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3346 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 3347 "MatConvert_SeqBAIJ_SeqAIJ", 3348 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3349 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 3350 "MatConvert_SeqBAIJ_SeqSBAIJ", 3351 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3352 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 3353 "MatSeqBAIJSetPreallocation_SeqBAIJ", 3354 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3355 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 3356 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 3357 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3358 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C", 3359 "MatConvert_SeqBAIJ_SeqBSTRM", 3360 MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3361 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3362 "MatIsTranspose_SeqBAIJ", 3363 MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3364 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3365 PetscFunctionReturn(0); 3366 } 3367 EXTERN_C_END 3368 3369 #undef __FUNCT__ 3370 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3371 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3372 { 3373 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3374 PetscErrorCode ierr; 3375 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3376 3377 PetscFunctionBegin; 3378 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3379 3380 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3381 c->imax = a->imax; 3382 c->ilen = a->ilen; 3383 c->free_imax_ilen = PETSC_FALSE; 3384 } else { 3385 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3386 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3387 for (i=0; i<mbs; i++) { 3388 c->imax[i] = a->imax[i]; 3389 c->ilen[i] = a->ilen[i]; 3390 } 3391 c->free_imax_ilen = PETSC_TRUE; 3392 } 3393 3394 /* allocate the matrix space */ 3395 if (mallocmatspace) { 3396 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3397 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3398 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3399 ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr); 3400 3401 c->i = a->i; 3402 c->j = a->j; 3403 c->singlemalloc = PETSC_FALSE; 3404 c->free_a = PETSC_TRUE; 3405 c->free_ij = PETSC_FALSE; 3406 c->parent = A; 3407 C->preallocated = PETSC_TRUE; 3408 C->assembled = PETSC_TRUE; 3409 3410 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3411 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3412 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3413 } else { 3414 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3415 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3416 3417 c->singlemalloc = PETSC_TRUE; 3418 c->free_a = PETSC_TRUE; 3419 c->free_ij = PETSC_TRUE; 3420 3421 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3422 if (mbs > 0) { 3423 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3424 if (cpvalues == MAT_COPY_VALUES) { 3425 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3426 } else { 3427 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3428 } 3429 } 3430 C->preallocated = PETSC_TRUE; 3431 C->assembled = PETSC_TRUE; 3432 } 3433 } 3434 3435 c->roworiented = a->roworiented; 3436 c->nonew = a->nonew; 3437 3438 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3439 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3440 3441 c->bs2 = a->bs2; 3442 c->mbs = a->mbs; 3443 c->nbs = a->nbs; 3444 3445 if (a->diag) { 3446 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3447 c->diag = a->diag; 3448 c->free_diag = PETSC_FALSE; 3449 } else { 3450 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3451 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3452 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3453 c->free_diag = PETSC_TRUE; 3454 } 3455 } else c->diag = 0; 3456 3457 c->nz = a->nz; 3458 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3459 c->solve_work = 0; 3460 c->mult_work = 0; 3461 3462 c->compressedrow.use = a->compressedrow.use; 3463 c->compressedrow.nrows = a->compressedrow.nrows; 3464 c->compressedrow.check = a->compressedrow.check; 3465 if (a->compressedrow.use) { 3466 i = a->compressedrow.nrows; 3467 ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 3468 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3469 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3470 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3471 } else { 3472 c->compressedrow.use = PETSC_FALSE; 3473 c->compressedrow.i = PETSC_NULL; 3474 c->compressedrow.rindex = PETSC_NULL; 3475 } 3476 C->same_nonzero = A->same_nonzero; 3477 3478 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3479 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3480 PetscFunctionReturn(0); 3481 } 3482 3483 #undef __FUNCT__ 3484 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3485 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3486 { 3487 PetscErrorCode ierr; 3488 3489 PetscFunctionBegin; 3490 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 3491 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3492 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3493 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3494 PetscFunctionReturn(0); 3495 } 3496 3497 #undef __FUNCT__ 3498 #define __FUNCT__ "MatLoad_SeqBAIJ" 3499 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3500 { 3501 Mat_SeqBAIJ *a; 3502 PetscErrorCode ierr; 3503 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3504 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3505 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3506 PetscInt *masked,nmask,tmp,bs2,ishift; 3507 PetscMPIInt size; 3508 int fd; 3509 PetscScalar *aa; 3510 MPI_Comm comm = ((PetscObject)viewer)->comm; 3511 3512 PetscFunctionBegin; 3513 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3514 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3515 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3516 bs2 = bs*bs; 3517 3518 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3519 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3520 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3521 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3522 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3523 M = header[1]; N = header[2]; nz = header[3]; 3524 3525 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3526 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3527 3528 /* 3529 This code adds extra rows to make sure the number of rows is 3530 divisible by the blocksize 3531 */ 3532 mbs = M/bs; 3533 extra_rows = bs - M + bs*(mbs); 3534 if (extra_rows == bs) extra_rows = 0; 3535 else mbs++; 3536 if (extra_rows) { 3537 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3538 } 3539 3540 /* Set global sizes if not already set */ 3541 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3542 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3543 } else { /* Check if the matrix global sizes are correct */ 3544 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3545 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3546 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3547 } 3548 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3549 } 3550 3551 /* read in row lengths */ 3552 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3553 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3554 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3555 3556 /* read in column indices */ 3557 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3558 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3559 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3560 3561 /* loop over row lengths determining block row lengths */ 3562 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3563 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3564 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3565 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3566 rowcount = 0; 3567 nzcount = 0; 3568 for (i=0; i<mbs; i++) { 3569 nmask = 0; 3570 for (j=0; j<bs; j++) { 3571 kmax = rowlengths[rowcount]; 3572 for (k=0; k<kmax; k++) { 3573 tmp = jj[nzcount++]/bs; 3574 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3575 } 3576 rowcount++; 3577 } 3578 browlengths[i] += nmask; 3579 /* zero out the mask elements we set */ 3580 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3581 } 3582 3583 /* Do preallocation */ 3584 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3585 a = (Mat_SeqBAIJ*)newmat->data; 3586 3587 /* set matrix "i" values */ 3588 a->i[0] = 0; 3589 for (i=1; i<= mbs; i++) { 3590 a->i[i] = a->i[i-1] + browlengths[i-1]; 3591 a->ilen[i-1] = browlengths[i-1]; 3592 } 3593 a->nz = 0; 3594 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3595 3596 /* read in nonzero values */ 3597 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3598 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3599 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3600 3601 /* set "a" and "j" values into matrix */ 3602 nzcount = 0; jcount = 0; 3603 for (i=0; i<mbs; i++) { 3604 nzcountb = nzcount; 3605 nmask = 0; 3606 for (j=0; j<bs; j++) { 3607 kmax = rowlengths[i*bs+j]; 3608 for (k=0; k<kmax; k++) { 3609 tmp = jj[nzcount++]/bs; 3610 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3611 } 3612 } 3613 /* sort the masked values */ 3614 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3615 3616 /* set "j" values into matrix */ 3617 maskcount = 1; 3618 for (j=0; j<nmask; j++) { 3619 a->j[jcount++] = masked[j]; 3620 mask[masked[j]] = maskcount++; 3621 } 3622 /* set "a" values into matrix */ 3623 ishift = bs2*a->i[i]; 3624 for (j=0; j<bs; j++) { 3625 kmax = rowlengths[i*bs+j]; 3626 for (k=0; k<kmax; k++) { 3627 tmp = jj[nzcountb]/bs; 3628 block = mask[tmp] - 1; 3629 point = jj[nzcountb] - bs*tmp; 3630 idx = ishift + bs2*block + j + bs*point; 3631 a->a[idx] = (MatScalar)aa[nzcountb++]; 3632 } 3633 } 3634 /* zero out the mask elements we set */ 3635 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3636 } 3637 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3638 3639 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3640 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3641 ierr = PetscFree(aa);CHKERRQ(ierr); 3642 ierr = PetscFree(jj);CHKERRQ(ierr); 3643 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3644 3645 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3646 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3647 PetscFunctionReturn(0); 3648 } 3649 3650 #undef __FUNCT__ 3651 #define __FUNCT__ "MatCreateSeqBAIJ" 3652 /*@C 3653 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3654 compressed row) format. For good matrix assembly performance the 3655 user should preallocate the matrix storage by setting the parameter nz 3656 (or the array nnz). By setting these parameters accurately, performance 3657 during matrix assembly can be increased by more than a factor of 50. 3658 3659 Collective on MPI_Comm 3660 3661 Input Parameters: 3662 + comm - MPI communicator, set to PETSC_COMM_SELF 3663 . bs - size of block 3664 . m - number of rows 3665 . n - number of columns 3666 . nz - number of nonzero blocks per block row (same for all rows) 3667 - nnz - array containing the number of nonzero blocks in the various block rows 3668 (possibly different for each block row) or PETSC_NULL 3669 3670 Output Parameter: 3671 . A - the matrix 3672 3673 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3674 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3675 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3676 3677 Options Database Keys: 3678 . -mat_no_unroll - uses code that does not unroll the loops in the 3679 block calculations (much slower) 3680 . -mat_block_size - size of the blocks to use 3681 3682 Level: intermediate 3683 3684 Notes: 3685 The number of rows and columns must be divisible by blocksize. 3686 3687 If the nnz parameter is given then the nz parameter is ignored 3688 3689 A nonzero block is any block that as 1 or more nonzeros in it 3690 3691 The block AIJ format is fully compatible with standard Fortran 77 3692 storage. That is, the stored row and column indices can begin at 3693 either one (as in Fortran) or zero. See the users' manual for details. 3694 3695 Specify the preallocated storage with either nz or nnz (not both). 3696 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3697 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3698 matrices. 3699 3700 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3701 @*/ 3702 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3703 { 3704 PetscErrorCode ierr; 3705 3706 PetscFunctionBegin; 3707 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3708 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3709 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3710 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3711 PetscFunctionReturn(0); 3712 } 3713 3714 #undef __FUNCT__ 3715 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3716 /*@C 3717 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3718 per row in the matrix. For good matrix assembly performance the 3719 user should preallocate the matrix storage by setting the parameter nz 3720 (or the array nnz). By setting these parameters accurately, performance 3721 during matrix assembly can be increased by more than a factor of 50. 3722 3723 Collective on MPI_Comm 3724 3725 Input Parameters: 3726 + A - the matrix 3727 . bs - size of block 3728 . nz - number of block nonzeros per block row (same for all rows) 3729 - nnz - array containing the number of block nonzeros in the various block rows 3730 (possibly different for each block row) or PETSC_NULL 3731 3732 Options Database Keys: 3733 . -mat_no_unroll - uses code that does not unroll the loops in the 3734 block calculations (much slower) 3735 . -mat_block_size - size of the blocks to use 3736 3737 Level: intermediate 3738 3739 Notes: 3740 If the nnz parameter is given then the nz parameter is ignored 3741 3742 You can call MatGetInfo() to get information on how effective the preallocation was; 3743 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3744 You can also run with the option -info and look for messages with the string 3745 malloc in them to see if additional memory allocation was needed. 3746 3747 The block AIJ format is fully compatible with standard Fortran 77 3748 storage. That is, the stored row and column indices can begin at 3749 either one (as in Fortran) or zero. See the users' manual for details. 3750 3751 Specify the preallocated storage with either nz or nnz (not both). 3752 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3753 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3754 3755 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3756 @*/ 3757 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3758 { 3759 PetscErrorCode ierr; 3760 3761 PetscFunctionBegin; 3762 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3763 PetscValidType(B,1); 3764 PetscValidLogicalCollectiveInt(B,bs,2); 3765 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3766 PetscFunctionReturn(0); 3767 } 3768 3769 #undef __FUNCT__ 3770 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3771 /*@C 3772 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3773 (the default sequential PETSc format). 3774 3775 Collective on MPI_Comm 3776 3777 Input Parameters: 3778 + A - the matrix 3779 . i - the indices into j for the start of each local row (starts with zero) 3780 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3781 - v - optional values in the matrix 3782 3783 Level: developer 3784 3785 .keywords: matrix, aij, compressed row, sparse 3786 3787 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3788 @*/ 3789 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3790 { 3791 PetscErrorCode ierr; 3792 3793 PetscFunctionBegin; 3794 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3795 PetscValidType(B,1); 3796 PetscValidLogicalCollectiveInt(B,bs,2); 3797 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3798 PetscFunctionReturn(0); 3799 } 3800 3801 3802 #undef __FUNCT__ 3803 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3804 /*@ 3805 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3806 3807 Collective on MPI_Comm 3808 3809 Input Parameters: 3810 + comm - must be an MPI communicator of size 1 3811 . bs - size of block 3812 . m - number of rows 3813 . n - number of columns 3814 . i - row indices 3815 . j - column indices 3816 - a - matrix values 3817 3818 Output Parameter: 3819 . mat - the matrix 3820 3821 Level: advanced 3822 3823 Notes: 3824 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3825 once the matrix is destroyed 3826 3827 You cannot set new nonzero locations into this matrix, that will generate an error. 3828 3829 The i and j indices are 0 based 3830 3831 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3832 3833 3834 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3835 3836 @*/ 3837 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat) 3838 { 3839 PetscErrorCode ierr; 3840 PetscInt ii; 3841 Mat_SeqBAIJ *baij; 3842 3843 PetscFunctionBegin; 3844 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3845 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3846 3847 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3848 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3849 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3850 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3851 baij = (Mat_SeqBAIJ*)(*mat)->data; 3852 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3853 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3854 3855 baij->i = i; 3856 baij->j = j; 3857 baij->a = a; 3858 3859 baij->singlemalloc = PETSC_FALSE; 3860 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3861 baij->free_a = PETSC_FALSE; 3862 baij->free_ij = PETSC_FALSE; 3863 3864 for (ii=0; ii<m; ii++) { 3865 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3866 #if defined(PETSC_USE_DEBUG) 3867 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3868 #endif 3869 } 3870 #if defined(PETSC_USE_DEBUG) 3871 for (ii=0; ii<baij->i[m]; ii++) { 3872 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3873 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3874 } 3875 #endif 3876 3877 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3878 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3879 PetscFunctionReturn(0); 3880 } 3881