1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc-private/kernels/blockinvert.h> 9 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 19 PetscReal shift = 0.0; 20 21 PetscFunctionBegin; 22 if (a->idiagvalid) { 23 if (values) *values = a->idiag; 24 PetscFunctionReturn(0); 25 } 26 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 27 diag_offset = a->diag; 28 if (!a->idiag) { 29 ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 30 ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 31 } 32 diag = a->idiag; 33 mdiag = a->idiag+bs2*mbs; 34 if (values) *values = a->idiag; 35 /* factor and invert each block */ 36 switch (bs) { 37 case 1: 38 for (i=0; i<mbs; i++) { 39 odiag = v + 1*diag_offset[i]; 40 diag[0] = odiag[0]; 41 mdiag[0] = odiag[0]; 42 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 43 diag += 1; 44 mdiag += 1; 45 } 46 break; 47 case 2: 48 for (i=0; i<mbs; i++) { 49 odiag = v + 4*diag_offset[i]; 50 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 51 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 52 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 53 diag += 4; 54 mdiag += 4; 55 } 56 break; 57 case 3: 58 for (i=0; i<mbs; i++) { 59 odiag = v + 9*diag_offset[i]; 60 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 61 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 62 diag[8] = odiag[8]; 63 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 64 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 65 mdiag[8] = odiag[8]; 66 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 67 diag += 9; 68 mdiag += 9; 69 } 70 break; 71 case 4: 72 for (i=0; i<mbs; i++) { 73 odiag = v + 16*diag_offset[i]; 74 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 75 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 76 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 77 diag += 16; 78 mdiag += 16; 79 } 80 break; 81 case 5: 82 for (i=0; i<mbs; i++) { 83 odiag = v + 25*diag_offset[i]; 84 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 85 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 86 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 87 diag += 25; 88 mdiag += 25; 89 } 90 break; 91 case 6: 92 for (i=0; i<mbs; i++) { 93 odiag = v + 36*diag_offset[i]; 94 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 95 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 97 diag += 36; 98 mdiag += 36; 99 } 100 break; 101 case 7: 102 for (i=0; i<mbs; i++) { 103 odiag = v + 49*diag_offset[i]; 104 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 105 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 107 diag += 49; 108 mdiag += 49; 109 } 110 break; 111 default: 112 ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr); 113 for (i=0; i<mbs; i++) { 114 odiag = v + bs2*diag_offset[i]; 115 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 117 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 118 diag += bs2; 119 mdiag += bs2; 120 } 121 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 122 } 123 a->idiagvalid = PETSC_TRUE; 124 PetscFunctionReturn(0); 125 } 126 127 #undef __FUNCT__ 128 #define __FUNCT__ "MatSOR_SeqBAIJ_1" 129 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 130 { 131 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 132 PetscScalar *x,x1,s1; 133 const PetscScalar *b; 134 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 135 PetscErrorCode ierr; 136 PetscInt m = a->mbs,i,i2,nz,j; 137 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 138 139 PetscFunctionBegin; 140 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 141 its = its*lits; 142 if (its <= 0) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 143 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 144 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 145 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 146 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 147 148 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 149 150 diag = a->diag; 151 idiag = a->idiag; 152 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 153 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 154 155 if (flag & SOR_ZERO_INITIAL_GUESS) { 156 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 157 x[0] = b[0]*idiag[0]; 158 i2 = 1; 159 idiag += 1; 160 for (i=1; i<m; i++) { 161 v = aa + ai[i]; 162 vi = aj + ai[i]; 163 nz = diag[i] - ai[i]; 164 s1 = b[i2]; 165 for (j=0; j<nz; j++) s1 -= v[j]*x[vi[j]]; 166 x[i2] = idiag[0]*s1; 167 idiag += 1; 168 i2 += 1; 169 } 170 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 171 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 172 } 173 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 174 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 175 i2 = 0; 176 mdiag = a->idiag+a->mbs; 177 for (i=0; i<m; i++) { 178 x1 = x[i2]; 179 x[i2] = mdiag[0]*x1; 180 mdiag += 1; 181 i2 += 1; 182 } 183 ierr = PetscLogFlops(m);CHKERRQ(ierr); 184 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 185 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 186 } 187 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 188 idiag = a->idiag+a->mbs - 1; 189 i2 = m - 1; 190 x1 = x[i2]; 191 x[i2] = idiag[0]*x1; 192 idiag -= 1; 193 i2 -= 1; 194 for (i=m-2; i>=0; i--) { 195 v = aa + (diag[i]+1); 196 vi = aj + diag[i] + 1; 197 nz = ai[i+1] - diag[i] - 1; 198 s1 = x[i2]; 199 for (j=0; j<nz; j++) s1 -= v[j]*x[vi[j]]; 200 x[i2] = idiag[0]*s1; 201 idiag -= 1; 202 i2 -= 1; 203 } 204 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 205 } 206 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 207 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 208 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 209 PetscFunctionReturn(0); 210 } 211 212 #undef __FUNCT__ 213 #define __FUNCT__ "MatSOR_SeqBAIJ_2" 214 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 215 { 216 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 217 PetscScalar *x,x1,x2,s1,s2; 218 const PetscScalar *b; 219 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 220 PetscErrorCode ierr; 221 PetscInt m = a->mbs,i,i2,nz,idx,j,it; 222 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 223 224 PetscFunctionBegin; 225 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 226 its = its*lits; 227 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 228 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 229 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 230 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 231 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 232 233 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 234 235 diag = a->diag; 236 idiag = a->idiag; 237 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 238 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 239 240 if (flag & SOR_ZERO_INITIAL_GUESS) { 241 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 242 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 243 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 244 i2 = 2; 245 idiag += 4; 246 for (i=1; i<m; i++) { 247 v = aa + 4*ai[i]; 248 vi = aj + ai[i]; 249 nz = diag[i] - ai[i]; 250 s1 = b[i2]; s2 = b[i2+1]; 251 for (j=0; j<nz; j++) { 252 idx = 2*vi[j]; 253 it = 4*j; 254 x1 = x[idx]; x2 = x[1+idx]; 255 s1 -= v[it]*x1 + v[it+2]*x2; 256 s2 -= v[it+1]*x1 + v[it+3]*x2; 257 } 258 x[i2] = idiag[0]*s1 + idiag[2]*s2; 259 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 260 idiag += 4; 261 i2 += 2; 262 } 263 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 264 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 265 } 266 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 267 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 268 i2 = 0; 269 mdiag = a->idiag+4*a->mbs; 270 for (i=0; i<m; i++) { 271 x1 = x[i2]; x2 = x[i2+1]; 272 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 273 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 274 mdiag += 4; 275 i2 += 2; 276 } 277 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 278 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 279 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 280 } 281 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 282 idiag = a->idiag+4*a->mbs - 4; 283 i2 = 2*m - 2; 284 x1 = x[i2]; x2 = x[i2+1]; 285 x[i2] = idiag[0]*x1 + idiag[2]*x2; 286 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 287 idiag -= 4; 288 i2 -= 2; 289 for (i=m-2; i>=0; i--) { 290 v = aa + 4*(diag[i]+1); 291 vi = aj + diag[i] + 1; 292 nz = ai[i+1] - diag[i] - 1; 293 s1 = x[i2]; s2 = x[i2+1]; 294 for (j=0; j<nz; j++) { 295 idx = 2*vi[j]; 296 it = 4*j; 297 x1 = x[idx]; x2 = x[1+idx]; 298 s1 -= v[it]*x1 + v[it+2]*x2; 299 s2 -= v[it+1]*x1 + v[it+3]*x2; 300 } 301 x[i2] = idiag[0]*s1 + idiag[2]*s2; 302 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 303 idiag -= 4; 304 i2 -= 2; 305 } 306 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 307 } 308 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 309 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 310 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 311 PetscFunctionReturn(0); 312 } 313 314 #undef __FUNCT__ 315 #define __FUNCT__ "MatSOR_SeqBAIJ_3" 316 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 317 { 318 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 319 PetscScalar *x,x1,x2,x3,s1,s2,s3; 320 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 321 const PetscScalar *b; 322 PetscErrorCode ierr; 323 PetscInt m = a->mbs,i,i2,nz,idx; 324 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 325 326 PetscFunctionBegin; 327 its = its*lits; 328 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 329 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 330 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 331 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 332 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 333 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 334 335 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 336 337 diag = a->diag; 338 idiag = a->idiag; 339 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 340 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 341 342 if (flag & SOR_ZERO_INITIAL_GUESS) { 343 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 344 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 345 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 346 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 347 i2 = 3; 348 idiag += 9; 349 for (i=1; i<m; i++) { 350 v = aa + 9*ai[i]; 351 vi = aj + ai[i]; 352 nz = diag[i] - ai[i]; 353 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 354 while (nz--) { 355 idx = 3*(*vi++); 356 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 357 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 358 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 359 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 360 v += 9; 361 } 362 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 363 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 364 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 365 idiag += 9; 366 i2 += 3; 367 } 368 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 369 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 370 } 371 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 372 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 373 i2 = 0; 374 mdiag = a->idiag+9*a->mbs; 375 for (i=0; i<m; i++) { 376 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 377 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 378 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 379 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 380 mdiag += 9; 381 i2 += 3; 382 } 383 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 384 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 385 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 386 } 387 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 388 idiag = a->idiag+9*a->mbs - 9; 389 i2 = 3*m - 3; 390 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 391 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 392 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 393 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 394 idiag -= 9; 395 i2 -= 3; 396 for (i=m-2; i>=0; i--) { 397 v = aa + 9*(diag[i]+1); 398 vi = aj + diag[i] + 1; 399 nz = ai[i+1] - diag[i] - 1; 400 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 401 while (nz--) { 402 idx = 3*(*vi++); 403 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 404 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 405 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 406 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 407 v += 9; 408 } 409 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 410 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 411 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 412 idiag -= 9; 413 i2 -= 3; 414 } 415 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 416 } 417 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 418 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 419 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 420 PetscFunctionReturn(0); 421 } 422 423 #undef __FUNCT__ 424 #define __FUNCT__ "MatSOR_SeqBAIJ_4" 425 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 426 { 427 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 428 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 429 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 430 const PetscScalar *b; 431 PetscErrorCode ierr; 432 PetscInt m = a->mbs,i,i2,nz,idx; 433 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 434 435 PetscFunctionBegin; 436 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 437 its = its*lits; 438 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 439 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 440 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 441 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 442 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 443 444 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 445 446 diag = a->diag; 447 idiag = a->idiag; 448 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 449 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 450 451 if (flag & SOR_ZERO_INITIAL_GUESS) { 452 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 453 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 454 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 455 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 456 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 457 i2 = 4; 458 idiag += 16; 459 for (i=1; i<m; i++) { 460 v = aa + 16*ai[i]; 461 vi = aj + ai[i]; 462 nz = diag[i] - ai[i]; 463 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 464 while (nz--) { 465 idx = 4*(*vi++); 466 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 467 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 468 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 469 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 470 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 471 v += 16; 472 } 473 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 474 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 475 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 476 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 477 idiag += 16; 478 i2 += 4; 479 } 480 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 481 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 482 } 483 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 484 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 485 i2 = 0; 486 mdiag = a->idiag+16*a->mbs; 487 for (i=0; i<m; i++) { 488 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 489 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 490 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 491 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 492 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 493 mdiag += 16; 494 i2 += 4; 495 } 496 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 497 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 498 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 499 } 500 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 501 idiag = a->idiag+16*a->mbs - 16; 502 i2 = 4*m - 4; 503 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 504 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 505 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 506 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 507 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 508 idiag -= 16; 509 i2 -= 4; 510 for (i=m-2; i>=0; i--) { 511 v = aa + 16*(diag[i]+1); 512 vi = aj + diag[i] + 1; 513 nz = ai[i+1] - diag[i] - 1; 514 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 515 while (nz--) { 516 idx = 4*(*vi++); 517 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 518 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 519 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 520 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 521 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 522 v += 16; 523 } 524 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 525 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 526 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 527 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 528 idiag -= 16; 529 i2 -= 4; 530 } 531 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 532 } 533 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 534 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 535 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 536 PetscFunctionReturn(0); 537 } 538 539 #undef __FUNCT__ 540 #define __FUNCT__ "MatSOR_SeqBAIJ_5" 541 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 542 { 543 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 544 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 545 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 546 const PetscScalar *b; 547 PetscErrorCode ierr; 548 PetscInt m = a->mbs,i,i2,nz,idx; 549 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 550 551 PetscFunctionBegin; 552 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 553 its = its*lits; 554 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 555 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 556 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 557 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 558 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 559 560 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 561 562 diag = a->diag; 563 idiag = a->idiag; 564 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 565 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 566 567 if (flag & SOR_ZERO_INITIAL_GUESS) { 568 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 569 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 570 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 571 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 572 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 573 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 574 i2 = 5; 575 idiag += 25; 576 for (i=1; i<m; i++) { 577 v = aa + 25*ai[i]; 578 vi = aj + ai[i]; 579 nz = diag[i] - ai[i]; 580 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 581 while (nz--) { 582 idx = 5*(*vi++); 583 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 584 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 585 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 586 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 587 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 588 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 589 v += 25; 590 } 591 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 592 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 593 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 594 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 595 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 596 idiag += 25; 597 i2 += 5; 598 } 599 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 600 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 601 } 602 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 603 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 604 i2 = 0; 605 mdiag = a->idiag+25*a->mbs; 606 for (i=0; i<m; i++) { 607 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 608 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 609 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 610 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 611 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 612 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 613 mdiag += 25; 614 i2 += 5; 615 } 616 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 617 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 618 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 619 } 620 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 621 idiag = a->idiag+25*a->mbs - 25; 622 i2 = 5*m - 5; 623 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 624 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 625 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 626 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 627 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 628 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 629 idiag -= 25; 630 i2 -= 5; 631 for (i=m-2; i>=0; i--) { 632 v = aa + 25*(diag[i]+1); 633 vi = aj + diag[i] + 1; 634 nz = ai[i+1] - diag[i] - 1; 635 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 636 while (nz--) { 637 idx = 5*(*vi++); 638 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 639 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 640 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 641 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 642 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 643 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 644 v += 25; 645 } 646 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 647 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 648 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 649 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 650 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 651 idiag -= 25; 652 i2 -= 5; 653 } 654 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 655 } 656 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 657 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 658 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 659 PetscFunctionReturn(0); 660 } 661 662 #undef __FUNCT__ 663 #define __FUNCT__ "MatSOR_SeqBAIJ_6" 664 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 665 { 666 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 667 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 668 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 669 const PetscScalar *b; 670 PetscErrorCode ierr; 671 PetscInt m = a->mbs,i,i2,nz,idx; 672 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 673 674 PetscFunctionBegin; 675 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 676 its = its*lits; 677 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 678 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 679 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 680 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 681 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 682 683 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 684 685 diag = a->diag; 686 idiag = a->idiag; 687 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 688 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 689 690 if (flag & SOR_ZERO_INITIAL_GUESS) { 691 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 692 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 693 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 694 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 695 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 696 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 697 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 698 i2 = 6; 699 idiag += 36; 700 for (i=1; i<m; i++) { 701 v = aa + 36*ai[i]; 702 vi = aj + ai[i]; 703 nz = diag[i] - ai[i]; 704 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 705 while (nz--) { 706 idx = 6*(*vi++); 707 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 708 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 709 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 710 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 711 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 712 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 713 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 714 v += 36; 715 } 716 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 717 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 718 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 719 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 720 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 721 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 722 idiag += 36; 723 i2 += 6; 724 } 725 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 726 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 727 } 728 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 729 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 730 i2 = 0; 731 mdiag = a->idiag+36*a->mbs; 732 for (i=0; i<m; i++) { 733 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 734 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 735 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 736 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 737 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 738 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 739 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 740 mdiag += 36; 741 i2 += 6; 742 } 743 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 744 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 745 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 746 } 747 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 748 idiag = a->idiag+36*a->mbs - 36; 749 i2 = 6*m - 6; 750 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 751 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 752 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 753 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 754 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 755 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 756 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 757 idiag -= 36; 758 i2 -= 6; 759 for (i=m-2; i>=0; i--) { 760 v = aa + 36*(diag[i]+1); 761 vi = aj + diag[i] + 1; 762 nz = ai[i+1] - diag[i] - 1; 763 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 764 while (nz--) { 765 idx = 6*(*vi++); 766 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 767 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 768 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 769 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 770 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 771 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 772 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 773 v += 36; 774 } 775 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 776 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 777 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 778 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 779 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 780 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 781 idiag -= 36; 782 i2 -= 6; 783 } 784 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 785 } 786 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 787 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 788 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 789 PetscFunctionReturn(0); 790 } 791 792 #undef __FUNCT__ 793 #define __FUNCT__ "MatSOR_SeqBAIJ_7" 794 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 795 { 796 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 797 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 798 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 799 const PetscScalar *b; 800 PetscErrorCode ierr; 801 PetscInt m = a->mbs,i,i2,nz,idx; 802 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 803 804 PetscFunctionBegin; 805 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 806 its = its*lits; 807 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 808 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 809 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 810 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 811 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 812 813 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 814 815 diag = a->diag; 816 idiag = a->idiag; 817 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 818 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 819 820 if (flag & SOR_ZERO_INITIAL_GUESS) { 821 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 822 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 823 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 824 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 825 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 826 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 827 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 828 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 829 i2 = 7; 830 idiag += 49; 831 for (i=1; i<m; i++) { 832 v = aa + 49*ai[i]; 833 vi = aj + ai[i]; 834 nz = diag[i] - ai[i]; 835 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 836 while (nz--) { 837 idx = 7*(*vi++); 838 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 839 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 840 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 841 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 842 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 843 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 844 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 845 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 846 v += 49; 847 } 848 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 849 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 850 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 851 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 852 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 853 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 854 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 855 idiag += 49; 856 i2 += 7; 857 } 858 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 859 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 860 } 861 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 862 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 863 i2 = 0; 864 mdiag = a->idiag+49*a->mbs; 865 for (i=0; i<m; i++) { 866 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 867 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7; 868 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7; 869 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7; 870 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7; 871 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7; 872 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7; 873 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7; 874 mdiag += 49; 875 i2 += 7; 876 } 877 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 878 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 879 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 880 } 881 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 882 idiag = a->idiag+49*a->mbs - 49; 883 i2 = 7*m - 7; 884 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 885 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 886 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 887 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 888 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 889 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 890 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 891 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 892 idiag -= 49; 893 i2 -= 7; 894 for (i=m-2; i>=0; i--) { 895 v = aa + 49*(diag[i]+1); 896 vi = aj + diag[i] + 1; 897 nz = ai[i+1] - diag[i] - 1; 898 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 899 while (nz--) { 900 idx = 7*(*vi++); 901 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 902 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 903 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 904 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 905 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 906 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 907 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 908 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 909 v += 49; 910 } 911 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 912 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 913 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 914 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 915 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 916 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 917 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 918 idiag -= 49; 919 i2 -= 7; 920 } 921 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 922 } 923 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 924 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 925 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 926 PetscFunctionReturn(0); 927 } 928 929 #undef __FUNCT__ 930 #define __FUNCT__ "MatSOR_SeqBAIJ_N" 931 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 932 { 933 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 934 PetscScalar *x,*work,*w,*workt; 935 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 936 const PetscScalar *b; 937 PetscErrorCode ierr; 938 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j; 939 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 940 941 PetscFunctionBegin; 942 its = its*lits; 943 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 944 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 945 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 946 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 947 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 948 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 949 950 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 951 952 diag = a->diag; 953 idiag = a->idiag; 954 if (!a->mult_work) { 955 k = PetscMax(A->rmap->n,A->cmap->n); 956 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 957 } 958 work = a->mult_work; 959 if (!a->sor_work) { 960 ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr); 961 } 962 w = a->sor_work; 963 964 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 965 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 966 967 if (flag & SOR_ZERO_INITIAL_GUESS) { 968 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 969 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 970 /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 971 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 972 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/ 973 i2 = bs; 974 idiag += bs2; 975 for (i=1; i<m; i++) { 976 v = aa + bs2*ai[i]; 977 vi = aj + ai[i]; 978 nz = diag[i] - ai[i]; 979 980 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 981 /* copy all rows of x that are needed into contiguous space */ 982 workt = work; 983 for (j=0; j<nz; j++) { 984 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 985 workt += bs; 986 } 987 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 988 /*s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 989 while (nz--) { 990 idx = N*(*vi++); 991 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 992 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 993 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 994 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 995 v += N2; 996 } */ 997 998 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 999 /* x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1000 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1001 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/ 1002 1003 idiag += bs2; 1004 i2 += bs; 1005 } 1006 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 1007 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1008 } 1009 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 1010 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1011 i2 = 0; 1012 mdiag = a->idiag+bs2*a->mbs; 1013 ierr = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr); 1014 for (i=0; i<m; i++) { 1015 PetscKernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2); 1016 /* x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1017 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 1018 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 1019 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */ 1020 1021 mdiag += bs2; 1022 i2 += bs; 1023 } 1024 ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr); 1025 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1026 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 1027 } 1028 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1029 idiag = a->idiag+bs2*a->mbs - bs2; 1030 i2 = bs*m - bs; 1031 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1032 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1033 /*x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1034 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 1035 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 1036 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/ 1037 idiag -= bs2; 1038 i2 -= bs; 1039 for (i=m-2; i>=0; i--) { 1040 v = aa + bs2*(diag[i]+1); 1041 vi = aj + diag[i] + 1; 1042 nz = ai[i+1] - diag[i] - 1; 1043 1044 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1045 /* copy all rows of x that are needed into contiguous space */ 1046 workt = work; 1047 for (j=0; j<nz; j++) { 1048 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1049 workt += bs; 1050 } 1051 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1052 /* s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 1053 while (nz--) { 1054 idx = N*(*vi++); 1055 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 1056 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1057 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1058 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1059 v += N2; 1060 } */ 1061 1062 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1063 /*x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1064 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1065 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */ 1066 idiag -= bs2; 1067 i2 -= bs; 1068 } 1069 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1070 } 1071 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 1072 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1073 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1074 PetscFunctionReturn(0); 1075 } 1076 1077 /* 1078 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1079 */ 1080 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1081 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1082 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1083 #define matsetvaluesblocked4_ matsetvaluesblocked4 1084 #endif 1085 1086 #undef __FUNCT__ 1087 #define __FUNCT__ "matsetvaluesblocked4_" 1088 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 1089 { 1090 Mat A = *AA; 1091 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1092 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 1093 PetscInt *ai =a->i,*ailen=a->ilen; 1094 PetscInt *aj =a->j,stepval,lastcol = -1; 1095 const PetscScalar *value = v; 1096 MatScalar *ap,*aa = a->a,*bap; 1097 1098 PetscFunctionBegin; 1099 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 1100 stepval = (n-1)*4; 1101 for (k=0; k<m; k++) { /* loop over added rows */ 1102 row = im[k]; 1103 rp = aj + ai[row]; 1104 ap = aa + 16*ai[row]; 1105 nrow = ailen[row]; 1106 low = 0; 1107 high = nrow; 1108 for (l=0; l<n; l++) { /* loop over added columns */ 1109 col = in[l]; 1110 if (col <= lastcol) low = 0; 1111 else high = nrow; 1112 lastcol = col; 1113 value = v + k*(stepval+4 + l)*4; 1114 while (high-low > 7) { 1115 t = (low+high)/2; 1116 if (rp[t] > col) high = t; 1117 else low = t; 1118 } 1119 for (i=low; i<high; i++) { 1120 if (rp[i] > col) break; 1121 if (rp[i] == col) { 1122 bap = ap + 16*i; 1123 for (ii=0; ii<4; ii++,value+=stepval) { 1124 for (jj=ii; jj<16; jj+=4) { 1125 bap[jj] += *value++; 1126 } 1127 } 1128 goto noinsert2; 1129 } 1130 } 1131 N = nrow++ - 1; 1132 high++; /* added new column index thus must search to one higher than before */ 1133 /* shift up all the later entries in this row */ 1134 for (ii=N; ii>=i; ii--) { 1135 rp[ii+1] = rp[ii]; 1136 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1137 } 1138 if (N >= i) { 1139 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1140 } 1141 rp[i] = col; 1142 bap = ap + 16*i; 1143 for (ii=0; ii<4; ii++,value+=stepval) { 1144 for (jj=ii; jj<16; jj+=4) { 1145 bap[jj] = *value++; 1146 } 1147 } 1148 noinsert2:; 1149 low = i; 1150 } 1151 ailen[row] = nrow; 1152 } 1153 PetscFunctionReturnVoid(); 1154 } 1155 1156 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1157 #define matsetvalues4_ MATSETVALUES4 1158 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1159 #define matsetvalues4_ matsetvalues4 1160 #endif 1161 1162 #undef __FUNCT__ 1163 #define __FUNCT__ "MatSetValues4_" 1164 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1165 { 1166 Mat A = *AA; 1167 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1168 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1169 PetscInt *ai=a->i,*ailen=a->ilen; 1170 PetscInt *aj=a->j,brow,bcol; 1171 PetscInt ridx,cidx,lastcol = -1; 1172 MatScalar *ap,value,*aa=a->a,*bap; 1173 1174 PetscFunctionBegin; 1175 for (k=0; k<m; k++) { /* loop over added rows */ 1176 row = im[k]; brow = row/4; 1177 rp = aj + ai[brow]; 1178 ap = aa + 16*ai[brow]; 1179 nrow = ailen[brow]; 1180 low = 0; 1181 high = nrow; 1182 for (l=0; l<n; l++) { /* loop over added columns */ 1183 col = in[l]; bcol = col/4; 1184 ridx = row % 4; cidx = col % 4; 1185 value = v[l + k*n]; 1186 if (col <= lastcol) low = 0; 1187 else high = nrow; 1188 lastcol = col; 1189 while (high-low > 7) { 1190 t = (low+high)/2; 1191 if (rp[t] > bcol) high = t; 1192 else low = t; 1193 } 1194 for (i=low; i<high; i++) { 1195 if (rp[i] > bcol) break; 1196 if (rp[i] == bcol) { 1197 bap = ap + 16*i + 4*cidx + ridx; 1198 *bap += value; 1199 goto noinsert1; 1200 } 1201 } 1202 N = nrow++ - 1; 1203 high++; /* added new column thus must search to one higher than before */ 1204 /* shift up all the later entries in this row */ 1205 for (ii=N; ii>=i; ii--) { 1206 rp[ii+1] = rp[ii]; 1207 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1208 } 1209 if (N>=i) { 1210 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1211 } 1212 rp[i] = bcol; 1213 ap[16*i + 4*cidx + ridx] = value; 1214 noinsert1:; 1215 low = i; 1216 } 1217 ailen[brow] = nrow; 1218 } 1219 PetscFunctionReturnVoid(); 1220 } 1221 1222 /* 1223 Checks for missing diagonals 1224 */ 1225 #undef __FUNCT__ 1226 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1227 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1228 { 1229 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1230 PetscErrorCode ierr; 1231 PetscInt *diag,*jj = a->j,i; 1232 1233 PetscFunctionBegin; 1234 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1235 *missing = PETSC_FALSE; 1236 if (A->rmap->n > 0 && !jj) { 1237 *missing = PETSC_TRUE; 1238 if (d) *d = 0; 1239 PetscInfo(A,"Matrix has no entries therefore is missing diagonal"); 1240 } else { 1241 diag = a->diag; 1242 for (i=0; i<a->mbs; i++) { 1243 if (jj[diag[i]] != i) { 1244 *missing = PETSC_TRUE; 1245 if (d) *d = i; 1246 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1247 break; 1248 } 1249 } 1250 } 1251 PetscFunctionReturn(0); 1252 } 1253 1254 #undef __FUNCT__ 1255 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1256 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1257 { 1258 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1259 PetscErrorCode ierr; 1260 PetscInt i,j,m = a->mbs; 1261 1262 PetscFunctionBegin; 1263 if (!a->diag) { 1264 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1265 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1266 a->free_diag = PETSC_TRUE; 1267 } 1268 for (i=0; i<m; i++) { 1269 a->diag[i] = a->i[i+1]; 1270 for (j=a->i[i]; j<a->i[i+1]; j++) { 1271 if (a->j[j] == i) { 1272 a->diag[i] = j; 1273 break; 1274 } 1275 } 1276 } 1277 PetscFunctionReturn(0); 1278 } 1279 1280 1281 #undef __FUNCT__ 1282 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1283 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1284 { 1285 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1286 PetscErrorCode ierr; 1287 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1288 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1289 1290 PetscFunctionBegin; 1291 *nn = n; 1292 if (!ia) PetscFunctionReturn(0); 1293 if (symmetric) { 1294 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1295 nz = tia[n]; 1296 } else { 1297 tia = a->i; tja = a->j; 1298 } 1299 1300 if (!blockcompressed && bs > 1) { 1301 (*nn) *= bs; 1302 /* malloc & create the natural set of indices */ 1303 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1304 if (n) { 1305 (*ia)[0] = 0; 1306 for (j=1; j<bs; j++) { 1307 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1308 } 1309 } 1310 1311 for (i=1; i<n; i++) { 1312 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1313 for (j=1; j<bs; j++) { 1314 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1315 } 1316 } 1317 if (n) { 1318 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1319 } 1320 1321 if (inja) { 1322 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1323 cnt = 0; 1324 for (i=0; i<n; i++) { 1325 for (j=0; j<bs; j++) { 1326 for (k=tia[i]; k<tia[i+1]; k++) { 1327 for (l=0; l<bs; l++) { 1328 (*ja)[cnt++] = bs*tja[k] + l; 1329 } 1330 } 1331 } 1332 } 1333 } 1334 1335 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1336 ierr = PetscFree(tia);CHKERRQ(ierr); 1337 ierr = PetscFree(tja);CHKERRQ(ierr); 1338 } 1339 } else if (oshift == 1) { 1340 if (symmetric) { 1341 nz = tia[A->rmap->n/bs]; 1342 /* add 1 to i and j indices */ 1343 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1344 *ia = tia; 1345 if (ja) { 1346 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1347 *ja = tja; 1348 } 1349 } else { 1350 nz = a->i[A->rmap->n/bs]; 1351 /* malloc space and add 1 to i and j indices */ 1352 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1353 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1354 if (ja) { 1355 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1356 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1357 } 1358 } 1359 } else { 1360 *ia = tia; 1361 if (ja) *ja = tja; 1362 } 1363 PetscFunctionReturn(0); 1364 } 1365 1366 #undef __FUNCT__ 1367 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1368 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1369 { 1370 PetscErrorCode ierr; 1371 1372 PetscFunctionBegin; 1373 if (!ia) PetscFunctionReturn(0); 1374 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1375 ierr = PetscFree(*ia);CHKERRQ(ierr); 1376 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1377 } 1378 PetscFunctionReturn(0); 1379 } 1380 1381 #undef __FUNCT__ 1382 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1383 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1384 { 1385 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1386 PetscErrorCode ierr; 1387 1388 PetscFunctionBegin; 1389 #if defined(PETSC_USE_LOG) 1390 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1391 #endif 1392 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1393 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1394 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1395 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1396 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1397 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1398 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1399 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1400 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1401 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1402 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1403 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1404 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1405 1406 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1407 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1408 ierr = PetscFree(A->data);CHKERRQ(ierr); 1409 1410 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1411 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1412 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1413 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1414 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1415 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1416 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1417 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1418 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1419 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1420 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1421 PetscFunctionReturn(0); 1422 } 1423 1424 #undef __FUNCT__ 1425 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1426 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1427 { 1428 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1429 PetscErrorCode ierr; 1430 1431 PetscFunctionBegin; 1432 switch (op) { 1433 case MAT_ROW_ORIENTED: 1434 a->roworiented = flg; 1435 break; 1436 case MAT_KEEP_NONZERO_PATTERN: 1437 a->keepnonzeropattern = flg; 1438 break; 1439 case MAT_NEW_NONZERO_LOCATIONS: 1440 a->nonew = (flg ? 0 : 1); 1441 break; 1442 case MAT_NEW_NONZERO_LOCATION_ERR: 1443 a->nonew = (flg ? -1 : 0); 1444 break; 1445 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1446 a->nonew = (flg ? -2 : 0); 1447 break; 1448 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1449 a->nounused = (flg ? -1 : 0); 1450 break; 1451 case MAT_CHECK_COMPRESSED_ROW: 1452 a->compressedrow.check = flg; 1453 break; 1454 case MAT_NEW_DIAGONALS: 1455 case MAT_IGNORE_OFF_PROC_ENTRIES: 1456 case MAT_USE_HASH_TABLE: 1457 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1458 break; 1459 case MAT_SPD: 1460 case MAT_SYMMETRIC: 1461 case MAT_STRUCTURALLY_SYMMETRIC: 1462 case MAT_HERMITIAN: 1463 case MAT_SYMMETRY_ETERNAL: 1464 /* These options are handled directly by MatSetOption() */ 1465 break; 1466 default: 1467 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1468 } 1469 PetscFunctionReturn(0); 1470 } 1471 1472 #undef __FUNCT__ 1473 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1474 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1475 { 1476 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1477 PetscErrorCode ierr; 1478 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1479 MatScalar *aa,*aa_i; 1480 PetscScalar *v_i; 1481 1482 PetscFunctionBegin; 1483 bs = A->rmap->bs; 1484 ai = a->i; 1485 aj = a->j; 1486 aa = a->a; 1487 bs2 = a->bs2; 1488 1489 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1490 1491 bn = row/bs; /* Block number */ 1492 bp = row % bs; /* Block Position */ 1493 M = ai[bn+1] - ai[bn]; 1494 *nz = bs*M; 1495 1496 if (v) { 1497 *v = 0; 1498 if (*nz) { 1499 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1500 for (i=0; i<M; i++) { /* for each block in the block row */ 1501 v_i = *v + i*bs; 1502 aa_i = aa + bs2*(ai[bn] + i); 1503 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1504 } 1505 } 1506 } 1507 1508 if (idx) { 1509 *idx = 0; 1510 if (*nz) { 1511 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1512 for (i=0; i<M; i++) { /* for each block in the block row */ 1513 idx_i = *idx + i*bs; 1514 itmp = bs*aj[ai[bn] + i]; 1515 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1516 } 1517 } 1518 } 1519 PetscFunctionReturn(0); 1520 } 1521 1522 #undef __FUNCT__ 1523 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1524 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1525 { 1526 PetscErrorCode ierr; 1527 1528 PetscFunctionBegin; 1529 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1530 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1531 PetscFunctionReturn(0); 1532 } 1533 1534 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1535 1536 #undef __FUNCT__ 1537 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1538 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1539 { 1540 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1541 Mat C; 1542 PetscErrorCode ierr; 1543 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1544 PetscInt *rows,*cols,bs2=a->bs2; 1545 MatScalar *array; 1546 1547 PetscFunctionBegin; 1548 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1549 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1550 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1551 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1552 1553 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1554 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1555 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1556 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1557 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr); 1558 ierr = PetscFree(col);CHKERRQ(ierr); 1559 } else { 1560 C = *B; 1561 } 1562 1563 array = a->a; 1564 ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 1565 for (i=0; i<mbs; i++) { 1566 cols[0] = i*bs; 1567 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1568 len = ai[i+1] - ai[i]; 1569 for (j=0; j<len; j++) { 1570 rows[0] = (*aj++)*bs; 1571 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1572 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1573 array += bs2; 1574 } 1575 } 1576 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1577 1578 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1579 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1580 1581 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1582 *B = C; 1583 } else { 1584 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1585 } 1586 PetscFunctionReturn(0); 1587 } 1588 1589 #undef __FUNCT__ 1590 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1591 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1592 { 1593 PetscErrorCode ierr; 1594 Mat Btrans; 1595 1596 PetscFunctionBegin; 1597 *f = PETSC_FALSE; 1598 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1599 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1600 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1601 PetscFunctionReturn(0); 1602 } 1603 1604 #undef __FUNCT__ 1605 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1606 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1607 { 1608 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1609 PetscErrorCode ierr; 1610 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1611 int fd; 1612 PetscScalar *aa; 1613 FILE *file; 1614 1615 PetscFunctionBegin; 1616 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1617 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1618 col_lens[0] = MAT_FILE_CLASSID; 1619 1620 col_lens[1] = A->rmap->N; 1621 col_lens[2] = A->cmap->n; 1622 col_lens[3] = a->nz*bs2; 1623 1624 /* store lengths of each row and write (including header) to file */ 1625 count = 0; 1626 for (i=0; i<a->mbs; i++) { 1627 for (j=0; j<bs; j++) { 1628 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1629 } 1630 } 1631 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1632 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1633 1634 /* store column indices (zero start index) */ 1635 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1636 count = 0; 1637 for (i=0; i<a->mbs; i++) { 1638 for (j=0; j<bs; j++) { 1639 for (k=a->i[i]; k<a->i[i+1]; k++) { 1640 for (l=0; l<bs; l++) { 1641 jj[count++] = bs*a->j[k] + l; 1642 } 1643 } 1644 } 1645 } 1646 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1647 ierr = PetscFree(jj);CHKERRQ(ierr); 1648 1649 /* store nonzero values */ 1650 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1651 count = 0; 1652 for (i=0; i<a->mbs; i++) { 1653 for (j=0; j<bs; j++) { 1654 for (k=a->i[i]; k<a->i[i+1]; k++) { 1655 for (l=0; l<bs; l++) { 1656 aa[count++] = a->a[bs2*k + l*bs + j]; 1657 } 1658 } 1659 } 1660 } 1661 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1662 ierr = PetscFree(aa);CHKERRQ(ierr); 1663 1664 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1665 if (file) { 1666 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1667 } 1668 PetscFunctionReturn(0); 1669 } 1670 1671 #undef __FUNCT__ 1672 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1673 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1674 { 1675 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1676 PetscErrorCode ierr; 1677 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1678 PetscViewerFormat format; 1679 1680 PetscFunctionBegin; 1681 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1682 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1683 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1684 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1685 Mat aij; 1686 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1687 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1688 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1689 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1690 PetscFunctionReturn(0); 1691 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1692 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1693 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1694 for (i=0; i<a->mbs; i++) { 1695 for (j=0; j<bs; j++) { 1696 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1697 for (k=a->i[i]; k<a->i[i+1]; k++) { 1698 for (l=0; l<bs; l++) { 1699 #if defined(PETSC_USE_COMPLEX) 1700 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1701 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1702 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1703 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1704 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1705 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1706 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1707 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1708 } 1709 #else 1710 if (a->a[bs2*k + l*bs + j] != 0.0) { 1711 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1712 } 1713 #endif 1714 } 1715 } 1716 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1717 } 1718 } 1719 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1720 } else { 1721 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1722 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1723 for (i=0; i<a->mbs; i++) { 1724 for (j=0; j<bs; j++) { 1725 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1726 for (k=a->i[i]; k<a->i[i+1]; k++) { 1727 for (l=0; l<bs; l++) { 1728 #if defined(PETSC_USE_COMPLEX) 1729 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1730 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1731 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1732 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1733 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1734 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1735 } else { 1736 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1737 } 1738 #else 1739 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1740 #endif 1741 } 1742 } 1743 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1744 } 1745 } 1746 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1747 } 1748 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1749 PetscFunctionReturn(0); 1750 } 1751 1752 #include <petscdraw.h> 1753 #undef __FUNCT__ 1754 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1755 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1756 { 1757 Mat A = (Mat) Aa; 1758 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1759 PetscErrorCode ierr; 1760 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1761 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1762 MatScalar *aa; 1763 PetscViewer viewer; 1764 PetscViewerFormat format; 1765 1766 PetscFunctionBegin; 1767 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1768 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1769 1770 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1771 1772 /* loop over matrix elements drawing boxes */ 1773 1774 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1775 color = PETSC_DRAW_BLUE; 1776 for (i=0,row=0; i<mbs; i++,row+=bs) { 1777 for (j=a->i[i]; j<a->i[i+1]; j++) { 1778 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1779 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1780 aa = a->a + j*bs2; 1781 for (k=0; k<bs; k++) { 1782 for (l=0; l<bs; l++) { 1783 if (PetscRealPart(*aa++) >= 0.) continue; 1784 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1785 } 1786 } 1787 } 1788 } 1789 color = PETSC_DRAW_CYAN; 1790 for (i=0,row=0; i<mbs; i++,row+=bs) { 1791 for (j=a->i[i]; j<a->i[i+1]; j++) { 1792 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1793 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1794 aa = a->a + j*bs2; 1795 for (k=0; k<bs; k++) { 1796 for (l=0; l<bs; l++) { 1797 if (PetscRealPart(*aa++) != 0.) continue; 1798 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1799 } 1800 } 1801 } 1802 } 1803 color = PETSC_DRAW_RED; 1804 for (i=0,row=0; i<mbs; i++,row+=bs) { 1805 for (j=a->i[i]; j<a->i[i+1]; j++) { 1806 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1807 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1808 aa = a->a + j*bs2; 1809 for (k=0; k<bs; k++) { 1810 for (l=0; l<bs; l++) { 1811 if (PetscRealPart(*aa++) <= 0.) continue; 1812 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1813 } 1814 } 1815 } 1816 } 1817 } else { 1818 /* use contour shading to indicate magnitude of values */ 1819 /* first determine max of all nonzero values */ 1820 PetscDraw popup; 1821 PetscReal scale,maxv = 0.0; 1822 1823 for (i=0; i<a->nz*a->bs2; i++) { 1824 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1825 } 1826 scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv; 1827 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1828 if (popup) { 1829 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1830 } 1831 for (i=0,row=0; i<mbs; i++,row+=bs) { 1832 for (j=a->i[i]; j<a->i[i+1]; j++) { 1833 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1834 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1835 aa = a->a + j*bs2; 1836 for (k=0; k<bs; k++) { 1837 for (l=0; l<bs; l++) { 1838 color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++)); 1839 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1840 } 1841 } 1842 } 1843 } 1844 } 1845 PetscFunctionReturn(0); 1846 } 1847 1848 #undef __FUNCT__ 1849 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1850 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1851 { 1852 PetscErrorCode ierr; 1853 PetscReal xl,yl,xr,yr,w,h; 1854 PetscDraw draw; 1855 PetscBool isnull; 1856 1857 PetscFunctionBegin; 1858 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1859 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1860 1861 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1862 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1863 xr += w; yr += h; xl = -w; yl = -h; 1864 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1865 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1866 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1867 PetscFunctionReturn(0); 1868 } 1869 1870 #undef __FUNCT__ 1871 #define __FUNCT__ "MatView_SeqBAIJ" 1872 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1873 { 1874 PetscErrorCode ierr; 1875 PetscBool iascii,isbinary,isdraw; 1876 1877 PetscFunctionBegin; 1878 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1879 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1880 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1881 if (iascii) { 1882 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1883 } else if (isbinary) { 1884 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1885 } else if (isdraw) { 1886 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1887 } else { 1888 Mat B; 1889 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1890 ierr = MatView(B,viewer);CHKERRQ(ierr); 1891 ierr = MatDestroy(&B);CHKERRQ(ierr); 1892 } 1893 PetscFunctionReturn(0); 1894 } 1895 1896 1897 #undef __FUNCT__ 1898 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1899 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1900 { 1901 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1902 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1903 PetscInt *ai = a->i,*ailen = a->ilen; 1904 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1905 MatScalar *ap,*aa = a->a; 1906 1907 PetscFunctionBegin; 1908 for (k=0; k<m; k++) { /* loop over rows */ 1909 row = im[k]; brow = row/bs; 1910 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1911 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1912 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1913 nrow = ailen[brow]; 1914 for (l=0; l<n; l++) { /* loop over columns */ 1915 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1916 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1917 col = in[l]; 1918 bcol = col/bs; 1919 cidx = col%bs; 1920 ridx = row%bs; 1921 high = nrow; 1922 low = 0; /* assume unsorted */ 1923 while (high-low > 5) { 1924 t = (low+high)/2; 1925 if (rp[t] > bcol) high = t; 1926 else low = t; 1927 } 1928 for (i=low; i<high; i++) { 1929 if (rp[i] > bcol) break; 1930 if (rp[i] == bcol) { 1931 *v++ = ap[bs2*i+bs*cidx+ridx]; 1932 goto finished; 1933 } 1934 } 1935 *v++ = 0.0; 1936 finished:; 1937 } 1938 } 1939 PetscFunctionReturn(0); 1940 } 1941 1942 #undef __FUNCT__ 1943 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1944 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1945 { 1946 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1947 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1948 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1949 PetscErrorCode ierr; 1950 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1951 PetscBool roworiented=a->roworiented; 1952 const PetscScalar *value = v; 1953 MatScalar *ap,*aa = a->a,*bap; 1954 1955 PetscFunctionBegin; 1956 if (roworiented) { 1957 stepval = (n-1)*bs; 1958 } else { 1959 stepval = (m-1)*bs; 1960 } 1961 for (k=0; k<m; k++) { /* loop over added rows */ 1962 row = im[k]; 1963 if (row < 0) continue; 1964 #if defined(PETSC_USE_DEBUG) 1965 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1966 #endif 1967 rp = aj + ai[row]; 1968 ap = aa + bs2*ai[row]; 1969 rmax = imax[row]; 1970 nrow = ailen[row]; 1971 low = 0; 1972 high = nrow; 1973 for (l=0; l<n; l++) { /* loop over added columns */ 1974 if (in[l] < 0) continue; 1975 #if defined(PETSC_USE_DEBUG) 1976 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1977 #endif 1978 col = in[l]; 1979 if (roworiented) { 1980 value = v + (k*(stepval+bs) + l)*bs; 1981 } else { 1982 value = v + (l*(stepval+bs) + k)*bs; 1983 } 1984 if (col <= lastcol) low = 0; 1985 else high = nrow; 1986 lastcol = col; 1987 while (high-low > 7) { 1988 t = (low+high)/2; 1989 if (rp[t] > col) high = t; 1990 else low = t; 1991 } 1992 for (i=low; i<high; i++) { 1993 if (rp[i] > col) break; 1994 if (rp[i] == col) { 1995 bap = ap + bs2*i; 1996 if (roworiented) { 1997 if (is == ADD_VALUES) { 1998 for (ii=0; ii<bs; ii++,value+=stepval) { 1999 for (jj=ii; jj<bs2; jj+=bs) { 2000 bap[jj] += *value++; 2001 } 2002 } 2003 } else { 2004 for (ii=0; ii<bs; ii++,value+=stepval) { 2005 for (jj=ii; jj<bs2; jj+=bs) { 2006 bap[jj] = *value++; 2007 } 2008 } 2009 } 2010 } else { 2011 if (is == ADD_VALUES) { 2012 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2013 for (jj=0; jj<bs; jj++) { 2014 bap[jj] += value[jj]; 2015 } 2016 bap += bs; 2017 } 2018 } else { 2019 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2020 for (jj=0; jj<bs; jj++) { 2021 bap[jj] = value[jj]; 2022 } 2023 bap += bs; 2024 } 2025 } 2026 } 2027 goto noinsert2; 2028 } 2029 } 2030 if (nonew == 1) goto noinsert2; 2031 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2032 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2033 N = nrow++ - 1; high++; 2034 /* shift up all the later entries in this row */ 2035 for (ii=N; ii>=i; ii--) { 2036 rp[ii+1] = rp[ii]; 2037 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2038 } 2039 if (N >= i) { 2040 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2041 } 2042 rp[i] = col; 2043 bap = ap + bs2*i; 2044 if (roworiented) { 2045 for (ii=0; ii<bs; ii++,value+=stepval) { 2046 for (jj=ii; jj<bs2; jj+=bs) { 2047 bap[jj] = *value++; 2048 } 2049 } 2050 } else { 2051 for (ii=0; ii<bs; ii++,value+=stepval) { 2052 for (jj=0; jj<bs; jj++) { 2053 *bap++ = *value++; 2054 } 2055 } 2056 } 2057 noinsert2:; 2058 low = i; 2059 } 2060 ailen[row] = nrow; 2061 } 2062 PetscFunctionReturn(0); 2063 } 2064 2065 #undef __FUNCT__ 2066 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 2067 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 2068 { 2069 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2070 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 2071 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 2072 PetscErrorCode ierr; 2073 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 2074 MatScalar *aa = a->a,*ap; 2075 PetscReal ratio=0.6; 2076 2077 PetscFunctionBegin; 2078 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 2079 2080 if (m) rmax = ailen[0]; 2081 for (i=1; i<mbs; i++) { 2082 /* move each row back by the amount of empty slots (fshift) before it*/ 2083 fshift += imax[i-1] - ailen[i-1]; 2084 rmax = PetscMax(rmax,ailen[i]); 2085 if (fshift) { 2086 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 2087 N = ailen[i]; 2088 for (j=0; j<N; j++) { 2089 ip[j-fshift] = ip[j]; 2090 2091 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2092 } 2093 } 2094 ai[i] = ai[i-1] + ailen[i-1]; 2095 } 2096 if (mbs) { 2097 fshift += imax[mbs-1] - ailen[mbs-1]; 2098 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 2099 } 2100 /* reset ilen and imax for each row */ 2101 for (i=0; i<mbs; i++) { 2102 ailen[i] = imax[i] = ai[i+1] - ai[i]; 2103 } 2104 a->nz = ai[mbs]; 2105 2106 /* diagonals may have moved, so kill the diagonal pointers */ 2107 a->idiagvalid = PETSC_FALSE; 2108 if (fshift && a->diag) { 2109 ierr = PetscFree(a->diag);CHKERRQ(ierr); 2110 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2111 a->diag = 0; 2112 } 2113 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 2114 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 2115 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 2116 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 2117 2118 A->info.mallocs += a->reallocs; 2119 a->reallocs = 0; 2120 A->info.nz_unneeded = (PetscReal)fshift*bs2; 2121 2122 ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 2123 2124 A->same_nonzero = PETSC_TRUE; 2125 PetscFunctionReturn(0); 2126 } 2127 2128 /* 2129 This function returns an array of flags which indicate the locations of contiguous 2130 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2131 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2132 Assume: sizes should be long enough to hold all the values. 2133 */ 2134 #undef __FUNCT__ 2135 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 2136 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2137 { 2138 PetscInt i,j,k,row; 2139 PetscBool flg; 2140 2141 PetscFunctionBegin; 2142 for (i=0,j=0; i<n; j++) { 2143 row = idx[i]; 2144 if (row%bs!=0) { /* Not the begining of a block */ 2145 sizes[j] = 1; 2146 i++; 2147 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2148 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 2149 i++; 2150 } else { /* Begining of the block, so check if the complete block exists */ 2151 flg = PETSC_TRUE; 2152 for (k=1; k<bs; k++) { 2153 if (row+k != idx[i+k]) { /* break in the block */ 2154 flg = PETSC_FALSE; 2155 break; 2156 } 2157 } 2158 if (flg) { /* No break in the bs */ 2159 sizes[j] = bs; 2160 i += bs; 2161 } else { 2162 sizes[j] = 1; 2163 i++; 2164 } 2165 } 2166 } 2167 *bs_max = j; 2168 PetscFunctionReturn(0); 2169 } 2170 2171 #undef __FUNCT__ 2172 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2173 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2174 { 2175 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2176 PetscErrorCode ierr; 2177 PetscInt i,j,k,count,*rows; 2178 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2179 PetscScalar zero = 0.0; 2180 MatScalar *aa; 2181 const PetscScalar *xx; 2182 PetscScalar *bb; 2183 2184 PetscFunctionBegin; 2185 /* fix right hand side if needed */ 2186 if (x && b) { 2187 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2188 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2189 for (i=0; i<is_n; i++) { 2190 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2191 } 2192 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2193 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2194 } 2195 2196 /* Make a copy of the IS and sort it */ 2197 /* allocate memory for rows,sizes */ 2198 ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2199 2200 /* copy IS values to rows, and sort them */ 2201 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2202 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2203 2204 if (baij->keepnonzeropattern) { 2205 for (i=0; i<is_n; i++) sizes[i] = 1; 2206 bs_max = is_n; 2207 A->same_nonzero = PETSC_TRUE; 2208 } else { 2209 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2210 2211 A->same_nonzero = PETSC_FALSE; 2212 } 2213 2214 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2215 row = rows[j]; 2216 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2217 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2218 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2219 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2220 if (diag != (PetscScalar)0.0) { 2221 if (baij->ilen[row/bs] > 0) { 2222 baij->ilen[row/bs] = 1; 2223 baij->j[baij->i[row/bs]] = row/bs; 2224 2225 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2226 } 2227 /* Now insert all the diagonal values for this bs */ 2228 for (k=0; k<bs; k++) { 2229 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2230 } 2231 } else { /* (diag == 0.0) */ 2232 baij->ilen[row/bs] = 0; 2233 } /* end (diag == 0.0) */ 2234 } else { /* (sizes[i] != bs) */ 2235 #if defined(PETSC_USE_DEBUG) 2236 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2237 #endif 2238 for (k=0; k<count; k++) { 2239 aa[0] = zero; 2240 aa += bs; 2241 } 2242 if (diag != (PetscScalar)0.0) { 2243 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2244 } 2245 } 2246 } 2247 2248 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2249 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2250 PetscFunctionReturn(0); 2251 } 2252 2253 #undef __FUNCT__ 2254 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2255 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2256 { 2257 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2258 PetscErrorCode ierr; 2259 PetscInt i,j,k,count; 2260 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2261 PetscScalar zero = 0.0; 2262 MatScalar *aa; 2263 const PetscScalar *xx; 2264 PetscScalar *bb; 2265 PetscBool *zeroed,vecs = PETSC_FALSE; 2266 2267 PetscFunctionBegin; 2268 /* fix right hand side if needed */ 2269 if (x && b) { 2270 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2271 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2272 vecs = PETSC_TRUE; 2273 } 2274 A->same_nonzero = PETSC_TRUE; 2275 2276 /* zero the columns */ 2277 ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr); 2278 ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 2279 for (i=0; i<is_n; i++) { 2280 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2281 zeroed[is_idx[i]] = PETSC_TRUE; 2282 } 2283 for (i=0; i<A->rmap->N; i++) { 2284 if (!zeroed[i]) { 2285 row = i/bs; 2286 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2287 for (k=0; k<bs; k++) { 2288 col = bs*baij->j[j] + k; 2289 if (zeroed[col]) { 2290 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2291 if (vecs) bb[i] -= aa[0]*xx[col]; 2292 aa[0] = 0.0; 2293 } 2294 } 2295 } 2296 } else if (vecs) bb[i] = diag*xx[i]; 2297 } 2298 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2299 if (vecs) { 2300 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2301 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2302 } 2303 2304 /* zero the rows */ 2305 for (i=0; i<is_n; i++) { 2306 row = is_idx[i]; 2307 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2308 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2309 for (k=0; k<count; k++) { 2310 aa[0] = zero; 2311 aa += bs; 2312 } 2313 if (diag != (PetscScalar)0.0) { 2314 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2315 } 2316 } 2317 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2318 PetscFunctionReturn(0); 2319 } 2320 2321 #undef __FUNCT__ 2322 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2323 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2324 { 2325 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2326 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2327 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2328 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2329 PetscErrorCode ierr; 2330 PetscInt ridx,cidx,bs2=a->bs2; 2331 PetscBool roworiented=a->roworiented; 2332 MatScalar *ap,value,*aa=a->a,*bap; 2333 2334 PetscFunctionBegin; 2335 if (v) PetscValidScalarPointer(v,6); 2336 for (k=0; k<m; k++) { /* loop over added rows */ 2337 row = im[k]; 2338 brow = row/bs; 2339 if (row < 0) continue; 2340 #if defined(PETSC_USE_DEBUG) 2341 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2342 #endif 2343 rp = aj + ai[brow]; 2344 ap = aa + bs2*ai[brow]; 2345 rmax = imax[brow]; 2346 nrow = ailen[brow]; 2347 low = 0; 2348 high = nrow; 2349 for (l=0; l<n; l++) { /* loop over added columns */ 2350 if (in[l] < 0) continue; 2351 #if defined(PETSC_USE_DEBUG) 2352 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2353 #endif 2354 col = in[l]; bcol = col/bs; 2355 ridx = row % bs; cidx = col % bs; 2356 if (roworiented) { 2357 value = v[l + k*n]; 2358 } else { 2359 value = v[k + l*m]; 2360 } 2361 if (col <= lastcol) low = 0; else high = nrow; 2362 lastcol = col; 2363 while (high-low > 7) { 2364 t = (low+high)/2; 2365 if (rp[t] > bcol) high = t; 2366 else low = t; 2367 } 2368 for (i=low; i<high; i++) { 2369 if (rp[i] > bcol) break; 2370 if (rp[i] == bcol) { 2371 bap = ap + bs2*i + bs*cidx + ridx; 2372 if (is == ADD_VALUES) *bap += value; 2373 else *bap = value; 2374 goto noinsert1; 2375 } 2376 } 2377 if (nonew == 1) goto noinsert1; 2378 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2379 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2380 N = nrow++ - 1; high++; 2381 /* shift up all the later entries in this row */ 2382 for (ii=N; ii>=i; ii--) { 2383 rp[ii+1] = rp[ii]; 2384 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2385 } 2386 if (N>=i) { 2387 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2388 } 2389 rp[i] = bcol; 2390 ap[bs2*i + bs*cidx + ridx] = value; 2391 a->nz++; 2392 noinsert1:; 2393 low = i; 2394 } 2395 ailen[brow] = nrow; 2396 } 2397 A->same_nonzero = PETSC_FALSE; 2398 PetscFunctionReturn(0); 2399 } 2400 2401 #undef __FUNCT__ 2402 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2403 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2404 { 2405 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2406 Mat outA; 2407 PetscErrorCode ierr; 2408 PetscBool row_identity,col_identity; 2409 2410 PetscFunctionBegin; 2411 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2412 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2413 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2414 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2415 2416 outA = inA; 2417 inA->factortype = MAT_FACTOR_LU; 2418 2419 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2420 2421 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2422 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2423 a->row = row; 2424 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2425 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2426 a->col = col; 2427 2428 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2429 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2430 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2431 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2432 2433 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2434 if (!a->solve_work) { 2435 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2436 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2437 } 2438 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2439 PetscFunctionReturn(0); 2440 } 2441 2442 #undef __FUNCT__ 2443 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2444 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2445 { 2446 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2447 PetscInt i,nz,mbs; 2448 2449 PetscFunctionBegin; 2450 nz = baij->maxnz; 2451 mbs = baij->mbs; 2452 for (i=0; i<nz; i++) { 2453 baij->j[i] = indices[i]; 2454 } 2455 baij->nz = nz; 2456 for (i=0; i<mbs; i++) { 2457 baij->ilen[i] = baij->imax[i]; 2458 } 2459 PetscFunctionReturn(0); 2460 } 2461 2462 #undef __FUNCT__ 2463 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2464 /*@ 2465 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2466 in the matrix. 2467 2468 Input Parameters: 2469 + mat - the SeqBAIJ matrix 2470 - indices - the column indices 2471 2472 Level: advanced 2473 2474 Notes: 2475 This can be called if you have precomputed the nonzero structure of the 2476 matrix and want to provide it to the matrix object to improve the performance 2477 of the MatSetValues() operation. 2478 2479 You MUST have set the correct numbers of nonzeros per row in the call to 2480 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2481 2482 MUST be called before any calls to MatSetValues(); 2483 2484 @*/ 2485 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2486 { 2487 PetscErrorCode ierr; 2488 2489 PetscFunctionBegin; 2490 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2491 PetscValidPointer(indices,2); 2492 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2493 PetscFunctionReturn(0); 2494 } 2495 2496 #undef __FUNCT__ 2497 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2498 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2499 { 2500 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2501 PetscErrorCode ierr; 2502 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2503 PetscReal atmp; 2504 PetscScalar *x,zero = 0.0; 2505 MatScalar *aa; 2506 PetscInt ncols,brow,krow,kcol; 2507 2508 PetscFunctionBegin; 2509 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2510 bs = A->rmap->bs; 2511 aa = a->a; 2512 ai = a->i; 2513 aj = a->j; 2514 mbs = a->mbs; 2515 2516 ierr = VecSet(v,zero);CHKERRQ(ierr); 2517 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2518 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2519 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2520 for (i=0; i<mbs; i++) { 2521 ncols = ai[1] - ai[0]; ai++; 2522 brow = bs*i; 2523 for (j=0; j<ncols; j++) { 2524 for (kcol=0; kcol<bs; kcol++) { 2525 for (krow=0; krow<bs; krow++) { 2526 atmp = PetscAbsScalar(*aa);aa++; 2527 row = brow + krow; /* row index */ 2528 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2529 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2530 } 2531 } 2532 aj++; 2533 } 2534 } 2535 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2536 PetscFunctionReturn(0); 2537 } 2538 2539 #undef __FUNCT__ 2540 #define __FUNCT__ "MatCopy_SeqBAIJ" 2541 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2542 { 2543 PetscErrorCode ierr; 2544 2545 PetscFunctionBegin; 2546 /* If the two matrices have the same copy implementation, use fast copy. */ 2547 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2548 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2549 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2550 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2551 2552 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2553 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2554 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2555 } else { 2556 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2557 } 2558 PetscFunctionReturn(0); 2559 } 2560 2561 #undef __FUNCT__ 2562 #define __FUNCT__ "MatSetUp_SeqBAIJ" 2563 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2564 { 2565 PetscErrorCode ierr; 2566 2567 PetscFunctionBegin; 2568 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2569 PetscFunctionReturn(0); 2570 } 2571 2572 #undef __FUNCT__ 2573 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ" 2574 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2575 { 2576 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2577 2578 PetscFunctionBegin; 2579 *array = a->a; 2580 PetscFunctionReturn(0); 2581 } 2582 2583 #undef __FUNCT__ 2584 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ" 2585 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2586 { 2587 PetscFunctionBegin; 2588 PetscFunctionReturn(0); 2589 } 2590 2591 #undef __FUNCT__ 2592 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2593 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2594 { 2595 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2596 PetscErrorCode ierr; 2597 PetscInt i,bs=Y->rmap->bs,j,bs2=bs*bs; 2598 PetscBLASInt one=1; 2599 2600 PetscFunctionBegin; 2601 if (str == SAME_NONZERO_PATTERN) { 2602 PetscScalar alpha = a; 2603 PetscBLASInt bnz; 2604 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2605 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2606 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2607 if (y->xtoy && y->XtoY != X) { 2608 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2609 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2610 } 2611 if (!y->xtoy) { /* get xtoy */ 2612 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,NULL, y->i,y->j,NULL, &y->xtoy);CHKERRQ(ierr); 2613 y->XtoY = X; 2614 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2615 } 2616 for (i=0; i<x->nz; i++) { 2617 j = 0; 2618 while (j < bs2) { 2619 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2620 j++; 2621 } 2622 } 2623 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2624 } else { 2625 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2626 } 2627 PetscFunctionReturn(0); 2628 } 2629 2630 #undef __FUNCT__ 2631 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2632 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2633 { 2634 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2635 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2636 MatScalar *aa = a->a; 2637 2638 PetscFunctionBegin; 2639 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2640 PetscFunctionReturn(0); 2641 } 2642 2643 #undef __FUNCT__ 2644 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2645 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2646 { 2647 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2648 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2649 MatScalar *aa = a->a; 2650 2651 PetscFunctionBegin; 2652 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2653 PetscFunctionReturn(0); 2654 } 2655 2656 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2657 2658 #undef __FUNCT__ 2659 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2660 /* 2661 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2662 */ 2663 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2664 { 2665 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2666 PetscErrorCode ierr; 2667 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2668 PetscInt nz = a->i[m],row,*jj,mr,col; 2669 2670 PetscFunctionBegin; 2671 *nn = n; 2672 if (!ia) PetscFunctionReturn(0); 2673 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2674 else { 2675 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2676 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2677 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2678 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2679 jj = a->j; 2680 for (i=0; i<nz; i++) { 2681 collengths[jj[i]]++; 2682 } 2683 cia[0] = oshift; 2684 for (i=0; i<n; i++) { 2685 cia[i+1] = cia[i] + collengths[i]; 2686 } 2687 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2688 jj = a->j; 2689 for (row=0; row<m; row++) { 2690 mr = a->i[row+1] - a->i[row]; 2691 for (i=0; i<mr; i++) { 2692 col = *jj++; 2693 2694 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2695 } 2696 } 2697 ierr = PetscFree(collengths);CHKERRQ(ierr); 2698 *ia = cia; *ja = cja; 2699 } 2700 PetscFunctionReturn(0); 2701 } 2702 2703 #undef __FUNCT__ 2704 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2705 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2706 { 2707 PetscErrorCode ierr; 2708 2709 PetscFunctionBegin; 2710 if (!ia) PetscFunctionReturn(0); 2711 ierr = PetscFree(*ia);CHKERRQ(ierr); 2712 ierr = PetscFree(*ja);CHKERRQ(ierr); 2713 PetscFunctionReturn(0); 2714 } 2715 2716 #undef __FUNCT__ 2717 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2718 PetscErrorCode MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2719 { 2720 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 2721 PetscErrorCode ierr; 2722 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow; 2723 PetscScalar dx,*y,*xx,*w3_array; 2724 PetscScalar *vscale_array; 2725 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2726 Vec w1 = coloring->w1,w2=coloring->w2,w3; 2727 void *fctx = coloring->fctx; 2728 PetscBool flg = PETSC_FALSE; 2729 PetscInt ctype = coloring->ctype,N,col_start=0,col_end=0; 2730 Vec x1_tmp; 2731 2732 PetscFunctionBegin; 2733 PetscValidHeaderSpecific(J,MAT_CLASSID,1); 2734 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2); 2735 PetscValidHeaderSpecific(x1,VEC_CLASSID,3); 2736 if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2737 2738 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2739 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2740 ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 2741 if (flg) { 2742 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2743 } else { 2744 PetscBool assembled; 2745 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2746 if (assembled) { 2747 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2748 } 2749 } 2750 2751 x1_tmp = x1; 2752 if (!coloring->vscale) { 2753 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2754 } 2755 2756 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2757 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2758 } 2759 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2760 2761 /* Set w1 = F(x1) */ 2762 if (!coloring->fset) { 2763 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2764 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2765 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2766 } else { 2767 coloring->fset = PETSC_FALSE; 2768 } 2769 2770 if (!coloring->w3) { 2771 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2772 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2773 } 2774 w3 = coloring->w3; 2775 2776 /* Compute all the local scale factors, including ghost points */ 2777 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2778 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2779 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2780 if (ctype == IS_COLORING_GHOSTED) { 2781 col_start = 0; col_end = N; 2782 } else if (ctype == IS_COLORING_GLOBAL) { 2783 xx = xx - start; 2784 vscale_array = vscale_array - start; 2785 col_start = start; col_end = N + start; 2786 } 2787 for (col=col_start; col<col_end; col++) { 2788 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2789 if (coloring->htype[0] == 'w') { 2790 dx = 1.0 + unorm; 2791 } else { 2792 dx = xx[col]; 2793 } 2794 if (dx == (PetscScalar)0.0) dx = 1.0; 2795 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2796 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2797 dx *= epsilon; 2798 vscale_array[col] = (PetscScalar)1.0/dx; 2799 } 2800 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2801 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2802 if (ctype == IS_COLORING_GLOBAL) { 2803 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2804 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2805 } 2806 if (coloring->vscaleforrow) { 2807 vscaleforrow = coloring->vscaleforrow; 2808 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2809 2810 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2811 /* 2812 Loop over each color 2813 */ 2814 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2815 for (k=0; k<coloring->ncolors; k++) { 2816 coloring->currentcolor = k; 2817 for (i=0; i<bs; i++) { 2818 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2819 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2820 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2821 /* 2822 Loop over each column associated with color 2823 adding the perturbation to the vector w3. 2824 */ 2825 for (l=0; l<coloring->ncolumns[k]; l++) { 2826 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2827 if (coloring->htype[0] == 'w') { 2828 dx = 1.0 + unorm; 2829 } else { 2830 dx = xx[col]; 2831 } 2832 if (dx == (PetscScalar)0.0) dx = 1.0; 2833 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2834 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2835 dx *= epsilon; 2836 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2837 w3_array[col] += dx; 2838 } 2839 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2840 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2841 2842 /* 2843 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2844 w2 = F(x1 + dx) - F(x1) 2845 */ 2846 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2847 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2848 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2849 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2850 2851 /* 2852 Loop over rows of vector, putting results into Jacobian matrix 2853 */ 2854 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2855 for (l=0; l<coloring->nrows[k]; l++) { 2856 row = bs*coloring->rows[k][l]; /* local row index */ 2857 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2858 for (j=0; j<bs; j++) { 2859 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2860 srows[j] = row + start + j; 2861 } 2862 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2863 } 2864 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2865 } 2866 } /* endof for each color */ 2867 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2868 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2869 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2870 ierr = PetscFree(srows);CHKERRQ(ierr); 2871 2872 coloring->currentcolor = -1; 2873 2874 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2875 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2876 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2877 PetscFunctionReturn(0); 2878 } 2879 2880 /* -------------------------------------------------------------------*/ 2881 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2882 MatGetRow_SeqBAIJ, 2883 MatRestoreRow_SeqBAIJ, 2884 MatMult_SeqBAIJ_N, 2885 /* 4*/ MatMultAdd_SeqBAIJ_N, 2886 MatMultTranspose_SeqBAIJ, 2887 MatMultTransposeAdd_SeqBAIJ, 2888 0, 2889 0, 2890 0, 2891 /* 10*/ 0, 2892 MatLUFactor_SeqBAIJ, 2893 0, 2894 0, 2895 MatTranspose_SeqBAIJ, 2896 /* 15*/ MatGetInfo_SeqBAIJ, 2897 MatEqual_SeqBAIJ, 2898 MatGetDiagonal_SeqBAIJ, 2899 MatDiagonalScale_SeqBAIJ, 2900 MatNorm_SeqBAIJ, 2901 /* 20*/ 0, 2902 MatAssemblyEnd_SeqBAIJ, 2903 MatSetOption_SeqBAIJ, 2904 MatZeroEntries_SeqBAIJ, 2905 /* 24*/ MatZeroRows_SeqBAIJ, 2906 0, 2907 0, 2908 0, 2909 0, 2910 /* 29*/ MatSetUp_SeqBAIJ, 2911 0, 2912 0, 2913 0, 2914 0, 2915 /* 34*/ MatDuplicate_SeqBAIJ, 2916 0, 2917 0, 2918 MatILUFactor_SeqBAIJ, 2919 0, 2920 /* 39*/ MatAXPY_SeqBAIJ, 2921 MatGetSubMatrices_SeqBAIJ, 2922 MatIncreaseOverlap_SeqBAIJ, 2923 MatGetValues_SeqBAIJ, 2924 MatCopy_SeqBAIJ, 2925 /* 44*/ 0, 2926 MatScale_SeqBAIJ, 2927 0, 2928 0, 2929 MatZeroRowsColumns_SeqBAIJ, 2930 /* 49*/ 0, 2931 MatGetRowIJ_SeqBAIJ, 2932 MatRestoreRowIJ_SeqBAIJ, 2933 MatGetColumnIJ_SeqBAIJ, 2934 MatRestoreColumnIJ_SeqBAIJ, 2935 /* 54*/ MatFDColoringCreate_SeqAIJ, 2936 0, 2937 0, 2938 0, 2939 MatSetValuesBlocked_SeqBAIJ, 2940 /* 59*/ MatGetSubMatrix_SeqBAIJ, 2941 MatDestroy_SeqBAIJ, 2942 MatView_SeqBAIJ, 2943 0, 2944 0, 2945 /* 64*/ 0, 2946 0, 2947 0, 2948 0, 2949 0, 2950 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2951 0, 2952 MatConvert_Basic, 2953 0, 2954 0, 2955 /* 74*/ 0, 2956 MatFDColoringApply_BAIJ, 2957 0, 2958 0, 2959 0, 2960 /* 79*/ 0, 2961 0, 2962 0, 2963 0, 2964 MatLoad_SeqBAIJ, 2965 /* 84*/ 0, 2966 0, 2967 0, 2968 0, 2969 0, 2970 /* 89*/ 0, 2971 0, 2972 0, 2973 0, 2974 0, 2975 /* 94*/ 0, 2976 0, 2977 0, 2978 0, 2979 0, 2980 /* 99*/ 0, 2981 0, 2982 0, 2983 0, 2984 0, 2985 /*104*/ 0, 2986 MatRealPart_SeqBAIJ, 2987 MatImaginaryPart_SeqBAIJ, 2988 0, 2989 0, 2990 /*109*/ 0, 2991 0, 2992 0, 2993 0, 2994 MatMissingDiagonal_SeqBAIJ, 2995 /*114*/ 0, 2996 0, 2997 0, 2998 0, 2999 0, 3000 /*119*/ 0, 3001 0, 3002 MatMultHermitianTranspose_SeqBAIJ, 3003 MatMultHermitianTransposeAdd_SeqBAIJ, 3004 0, 3005 /*124*/ 0, 3006 0, 3007 MatInvertBlockDiagonal_SeqBAIJ, 3008 0, 3009 0, 3010 /*129*/ 0, 3011 0, 3012 0, 3013 0, 3014 0, 3015 /*134*/ 0, 3016 0, 3017 0, 3018 0, 3019 0, 3020 /*139*/ 0, 3021 0 3022 }; 3023 3024 #undef __FUNCT__ 3025 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 3026 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3027 { 3028 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 3029 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 3030 PetscErrorCode ierr; 3031 3032 PetscFunctionBegin; 3033 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3034 3035 /* allocate space for values if not already there */ 3036 if (!aij->saved_values) { 3037 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 3038 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 3039 } 3040 3041 /* copy values over */ 3042 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3043 PetscFunctionReturn(0); 3044 } 3045 3046 #undef __FUNCT__ 3047 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 3048 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3049 { 3050 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 3051 PetscErrorCode ierr; 3052 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 3053 3054 PetscFunctionBegin; 3055 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3056 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 3057 3058 /* copy values over */ 3059 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3060 PetscFunctionReturn(0); 3061 } 3062 3063 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 3064 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 3065 3066 #undef __FUNCT__ 3067 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 3068 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 3069 { 3070 Mat_SeqBAIJ *b; 3071 PetscErrorCode ierr; 3072 PetscInt i,mbs,nbs,bs2; 3073 PetscBool flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 3074 3075 PetscFunctionBegin; 3076 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 3077 if (nz == MAT_SKIP_ALLOCATION) { 3078 skipallocation = PETSC_TRUE; 3079 nz = 0; 3080 } 3081 3082 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3083 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3084 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3085 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3086 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3087 3088 B->preallocated = PETSC_TRUE; 3089 3090 mbs = B->rmap->n/bs; 3091 nbs = B->cmap->n/bs; 3092 bs2 = bs*bs; 3093 3094 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 3095 3096 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3097 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 3098 if (nnz) { 3099 for (i=0; i<mbs; i++) { 3100 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 3101 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 3102 } 3103 } 3104 3105 b = (Mat_SeqBAIJ*)B->data; 3106 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 3107 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,PETSC_FALSE,&flg,NULL);CHKERRQ(ierr); 3108 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3109 3110 if (!flg) { 3111 switch (bs) { 3112 case 1: 3113 B->ops->mult = MatMult_SeqBAIJ_1; 3114 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3115 B->ops->sor = MatSOR_SeqBAIJ_1; 3116 break; 3117 case 2: 3118 B->ops->mult = MatMult_SeqBAIJ_2; 3119 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3120 B->ops->sor = MatSOR_SeqBAIJ_2; 3121 break; 3122 case 3: 3123 B->ops->mult = MatMult_SeqBAIJ_3; 3124 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3125 B->ops->sor = MatSOR_SeqBAIJ_3; 3126 break; 3127 case 4: 3128 B->ops->mult = MatMult_SeqBAIJ_4; 3129 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3130 B->ops->sor = MatSOR_SeqBAIJ_4; 3131 break; 3132 case 5: 3133 B->ops->mult = MatMult_SeqBAIJ_5; 3134 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3135 B->ops->sor = MatSOR_SeqBAIJ_5; 3136 break; 3137 case 6: 3138 B->ops->mult = MatMult_SeqBAIJ_6; 3139 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3140 B->ops->sor = MatSOR_SeqBAIJ_6; 3141 break; 3142 case 7: 3143 B->ops->mult = MatMult_SeqBAIJ_7; 3144 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3145 B->ops->sor = MatSOR_SeqBAIJ_7; 3146 break; 3147 case 15: 3148 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3149 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3150 B->ops->sor = MatSOR_SeqBAIJ_N; 3151 break; 3152 default: 3153 B->ops->mult = MatMult_SeqBAIJ_N; 3154 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3155 B->ops->sor = MatSOR_SeqBAIJ_N; 3156 break; 3157 } 3158 } 3159 b->mbs = mbs; 3160 b->nbs = nbs; 3161 if (!skipallocation) { 3162 if (!b->imax) { 3163 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 3164 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3165 3166 b->free_imax_ilen = PETSC_TRUE; 3167 } 3168 /* b->ilen will count nonzeros in each block row so far. */ 3169 for (i=0; i<mbs; i++) b->ilen[i] = 0; 3170 if (!nnz) { 3171 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3172 else if (nz < 0) nz = 1; 3173 for (i=0; i<mbs; i++) b->imax[i] = nz; 3174 nz = nz*mbs; 3175 } else { 3176 nz = 0; 3177 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 3178 } 3179 3180 /* allocate the matrix space */ 3181 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 3182 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 3183 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 3184 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 3185 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3186 3187 b->singlemalloc = PETSC_TRUE; 3188 b->i[0] = 0; 3189 for (i=1; i<mbs+1; i++) { 3190 b->i[i] = b->i[i-1] + b->imax[i-1]; 3191 } 3192 b->free_a = PETSC_TRUE; 3193 b->free_ij = PETSC_TRUE; 3194 } else { 3195 b->free_a = PETSC_FALSE; 3196 b->free_ij = PETSC_FALSE; 3197 } 3198 3199 b->bs2 = bs2; 3200 b->mbs = mbs; 3201 b->nz = 0; 3202 b->maxnz = nz; 3203 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3204 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 3205 PetscFunctionReturn(0); 3206 } 3207 3208 #undef __FUNCT__ 3209 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3210 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3211 { 3212 PetscInt i,m,nz,nz_max=0,*nnz; 3213 PetscScalar *values=0; 3214 PetscErrorCode ierr; 3215 3216 PetscFunctionBegin; 3217 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3218 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3219 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3220 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3221 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3222 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3223 m = B->rmap->n/bs; 3224 3225 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 3226 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3227 for (i=0; i<m; i++) { 3228 nz = ii[i+1]- ii[i]; 3229 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 3230 nz_max = PetscMax(nz_max, nz); 3231 nnz[i] = nz; 3232 } 3233 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3234 ierr = PetscFree(nnz);CHKERRQ(ierr); 3235 3236 values = (PetscScalar*)V; 3237 if (!values) { 3238 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3239 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3240 } 3241 for (i=0; i<m; i++) { 3242 PetscInt ncols = ii[i+1] - ii[i]; 3243 const PetscInt *icols = jj + ii[i]; 3244 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3245 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3246 } 3247 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3248 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3249 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3250 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3251 PetscFunctionReturn(0); 3252 } 3253 3254 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3255 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*); 3256 #if defined(PETSC_HAVE_MUMPS) 3257 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3258 #endif 3259 extern PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,PetscBool*); 3260 3261 /*MC 3262 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3263 block sparse compressed row format. 3264 3265 Options Database Keys: 3266 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3267 3268 Level: beginner 3269 3270 .seealso: MatCreateSeqBAIJ() 3271 M*/ 3272 3273 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3274 3275 #undef __FUNCT__ 3276 #define __FUNCT__ "MatCreate_SeqBAIJ" 3277 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3278 { 3279 PetscErrorCode ierr; 3280 PetscMPIInt size; 3281 Mat_SeqBAIJ *b; 3282 3283 PetscFunctionBegin; 3284 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3285 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3286 3287 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3288 B->data = (void*)b; 3289 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3290 3291 b->row = 0; 3292 b->col = 0; 3293 b->icol = 0; 3294 b->reallocs = 0; 3295 b->saved_values = 0; 3296 3297 b->roworiented = PETSC_TRUE; 3298 b->nonew = 0; 3299 b->diag = 0; 3300 b->solve_work = 0; 3301 b->mult_work = 0; 3302 B->spptr = 0; 3303 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3304 b->keepnonzeropattern = PETSC_FALSE; 3305 b->xtoy = 0; 3306 b->XtoY = 0; 3307 B->same_nonzero = PETSC_FALSE; 3308 3309 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactorAvailable_petsc_C",MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3310 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_petsc_C",MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3311 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_bstrm_C",MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr); 3312 #if defined(PETSC_HAVE_MUMPS) 3313 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C", MatGetFactor_baij_mumps);CHKERRQ(ierr); 3314 #endif 3315 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3316 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3317 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3318 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3319 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3320 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3321 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3322 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3323 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3324 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3325 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3326 PetscFunctionReturn(0); 3327 } 3328 3329 #undef __FUNCT__ 3330 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3331 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3332 { 3333 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3334 PetscErrorCode ierr; 3335 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3336 3337 PetscFunctionBegin; 3338 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3339 3340 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3341 c->imax = a->imax; 3342 c->ilen = a->ilen; 3343 c->free_imax_ilen = PETSC_FALSE; 3344 } else { 3345 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3346 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3347 for (i=0; i<mbs; i++) { 3348 c->imax[i] = a->imax[i]; 3349 c->ilen[i] = a->ilen[i]; 3350 } 3351 c->free_imax_ilen = PETSC_TRUE; 3352 } 3353 3354 /* allocate the matrix space */ 3355 if (mallocmatspace) { 3356 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3357 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3358 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3359 ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr); 3360 3361 c->i = a->i; 3362 c->j = a->j; 3363 c->singlemalloc = PETSC_FALSE; 3364 c->free_a = PETSC_TRUE; 3365 c->free_ij = PETSC_FALSE; 3366 c->parent = A; 3367 C->preallocated = PETSC_TRUE; 3368 C->assembled = PETSC_TRUE; 3369 3370 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3371 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3372 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3373 } else { 3374 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3375 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3376 3377 c->singlemalloc = PETSC_TRUE; 3378 c->free_a = PETSC_TRUE; 3379 c->free_ij = PETSC_TRUE; 3380 3381 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3382 if (mbs > 0) { 3383 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3384 if (cpvalues == MAT_COPY_VALUES) { 3385 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3386 } else { 3387 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3388 } 3389 } 3390 C->preallocated = PETSC_TRUE; 3391 C->assembled = PETSC_TRUE; 3392 } 3393 } 3394 3395 c->roworiented = a->roworiented; 3396 c->nonew = a->nonew; 3397 3398 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3399 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3400 3401 c->bs2 = a->bs2; 3402 c->mbs = a->mbs; 3403 c->nbs = a->nbs; 3404 3405 if (a->diag) { 3406 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3407 c->diag = a->diag; 3408 c->free_diag = PETSC_FALSE; 3409 } else { 3410 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3411 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3412 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3413 c->free_diag = PETSC_TRUE; 3414 } 3415 } else c->diag = 0; 3416 3417 c->nz = a->nz; 3418 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3419 c->solve_work = 0; 3420 c->mult_work = 0; 3421 3422 c->compressedrow.use = a->compressedrow.use; 3423 c->compressedrow.nrows = a->compressedrow.nrows; 3424 c->compressedrow.check = a->compressedrow.check; 3425 if (a->compressedrow.use) { 3426 i = a->compressedrow.nrows; 3427 ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 3428 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3429 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3430 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3431 } else { 3432 c->compressedrow.use = PETSC_FALSE; 3433 c->compressedrow.i = NULL; 3434 c->compressedrow.rindex = NULL; 3435 } 3436 C->same_nonzero = A->same_nonzero; 3437 3438 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3439 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3440 PetscFunctionReturn(0); 3441 } 3442 3443 #undef __FUNCT__ 3444 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3445 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3446 { 3447 PetscErrorCode ierr; 3448 3449 PetscFunctionBegin; 3450 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3451 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3452 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3453 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3454 PetscFunctionReturn(0); 3455 } 3456 3457 #undef __FUNCT__ 3458 #define __FUNCT__ "MatLoad_SeqBAIJ" 3459 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3460 { 3461 Mat_SeqBAIJ *a; 3462 PetscErrorCode ierr; 3463 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3464 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3465 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3466 PetscInt *masked,nmask,tmp,bs2,ishift; 3467 PetscMPIInt size; 3468 int fd; 3469 PetscScalar *aa; 3470 MPI_Comm comm; 3471 3472 PetscFunctionBegin; 3473 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3474 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3475 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3476 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3477 bs2 = bs*bs; 3478 3479 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3480 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3481 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3482 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3483 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3484 M = header[1]; N = header[2]; nz = header[3]; 3485 3486 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3487 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3488 3489 /* 3490 This code adds extra rows to make sure the number of rows is 3491 divisible by the blocksize 3492 */ 3493 mbs = M/bs; 3494 extra_rows = bs - M + bs*(mbs); 3495 if (extra_rows == bs) extra_rows = 0; 3496 else mbs++; 3497 if (extra_rows) { 3498 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3499 } 3500 3501 /* Set global sizes if not already set */ 3502 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3503 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3504 } else { /* Check if the matrix global sizes are correct */ 3505 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3506 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3507 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3508 } 3509 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3510 } 3511 3512 /* read in row lengths */ 3513 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3514 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3515 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3516 3517 /* read in column indices */ 3518 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3519 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3520 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3521 3522 /* loop over row lengths determining block row lengths */ 3523 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3524 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3525 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3526 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3527 rowcount = 0; 3528 nzcount = 0; 3529 for (i=0; i<mbs; i++) { 3530 nmask = 0; 3531 for (j=0; j<bs; j++) { 3532 kmax = rowlengths[rowcount]; 3533 for (k=0; k<kmax; k++) { 3534 tmp = jj[nzcount++]/bs; 3535 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3536 } 3537 rowcount++; 3538 } 3539 browlengths[i] += nmask; 3540 /* zero out the mask elements we set */ 3541 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3542 } 3543 3544 /* Do preallocation */ 3545 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3546 a = (Mat_SeqBAIJ*)newmat->data; 3547 3548 /* set matrix "i" values */ 3549 a->i[0] = 0; 3550 for (i=1; i<= mbs; i++) { 3551 a->i[i] = a->i[i-1] + browlengths[i-1]; 3552 a->ilen[i-1] = browlengths[i-1]; 3553 } 3554 a->nz = 0; 3555 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3556 3557 /* read in nonzero values */ 3558 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3559 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3560 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3561 3562 /* set "a" and "j" values into matrix */ 3563 nzcount = 0; jcount = 0; 3564 for (i=0; i<mbs; i++) { 3565 nzcountb = nzcount; 3566 nmask = 0; 3567 for (j=0; j<bs; j++) { 3568 kmax = rowlengths[i*bs+j]; 3569 for (k=0; k<kmax; k++) { 3570 tmp = jj[nzcount++]/bs; 3571 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3572 } 3573 } 3574 /* sort the masked values */ 3575 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3576 3577 /* set "j" values into matrix */ 3578 maskcount = 1; 3579 for (j=0; j<nmask; j++) { 3580 a->j[jcount++] = masked[j]; 3581 mask[masked[j]] = maskcount++; 3582 } 3583 /* set "a" values into matrix */ 3584 ishift = bs2*a->i[i]; 3585 for (j=0; j<bs; j++) { 3586 kmax = rowlengths[i*bs+j]; 3587 for (k=0; k<kmax; k++) { 3588 tmp = jj[nzcountb]/bs; 3589 block = mask[tmp] - 1; 3590 point = jj[nzcountb] - bs*tmp; 3591 idx = ishift + bs2*block + j + bs*point; 3592 a->a[idx] = (MatScalar)aa[nzcountb++]; 3593 } 3594 } 3595 /* zero out the mask elements we set */ 3596 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3597 } 3598 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3599 3600 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3601 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3602 ierr = PetscFree(aa);CHKERRQ(ierr); 3603 ierr = PetscFree(jj);CHKERRQ(ierr); 3604 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3605 3606 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3607 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3608 PetscFunctionReturn(0); 3609 } 3610 3611 #undef __FUNCT__ 3612 #define __FUNCT__ "MatCreateSeqBAIJ" 3613 /*@C 3614 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3615 compressed row) format. For good matrix assembly performance the 3616 user should preallocate the matrix storage by setting the parameter nz 3617 (or the array nnz). By setting these parameters accurately, performance 3618 during matrix assembly can be increased by more than a factor of 50. 3619 3620 Collective on MPI_Comm 3621 3622 Input Parameters: 3623 + comm - MPI communicator, set to PETSC_COMM_SELF 3624 . bs - size of block 3625 . m - number of rows 3626 . n - number of columns 3627 . nz - number of nonzero blocks per block row (same for all rows) 3628 - nnz - array containing the number of nonzero blocks in the various block rows 3629 (possibly different for each block row) or NULL 3630 3631 Output Parameter: 3632 . A - the matrix 3633 3634 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3635 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3636 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3637 3638 Options Database Keys: 3639 . -mat_no_unroll - uses code that does not unroll the loops in the 3640 block calculations (much slower) 3641 . -mat_block_size - size of the blocks to use 3642 3643 Level: intermediate 3644 3645 Notes: 3646 The number of rows and columns must be divisible by blocksize. 3647 3648 If the nnz parameter is given then the nz parameter is ignored 3649 3650 A nonzero block is any block that as 1 or more nonzeros in it 3651 3652 The block AIJ format is fully compatible with standard Fortran 77 3653 storage. That is, the stored row and column indices can begin at 3654 either one (as in Fortran) or zero. See the users' manual for details. 3655 3656 Specify the preallocated storage with either nz or nnz (not both). 3657 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3658 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3659 matrices. 3660 3661 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3662 @*/ 3663 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3664 { 3665 PetscErrorCode ierr; 3666 3667 PetscFunctionBegin; 3668 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3669 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3670 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3671 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3672 PetscFunctionReturn(0); 3673 } 3674 3675 #undef __FUNCT__ 3676 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3677 /*@C 3678 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3679 per row in the matrix. For good matrix assembly performance the 3680 user should preallocate the matrix storage by setting the parameter nz 3681 (or the array nnz). By setting these parameters accurately, performance 3682 during matrix assembly can be increased by more than a factor of 50. 3683 3684 Collective on MPI_Comm 3685 3686 Input Parameters: 3687 + A - the matrix 3688 . bs - size of block 3689 . nz - number of block nonzeros per block row (same for all rows) 3690 - nnz - array containing the number of block nonzeros in the various block rows 3691 (possibly different for each block row) or NULL 3692 3693 Options Database Keys: 3694 . -mat_no_unroll - uses code that does not unroll the loops in the 3695 block calculations (much slower) 3696 . -mat_block_size - size of the blocks to use 3697 3698 Level: intermediate 3699 3700 Notes: 3701 If the nnz parameter is given then the nz parameter is ignored 3702 3703 You can call MatGetInfo() to get information on how effective the preallocation was; 3704 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3705 You can also run with the option -info and look for messages with the string 3706 malloc in them to see if additional memory allocation was needed. 3707 3708 The block AIJ format is fully compatible with standard Fortran 77 3709 storage. That is, the stored row and column indices can begin at 3710 either one (as in Fortran) or zero. See the users' manual for details. 3711 3712 Specify the preallocated storage with either nz or nnz (not both). 3713 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3714 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3715 3716 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3717 @*/ 3718 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3719 { 3720 PetscErrorCode ierr; 3721 3722 PetscFunctionBegin; 3723 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3724 PetscValidType(B,1); 3725 PetscValidLogicalCollectiveInt(B,bs,2); 3726 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3727 PetscFunctionReturn(0); 3728 } 3729 3730 #undef __FUNCT__ 3731 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3732 /*@C 3733 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3734 (the default sequential PETSc format). 3735 3736 Collective on MPI_Comm 3737 3738 Input Parameters: 3739 + A - the matrix 3740 . i - the indices into j for the start of each local row (starts with zero) 3741 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3742 - v - optional values in the matrix 3743 3744 Level: developer 3745 3746 .keywords: matrix, aij, compressed row, sparse 3747 3748 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3749 @*/ 3750 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3751 { 3752 PetscErrorCode ierr; 3753 3754 PetscFunctionBegin; 3755 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3756 PetscValidType(B,1); 3757 PetscValidLogicalCollectiveInt(B,bs,2); 3758 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3759 PetscFunctionReturn(0); 3760 } 3761 3762 3763 #undef __FUNCT__ 3764 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3765 /*@ 3766 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3767 3768 Collective on MPI_Comm 3769 3770 Input Parameters: 3771 + comm - must be an MPI communicator of size 1 3772 . bs - size of block 3773 . m - number of rows 3774 . n - number of columns 3775 . i - row indices 3776 . j - column indices 3777 - a - matrix values 3778 3779 Output Parameter: 3780 . mat - the matrix 3781 3782 Level: advanced 3783 3784 Notes: 3785 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3786 once the matrix is destroyed 3787 3788 You cannot set new nonzero locations into this matrix, that will generate an error. 3789 3790 The i and j indices are 0 based 3791 3792 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3793 3794 3795 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3796 3797 @*/ 3798 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat) 3799 { 3800 PetscErrorCode ierr; 3801 PetscInt ii; 3802 Mat_SeqBAIJ *baij; 3803 3804 PetscFunctionBegin; 3805 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3806 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3807 3808 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3809 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3810 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3811 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3812 baij = (Mat_SeqBAIJ*)(*mat)->data; 3813 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3814 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3815 3816 baij->i = i; 3817 baij->j = j; 3818 baij->a = a; 3819 3820 baij->singlemalloc = PETSC_FALSE; 3821 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3822 baij->free_a = PETSC_FALSE; 3823 baij->free_ij = PETSC_FALSE; 3824 3825 for (ii=0; ii<m; ii++) { 3826 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3827 #if defined(PETSC_USE_DEBUG) 3828 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3829 #endif 3830 } 3831 #if defined(PETSC_USE_DEBUG) 3832 for (ii=0; ii<baij->i[m]; ii++) { 3833 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3834 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3835 } 3836 #endif 3837 3838 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3839 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3840 PetscFunctionReturn(0); 3841 } 3842