1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <../src/mat/blockinvert.h> 9 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,PetscScalar **values) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 19 PetscReal shift = 0.0; 20 21 PetscFunctionBegin; 22 if (a->idiagvalid) { 23 if (values)*values = a->idiag; 24 PetscFunctionReturn(0); 25 } 26 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 27 diag_offset = a->diag; 28 if (!a->idiag) { 29 ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 30 ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 31 } 32 diag = a->idiag; 33 mdiag = a->idiag+bs2*mbs; 34 if (values) *values = a->idiag; 35 /* factor and invert each block */ 36 switch (bs){ 37 case 1: 38 for (i=0; i<mbs; i++) { 39 odiag = v + 1*diag_offset[i]; 40 diag[0] = odiag[0]; 41 mdiag[0] = odiag[0]; 42 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 43 diag += 1; 44 mdiag += 1; 45 } 46 break; 47 case 2: 48 for (i=0; i<mbs; i++) { 49 odiag = v + 4*diag_offset[i]; 50 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 51 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 52 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 53 diag += 4; 54 mdiag += 4; 55 } 56 break; 57 case 3: 58 for (i=0; i<mbs; i++) { 59 odiag = v + 9*diag_offset[i]; 60 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 61 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 62 diag[8] = odiag[8]; 63 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 64 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 65 mdiag[8] = odiag[8]; 66 ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 67 diag += 9; 68 mdiag += 9; 69 } 70 break; 71 case 4: 72 for (i=0; i<mbs; i++) { 73 odiag = v + 16*diag_offset[i]; 74 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 75 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 76 ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 77 diag += 16; 78 mdiag += 16; 79 } 80 break; 81 case 5: 82 for (i=0; i<mbs; i++) { 83 odiag = v + 25*diag_offset[i]; 84 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 85 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 86 ierr = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 87 diag += 25; 88 mdiag += 25; 89 } 90 break; 91 case 6: 92 for (i=0; i<mbs; i++) { 93 odiag = v + 36*diag_offset[i]; 94 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 95 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 97 diag += 36; 98 mdiag += 36; 99 } 100 break; 101 case 7: 102 for (i=0; i<mbs; i++) { 103 odiag = v + 49*diag_offset[i]; 104 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 105 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = Kernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 107 diag += 49; 108 mdiag += 49; 109 } 110 break; 111 default: 112 ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr); 113 for (i=0; i<mbs; i++) { 114 odiag = v + bs2*diag_offset[i]; 115 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 117 ierr = Kernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 118 diag += bs2; 119 mdiag += bs2; 120 } 121 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 122 } 123 a->idiagvalid = PETSC_TRUE; 124 PetscFunctionReturn(0); 125 } 126 127 #undef __FUNCT__ 128 #define __FUNCT__ "MatSOR_SeqBAIJ_1" 129 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 130 { 131 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 132 PetscScalar *x,x1,s1; 133 const PetscScalar *b; 134 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 135 PetscErrorCode ierr; 136 PetscInt m = a->mbs,i,i2,nz,j; 137 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 138 139 PetscFunctionBegin; 140 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 141 its = its*lits; 142 if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 143 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 144 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 145 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 146 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 147 148 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 149 150 diag = a->diag; 151 idiag = a->idiag; 152 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 153 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 154 155 if (flag & SOR_ZERO_INITIAL_GUESS) { 156 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 157 x[0] = b[0]*idiag[0]; 158 i2 = 1; 159 idiag += 1; 160 for (i=1; i<m; i++) { 161 v = aa + ai[i]; 162 vi = aj + ai[i]; 163 nz = diag[i] - ai[i]; 164 s1 = b[i2]; 165 for (j=0; j<nz; j++) { 166 s1 -= v[j]*x[vi[j]]; 167 } 168 x[i2] = idiag[0]*s1; 169 idiag += 1; 170 i2 += 1; 171 } 172 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 173 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 174 } 175 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 176 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 177 i2 = 0; 178 mdiag = a->idiag+a->mbs; 179 for (i=0; i<m; i++) { 180 x1 = x[i2]; 181 x[i2] = mdiag[0]*x1; 182 mdiag += 1; 183 i2 += 1; 184 } 185 ierr = PetscLogFlops(m);CHKERRQ(ierr); 186 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 187 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 188 } 189 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 190 idiag = a->idiag+a->mbs - 1; 191 i2 = m - 1; 192 x1 = x[i2]; 193 x[i2] = idiag[0]*x1; 194 idiag -= 1; 195 i2 -= 1; 196 for (i=m-2; i>=0; i--) { 197 v = aa + (diag[i]+1); 198 vi = aj + diag[i] + 1; 199 nz = ai[i+1] - diag[i] - 1; 200 s1 = x[i2]; 201 for (j=0; j<nz; j++) { 202 s1 -= v[j]*x[vi[j]]; 203 } 204 x[i2] = idiag[0]*s1; 205 idiag -= 1; 206 i2 -= 1; 207 } 208 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 209 } 210 } else { 211 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 212 } 213 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 214 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 215 PetscFunctionReturn(0); 216 } 217 218 #undef __FUNCT__ 219 #define __FUNCT__ "MatSOR_SeqBAIJ_2" 220 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 221 { 222 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 223 PetscScalar *x,x1,x2,s1,s2; 224 const PetscScalar *b; 225 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 226 PetscErrorCode ierr; 227 PetscInt m = a->mbs,i,i2,nz,idx,j,it; 228 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 229 230 PetscFunctionBegin; 231 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 232 its = its*lits; 233 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 234 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 235 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 236 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 237 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 238 239 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 240 241 diag = a->diag; 242 idiag = a->idiag; 243 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 244 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 245 246 if (flag & SOR_ZERO_INITIAL_GUESS) { 247 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 248 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 249 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 250 i2 = 2; 251 idiag += 4; 252 for (i=1; i<m; i++) { 253 v = aa + 4*ai[i]; 254 vi = aj + ai[i]; 255 nz = diag[i] - ai[i]; 256 s1 = b[i2]; s2 = b[i2+1]; 257 for (j=0; j<nz; j++) { 258 idx = 2*vi[j]; 259 it = 4*j; 260 x1 = x[idx]; x2 = x[1+idx]; 261 s1 -= v[it]*x1 + v[it+2]*x2; 262 s2 -= v[it+1]*x1 + v[it+3]*x2; 263 } 264 x[i2] = idiag[0]*s1 + idiag[2]*s2; 265 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 266 idiag += 4; 267 i2 += 2; 268 } 269 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 270 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 271 } 272 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 273 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 274 i2 = 0; 275 mdiag = a->idiag+4*a->mbs; 276 for (i=0; i<m; i++) { 277 x1 = x[i2]; x2 = x[i2+1]; 278 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 279 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 280 mdiag += 4; 281 i2 += 2; 282 } 283 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 284 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 285 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 286 } 287 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 288 idiag = a->idiag+4*a->mbs - 4; 289 i2 = 2*m - 2; 290 x1 = x[i2]; x2 = x[i2+1]; 291 x[i2] = idiag[0]*x1 + idiag[2]*x2; 292 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 293 idiag -= 4; 294 i2 -= 2; 295 for (i=m-2; i>=0; i--) { 296 v = aa + 4*(diag[i]+1); 297 vi = aj + diag[i] + 1; 298 nz = ai[i+1] - diag[i] - 1; 299 s1 = x[i2]; s2 = x[i2+1]; 300 for (j=0; j<nz; j++) { 301 idx = 2*vi[j]; 302 it = 4*j; 303 x1 = x[idx]; x2 = x[1+idx]; 304 s1 -= v[it]*x1 + v[it+2]*x2; 305 s2 -= v[it+1]*x1 + v[it+3]*x2; 306 } 307 x[i2] = idiag[0]*s1 + idiag[2]*s2; 308 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 309 idiag -= 4; 310 i2 -= 2; 311 } 312 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 313 } 314 } else { 315 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 316 } 317 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 318 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 319 PetscFunctionReturn(0); 320 } 321 322 #undef __FUNCT__ 323 #define __FUNCT__ "MatSOR_SeqBAIJ_3" 324 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 325 { 326 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 327 PetscScalar *x,x1,x2,x3,s1,s2,s3; 328 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 329 const PetscScalar *b; 330 PetscErrorCode ierr; 331 PetscInt m = a->mbs,i,i2,nz,idx; 332 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 333 334 PetscFunctionBegin; 335 its = its*lits; 336 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 337 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 338 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 339 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 340 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 341 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 342 343 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 344 345 diag = a->diag; 346 idiag = a->idiag; 347 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 348 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 349 350 if (flag & SOR_ZERO_INITIAL_GUESS) { 351 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 352 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 353 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 354 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 355 i2 = 3; 356 idiag += 9; 357 for (i=1; i<m; i++) { 358 v = aa + 9*ai[i]; 359 vi = aj + ai[i]; 360 nz = diag[i] - ai[i]; 361 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 362 while (nz--) { 363 idx = 3*(*vi++); 364 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 365 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 366 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 367 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 368 v += 9; 369 } 370 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 371 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 372 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 373 idiag += 9; 374 i2 += 3; 375 } 376 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 377 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 378 } 379 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 380 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 381 i2 = 0; 382 mdiag = a->idiag+9*a->mbs; 383 for (i=0; i<m; i++) { 384 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 385 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 386 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 387 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 388 mdiag += 9; 389 i2 += 3; 390 } 391 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 392 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 393 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 394 } 395 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 396 idiag = a->idiag+9*a->mbs - 9; 397 i2 = 3*m - 3; 398 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 399 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 400 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 401 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 402 idiag -= 9; 403 i2 -= 3; 404 for (i=m-2; i>=0; i--) { 405 v = aa + 9*(diag[i]+1); 406 vi = aj + diag[i] + 1; 407 nz = ai[i+1] - diag[i] - 1; 408 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 409 while (nz--) { 410 idx = 3*(*vi++); 411 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 412 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 413 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 414 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 415 v += 9; 416 } 417 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 418 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 419 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 420 idiag -= 9; 421 i2 -= 3; 422 } 423 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 424 } 425 } else { 426 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 427 } 428 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 429 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 430 PetscFunctionReturn(0); 431 } 432 433 #undef __FUNCT__ 434 #define __FUNCT__ "MatSOR_SeqBAIJ_4" 435 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 436 { 437 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 438 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 439 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 440 const PetscScalar *b; 441 PetscErrorCode ierr; 442 PetscInt m = a->mbs,i,i2,nz,idx; 443 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 444 445 PetscFunctionBegin; 446 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 447 its = its*lits; 448 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 449 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 450 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 451 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 452 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 453 454 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 455 456 diag = a->diag; 457 idiag = a->idiag; 458 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 459 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 460 461 if (flag & SOR_ZERO_INITIAL_GUESS) { 462 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 463 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 464 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 465 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 466 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 467 i2 = 4; 468 idiag += 16; 469 for (i=1; i<m; i++) { 470 v = aa + 16*ai[i]; 471 vi = aj + ai[i]; 472 nz = diag[i] - ai[i]; 473 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 474 while (nz--) { 475 idx = 4*(*vi++); 476 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 477 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 478 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 479 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 480 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 481 v += 16; 482 } 483 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 484 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 485 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 486 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 487 idiag += 16; 488 i2 += 4; 489 } 490 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 491 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 492 } 493 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 494 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 495 i2 = 0; 496 mdiag = a->idiag+16*a->mbs; 497 for (i=0; i<m; i++) { 498 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 499 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 500 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 501 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 502 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 503 mdiag += 16; 504 i2 += 4; 505 } 506 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 507 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 508 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 509 } 510 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 511 idiag = a->idiag+16*a->mbs - 16; 512 i2 = 4*m - 4; 513 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 514 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 515 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 516 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 517 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 518 idiag -= 16; 519 i2 -= 4; 520 for (i=m-2; i>=0; i--) { 521 v = aa + 16*(diag[i]+1); 522 vi = aj + diag[i] + 1; 523 nz = ai[i+1] - diag[i] - 1; 524 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 525 while (nz--) { 526 idx = 4*(*vi++); 527 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 528 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 529 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 530 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 531 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 532 v += 16; 533 } 534 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 535 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 536 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 537 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 538 idiag -= 16; 539 i2 -= 4; 540 } 541 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 542 } 543 } else { 544 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 545 } 546 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 547 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 548 PetscFunctionReturn(0); 549 } 550 551 #undef __FUNCT__ 552 #define __FUNCT__ "MatSOR_SeqBAIJ_5" 553 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 554 { 555 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 556 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 557 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 558 const PetscScalar *b; 559 PetscErrorCode ierr; 560 PetscInt m = a->mbs,i,i2,nz,idx; 561 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 562 563 PetscFunctionBegin; 564 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 565 its = its*lits; 566 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 567 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 568 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 569 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 570 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 571 572 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 573 574 diag = a->diag; 575 idiag = a->idiag; 576 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 577 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 578 579 if (flag & SOR_ZERO_INITIAL_GUESS) { 580 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 581 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 582 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 583 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 584 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 585 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 586 i2 = 5; 587 idiag += 25; 588 for (i=1; i<m; i++) { 589 v = aa + 25*ai[i]; 590 vi = aj + ai[i]; 591 nz = diag[i] - ai[i]; 592 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 593 while (nz--) { 594 idx = 5*(*vi++); 595 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 596 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 597 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 598 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 599 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 600 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 601 v += 25; 602 } 603 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 604 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 605 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 606 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 607 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 608 idiag += 25; 609 i2 += 5; 610 } 611 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 612 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 613 } 614 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 615 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 616 i2 = 0; 617 mdiag = a->idiag+25*a->mbs; 618 for (i=0; i<m; i++) { 619 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 620 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 621 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 622 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 623 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 624 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 625 mdiag += 25; 626 i2 += 5; 627 } 628 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 629 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 630 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 631 } 632 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 633 idiag = a->idiag+25*a->mbs - 25; 634 i2 = 5*m - 5; 635 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 636 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 637 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 638 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 639 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 640 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 641 idiag -= 25; 642 i2 -= 5; 643 for (i=m-2; i>=0; i--) { 644 v = aa + 25*(diag[i]+1); 645 vi = aj + diag[i] + 1; 646 nz = ai[i+1] - diag[i] - 1; 647 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 648 while (nz--) { 649 idx = 5*(*vi++); 650 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 651 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 652 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 653 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 654 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 655 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 656 v += 25; 657 } 658 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 659 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 660 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 661 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 662 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 663 idiag -= 25; 664 i2 -= 5; 665 } 666 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 667 } 668 } else { 669 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 670 } 671 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 672 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 673 PetscFunctionReturn(0); 674 } 675 676 #undef __FUNCT__ 677 #define __FUNCT__ "MatSOR_SeqBAIJ_6" 678 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 679 { 680 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 681 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 682 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 683 const PetscScalar *b; 684 PetscErrorCode ierr; 685 PetscInt m = a->mbs,i,i2,nz,idx; 686 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 687 688 PetscFunctionBegin; 689 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 690 its = its*lits; 691 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 692 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 693 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 694 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 695 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 696 697 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 698 699 diag = a->diag; 700 idiag = a->idiag; 701 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 702 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 703 704 if (flag & SOR_ZERO_INITIAL_GUESS) { 705 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 706 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 707 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 708 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 709 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 710 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 711 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 712 i2 = 6; 713 idiag += 36; 714 for (i=1; i<m; i++) { 715 v = aa + 36*ai[i]; 716 vi = aj + ai[i]; 717 nz = diag[i] - ai[i]; 718 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 719 while (nz--) { 720 idx = 6*(*vi++); 721 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 722 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 723 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 724 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 725 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 726 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 727 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 728 v += 36; 729 } 730 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 731 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 732 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 733 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 734 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 735 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 736 idiag += 36; 737 i2 += 6; 738 } 739 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 740 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 741 } 742 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 743 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 744 i2 = 0; 745 mdiag = a->idiag+36*a->mbs; 746 for (i=0; i<m; i++) { 747 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 748 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 749 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 750 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 751 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 752 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 753 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 754 mdiag += 36; 755 i2 += 6; 756 } 757 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 758 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 759 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 760 } 761 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 762 idiag = a->idiag+36*a->mbs - 36; 763 i2 = 6*m - 6; 764 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 765 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 766 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 767 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 768 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 769 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 770 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 771 idiag -= 36; 772 i2 -= 6; 773 for (i=m-2; i>=0; i--) { 774 v = aa + 36*(diag[i]+1); 775 vi = aj + diag[i] + 1; 776 nz = ai[i+1] - diag[i] - 1; 777 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 778 while (nz--) { 779 idx = 6*(*vi++); 780 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 781 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 782 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 783 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 784 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 785 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 786 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 787 v += 36; 788 } 789 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 790 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 791 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 792 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 793 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 794 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 795 idiag -= 36; 796 i2 -= 6; 797 } 798 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 799 } 800 } else { 801 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 802 } 803 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 804 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 805 PetscFunctionReturn(0); 806 } 807 808 #undef __FUNCT__ 809 #define __FUNCT__ "MatSOR_SeqBAIJ_7" 810 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 811 { 812 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 813 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 814 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 815 const PetscScalar *b; 816 PetscErrorCode ierr; 817 PetscInt m = a->mbs,i,i2,nz,idx; 818 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 819 820 PetscFunctionBegin; 821 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 822 its = its*lits; 823 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 824 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 825 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 826 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 827 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 828 829 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 830 831 diag = a->diag; 832 idiag = a->idiag; 833 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 834 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 835 836 if (flag & SOR_ZERO_INITIAL_GUESS) { 837 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 838 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 839 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 840 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 841 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 842 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 843 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 844 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 845 i2 = 7; 846 idiag += 49; 847 for (i=1; i<m; i++) { 848 v = aa + 49*ai[i]; 849 vi = aj + ai[i]; 850 nz = diag[i] - ai[i]; 851 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 852 while (nz--) { 853 idx = 7*(*vi++); 854 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 855 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 856 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 857 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 858 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 859 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 860 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 861 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 862 v += 49; 863 } 864 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 865 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 866 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 867 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 868 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 869 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 870 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 871 idiag += 49; 872 i2 += 7; 873 } 874 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 875 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 876 } 877 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 878 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 879 i2 = 0; 880 mdiag = a->idiag+49*a->mbs; 881 for (i=0; i<m; i++) { 882 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 883 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7; 884 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7; 885 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7; 886 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7; 887 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7; 888 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7; 889 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7; 890 mdiag += 49; 891 i2 += 7; 892 } 893 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 894 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 895 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 896 } 897 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 898 idiag = a->idiag+49*a->mbs - 49; 899 i2 = 7*m - 7; 900 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 901 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 902 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 903 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 904 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 905 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 906 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 907 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 908 idiag -= 49; 909 i2 -= 7; 910 for (i=m-2; i>=0; i--) { 911 v = aa + 49*(diag[i]+1); 912 vi = aj + diag[i] + 1; 913 nz = ai[i+1] - diag[i] - 1; 914 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 915 while (nz--) { 916 idx = 7*(*vi++); 917 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 918 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 919 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 920 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 921 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 922 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 923 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 924 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 925 v += 49; 926 } 927 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 928 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 929 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 930 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 931 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 932 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 933 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 934 idiag -= 49; 935 i2 -= 7; 936 } 937 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 938 } 939 } else { 940 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 941 } 942 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 943 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 944 PetscFunctionReturn(0); 945 } 946 947 #undef __FUNCT__ 948 #define __FUNCT__ "MatSOR_SeqBAIJ_N" 949 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 950 { 951 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 952 PetscScalar *x,*work,*w,*workt; 953 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 954 const PetscScalar *b; 955 PetscErrorCode ierr; 956 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j; 957 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 958 959 PetscFunctionBegin; 960 its = its*lits; 961 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 962 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 963 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 964 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 965 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 966 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 967 968 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);} 969 970 diag = a->diag; 971 idiag = a->idiag; 972 if (!a->mult_work) { 973 k = PetscMax(A->rmap->n,A->cmap->n); 974 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 975 } 976 work = a->mult_work; 977 if (!a->sor_work) { 978 ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr); 979 } 980 w = a->sor_work; 981 982 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 983 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 984 985 if (flag & SOR_ZERO_INITIAL_GUESS) { 986 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 987 Kernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 988 /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 989 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 990 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/ 991 i2 = bs; 992 idiag += bs2; 993 for (i=1; i<m; i++) { 994 v = aa + bs2*ai[i]; 995 vi = aj + ai[i]; 996 nz = diag[i] - ai[i]; 997 998 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 999 /* copy all rows of x that are needed into contiguous space */ 1000 workt = work; 1001 for (j=0; j<nz; j++) { 1002 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1003 workt += bs; 1004 } 1005 Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1006 /*s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 1007 while (nz--) { 1008 idx = N*(*vi++); 1009 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 1010 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1011 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1012 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1013 v += N2; 1014 } */ 1015 1016 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1017 /* x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1018 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1019 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/ 1020 1021 idiag += bs2; 1022 i2 += bs; 1023 } 1024 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 1025 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1026 } 1027 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 1028 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1029 i2 = 0; 1030 mdiag = a->idiag+bs2*a->mbs; 1031 ierr = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr); 1032 for (i=0; i<m; i++) { 1033 Kernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2); 1034 /* x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1035 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 1036 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 1037 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */ 1038 1039 mdiag += bs2; 1040 i2 += bs; 1041 } 1042 ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr); 1043 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1044 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 1045 } 1046 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1047 idiag = a->idiag+bs2*a->mbs - bs2; 1048 i2 = bs*m - bs; 1049 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1050 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1051 /*x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1052 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 1053 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 1054 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/ 1055 idiag -= bs2; 1056 i2 -= bs; 1057 for (i=m-2; i>=0; i--) { 1058 v = aa + bs2*(diag[i]+1); 1059 vi = aj + diag[i] + 1; 1060 nz = ai[i+1] - diag[i] - 1; 1061 1062 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1063 /* copy all rows of x that are needed into contiguous space */ 1064 workt = work; 1065 for (j=0; j<nz; j++) { 1066 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1067 workt += bs; 1068 } 1069 Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1070 /* s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 1071 while (nz--) { 1072 idx = N*(*vi++); 1073 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 1074 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1075 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1076 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1077 v += N2; 1078 } */ 1079 1080 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1081 /*x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1082 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1083 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */ 1084 idiag -= bs2; 1085 i2 -= bs; 1086 } 1087 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1088 } 1089 } else { 1090 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 1091 } 1092 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1093 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1094 PetscFunctionReturn(0); 1095 } 1096 1097 /* 1098 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1099 */ 1100 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1101 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1102 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1103 #define matsetvaluesblocked4_ matsetvaluesblocked4 1104 #endif 1105 1106 EXTERN_C_BEGIN 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "matsetvaluesblocked4_" 1109 void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 1110 { 1111 Mat A = *AA; 1112 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1113 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 1114 PetscInt *ai=a->i,*ailen=a->ilen; 1115 PetscInt *aj=a->j,stepval,lastcol = -1; 1116 const PetscScalar *value = v; 1117 MatScalar *ap,*aa = a->a,*bap; 1118 1119 PetscFunctionBegin; 1120 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 1121 stepval = (n-1)*4; 1122 for (k=0; k<m; k++) { /* loop over added rows */ 1123 row = im[k]; 1124 rp = aj + ai[row]; 1125 ap = aa + 16*ai[row]; 1126 nrow = ailen[row]; 1127 low = 0; 1128 high = nrow; 1129 for (l=0; l<n; l++) { /* loop over added columns */ 1130 col = in[l]; 1131 if (col <= lastcol) low = 0; else high = nrow; 1132 lastcol = col; 1133 value = v + k*(stepval+4 + l)*4; 1134 while (high-low > 7) { 1135 t = (low+high)/2; 1136 if (rp[t] > col) high = t; 1137 else low = t; 1138 } 1139 for (i=low; i<high; i++) { 1140 if (rp[i] > col) break; 1141 if (rp[i] == col) { 1142 bap = ap + 16*i; 1143 for (ii=0; ii<4; ii++,value+=stepval) { 1144 for (jj=ii; jj<16; jj+=4) { 1145 bap[jj] += *value++; 1146 } 1147 } 1148 goto noinsert2; 1149 } 1150 } 1151 N = nrow++ - 1; 1152 high++; /* added new column index thus must search to one higher than before */ 1153 /* shift up all the later entries in this row */ 1154 for (ii=N; ii>=i; ii--) { 1155 rp[ii+1] = rp[ii]; 1156 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1157 } 1158 if (N >= i) { 1159 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1160 } 1161 rp[i] = col; 1162 bap = ap + 16*i; 1163 for (ii=0; ii<4; ii++,value+=stepval) { 1164 for (jj=ii; jj<16; jj+=4) { 1165 bap[jj] = *value++; 1166 } 1167 } 1168 noinsert2:; 1169 low = i; 1170 } 1171 ailen[row] = nrow; 1172 } 1173 PetscFunctionReturnVoid(); 1174 } 1175 EXTERN_C_END 1176 1177 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1178 #define matsetvalues4_ MATSETVALUES4 1179 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1180 #define matsetvalues4_ matsetvalues4 1181 #endif 1182 1183 EXTERN_C_BEGIN 1184 #undef __FUNCT__ 1185 #define __FUNCT__ "MatSetValues4_" 1186 void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1187 { 1188 Mat A = *AA; 1189 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1190 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1191 PetscInt *ai=a->i,*ailen=a->ilen; 1192 PetscInt *aj=a->j,brow,bcol; 1193 PetscInt ridx,cidx,lastcol = -1; 1194 MatScalar *ap,value,*aa=a->a,*bap; 1195 1196 PetscFunctionBegin; 1197 for (k=0; k<m; k++) { /* loop over added rows */ 1198 row = im[k]; brow = row/4; 1199 rp = aj + ai[brow]; 1200 ap = aa + 16*ai[brow]; 1201 nrow = ailen[brow]; 1202 low = 0; 1203 high = nrow; 1204 for (l=0; l<n; l++) { /* loop over added columns */ 1205 col = in[l]; bcol = col/4; 1206 ridx = row % 4; cidx = col % 4; 1207 value = v[l + k*n]; 1208 if (col <= lastcol) low = 0; else high = nrow; 1209 lastcol = col; 1210 while (high-low > 7) { 1211 t = (low+high)/2; 1212 if (rp[t] > bcol) high = t; 1213 else low = t; 1214 } 1215 for (i=low; i<high; i++) { 1216 if (rp[i] > bcol) break; 1217 if (rp[i] == bcol) { 1218 bap = ap + 16*i + 4*cidx + ridx; 1219 *bap += value; 1220 goto noinsert1; 1221 } 1222 } 1223 N = nrow++ - 1; 1224 high++; /* added new column thus must search to one higher than before */ 1225 /* shift up all the later entries in this row */ 1226 for (ii=N; ii>=i; ii--) { 1227 rp[ii+1] = rp[ii]; 1228 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1229 } 1230 if (N>=i) { 1231 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1232 } 1233 rp[i] = bcol; 1234 ap[16*i + 4*cidx + ridx] = value; 1235 noinsert1:; 1236 low = i; 1237 } 1238 ailen[brow] = nrow; 1239 } 1240 PetscFunctionReturnVoid(); 1241 } 1242 EXTERN_C_END 1243 1244 /* 1245 Checks for missing diagonals 1246 */ 1247 #undef __FUNCT__ 1248 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1249 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1250 { 1251 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1252 PetscErrorCode ierr; 1253 PetscInt *diag,*jj = a->j,i; 1254 1255 PetscFunctionBegin; 1256 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1257 *missing = PETSC_FALSE; 1258 if (A->rmap->n > 0 && !jj) { 1259 *missing = PETSC_TRUE; 1260 if (d) *d = 0; 1261 PetscInfo(A,"Matrix has no entries therefor is missing diagonal"); 1262 } else { 1263 diag = a->diag; 1264 for (i=0; i<a->mbs; i++) { 1265 if (jj[diag[i]] != i) { 1266 *missing = PETSC_TRUE; 1267 if (d) *d = i; 1268 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1269 } 1270 } 1271 } 1272 PetscFunctionReturn(0); 1273 } 1274 1275 #undef __FUNCT__ 1276 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1277 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1278 { 1279 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1280 PetscErrorCode ierr; 1281 PetscInt i,j,m = a->mbs; 1282 1283 PetscFunctionBegin; 1284 if (!a->diag) { 1285 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1286 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1287 a->free_diag = PETSC_TRUE; 1288 } 1289 for (i=0; i<m; i++) { 1290 a->diag[i] = a->i[i+1]; 1291 for (j=a->i[i]; j<a->i[i+1]; j++) { 1292 if (a->j[j] == i) { 1293 a->diag[i] = j; 1294 break; 1295 } 1296 } 1297 } 1298 PetscFunctionReturn(0); 1299 } 1300 1301 1302 extern PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1303 1304 #undef __FUNCT__ 1305 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1306 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 1307 { 1308 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1309 PetscErrorCode ierr; 1310 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt; 1311 PetscInt *tia, *tja; 1312 1313 PetscFunctionBegin; 1314 *nn = n; 1315 if (!ia) PetscFunctionReturn(0); 1316 if (symmetric) { 1317 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1318 nz = tia[n]; 1319 } else { 1320 tia = a->i; tja = a->j; 1321 } 1322 1323 if (!blockcompressed && bs > 1) { 1324 (*nn) *= bs; 1325 /* malloc & create the natural set of indices */ 1326 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1327 if (n) { 1328 (*ia)[0] = 0; 1329 for (j=1; j<bs; j++) { 1330 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1331 } 1332 } 1333 1334 for (i=1; i<n; i++) { 1335 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1336 for (j=1; j<bs; j++) { 1337 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1338 } 1339 } 1340 if (n) { 1341 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1342 } 1343 1344 if (ja) { 1345 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1346 cnt = 0; 1347 for (i=0; i<n; i++) { 1348 for (j=0; j<bs; j++) { 1349 for (k=tia[i]; k<tia[i+1]; k++) { 1350 for (l=0; l<bs; l++) { 1351 (*ja)[cnt++] = bs*tja[k] + l; 1352 } 1353 } 1354 } 1355 } 1356 } 1357 1358 n *= bs; 1359 nz *= bs*bs; 1360 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1361 ierr = PetscFree(tia);CHKERRQ(ierr); 1362 ierr = PetscFree(tja);CHKERRQ(ierr); 1363 } 1364 } else if (oshift == 1) { 1365 if (symmetric) { 1366 PetscInt nz = tia[A->rmap->n/bs]; 1367 /* add 1 to i and j indices */ 1368 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1369 *ia = tia; 1370 if (ja) { 1371 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1372 *ja = tja; 1373 } 1374 } else { 1375 PetscInt nz = a->i[A->rmap->n/bs]; 1376 /* malloc space and add 1 to i and j indices */ 1377 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1378 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1379 if (ja) { 1380 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1381 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1382 } 1383 } 1384 } else { 1385 *ia = tia; 1386 if (ja) *ja = tja; 1387 } 1388 1389 PetscFunctionReturn(0); 1390 } 1391 1392 #undef __FUNCT__ 1393 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1394 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 1395 { 1396 PetscErrorCode ierr; 1397 1398 PetscFunctionBegin; 1399 if (!ia) PetscFunctionReturn(0); 1400 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1401 ierr = PetscFree(*ia);CHKERRQ(ierr); 1402 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1403 } 1404 PetscFunctionReturn(0); 1405 } 1406 1407 #undef __FUNCT__ 1408 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1409 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1410 { 1411 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1412 PetscErrorCode ierr; 1413 1414 PetscFunctionBegin; 1415 #if defined(PETSC_USE_LOG) 1416 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1417 #endif 1418 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1419 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1420 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1421 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1422 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1423 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1424 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1425 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1426 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1427 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1428 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1429 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1430 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1431 1432 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1433 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1434 ierr = PetscFree(A->data);CHKERRQ(ierr); 1435 1436 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1437 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1438 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1439 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1440 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1441 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1442 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1443 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1444 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1445 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",PETSC_NULL);CHKERRQ(ierr); 1446 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1447 PetscFunctionReturn(0); 1448 } 1449 1450 #undef __FUNCT__ 1451 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1452 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1453 { 1454 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1455 PetscErrorCode ierr; 1456 1457 PetscFunctionBegin; 1458 switch (op) { 1459 case MAT_ROW_ORIENTED: 1460 a->roworiented = flg; 1461 break; 1462 case MAT_KEEP_NONZERO_PATTERN: 1463 a->keepnonzeropattern = flg; 1464 break; 1465 case MAT_NEW_NONZERO_LOCATIONS: 1466 a->nonew = (flg ? 0 : 1); 1467 break; 1468 case MAT_NEW_NONZERO_LOCATION_ERR: 1469 a->nonew = (flg ? -1 : 0); 1470 break; 1471 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1472 a->nonew = (flg ? -2 : 0); 1473 break; 1474 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1475 a->nounused = (flg ? -1 : 0); 1476 break; 1477 case MAT_CHECK_COMPRESSED_ROW: 1478 a->compressedrow.check = flg; 1479 break; 1480 case MAT_NEW_DIAGONALS: 1481 case MAT_IGNORE_OFF_PROC_ENTRIES: 1482 case MAT_USE_HASH_TABLE: 1483 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1484 break; 1485 case MAT_SYMMETRIC: 1486 case MAT_STRUCTURALLY_SYMMETRIC: 1487 case MAT_HERMITIAN: 1488 case MAT_SYMMETRY_ETERNAL: 1489 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1490 break; 1491 default: 1492 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1493 } 1494 PetscFunctionReturn(0); 1495 } 1496 1497 #undef __FUNCT__ 1498 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1499 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1500 { 1501 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1502 PetscErrorCode ierr; 1503 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1504 MatScalar *aa,*aa_i; 1505 PetscScalar *v_i; 1506 1507 PetscFunctionBegin; 1508 bs = A->rmap->bs; 1509 ai = a->i; 1510 aj = a->j; 1511 aa = a->a; 1512 bs2 = a->bs2; 1513 1514 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1515 1516 bn = row/bs; /* Block number */ 1517 bp = row % bs; /* Block Position */ 1518 M = ai[bn+1] - ai[bn]; 1519 *nz = bs*M; 1520 1521 if (v) { 1522 *v = 0; 1523 if (*nz) { 1524 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1525 for (i=0; i<M; i++) { /* for each block in the block row */ 1526 v_i = *v + i*bs; 1527 aa_i = aa + bs2*(ai[bn] + i); 1528 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1529 } 1530 } 1531 } 1532 1533 if (idx) { 1534 *idx = 0; 1535 if (*nz) { 1536 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1537 for (i=0; i<M; i++) { /* for each block in the block row */ 1538 idx_i = *idx + i*bs; 1539 itmp = bs*aj[ai[bn] + i]; 1540 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1541 } 1542 } 1543 } 1544 PetscFunctionReturn(0); 1545 } 1546 1547 #undef __FUNCT__ 1548 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1549 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1550 { 1551 PetscErrorCode ierr; 1552 1553 PetscFunctionBegin; 1554 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1555 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1556 PetscFunctionReturn(0); 1557 } 1558 1559 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1560 1561 #undef __FUNCT__ 1562 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1563 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1564 { 1565 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1566 Mat C; 1567 PetscErrorCode ierr; 1568 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1569 PetscInt *rows,*cols,bs2=a->bs2; 1570 MatScalar *array; 1571 1572 PetscFunctionBegin; 1573 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1574 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1575 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1576 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1577 1578 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1579 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1580 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1581 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1582 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr); 1583 ierr = PetscFree(col);CHKERRQ(ierr); 1584 } else { 1585 C = *B; 1586 } 1587 1588 array = a->a; 1589 ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 1590 for (i=0; i<mbs; i++) { 1591 cols[0] = i*bs; 1592 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1593 len = ai[i+1] - ai[i]; 1594 for (j=0; j<len; j++) { 1595 rows[0] = (*aj++)*bs; 1596 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1597 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1598 array += bs2; 1599 } 1600 } 1601 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1602 1603 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1604 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1605 1606 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1607 *B = C; 1608 } else { 1609 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1610 } 1611 PetscFunctionReturn(0); 1612 } 1613 1614 EXTERN_C_BEGIN 1615 #undef __FUNCT__ 1616 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1617 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1618 { 1619 PetscErrorCode ierr; 1620 Mat Btrans; 1621 1622 PetscFunctionBegin; 1623 *f = PETSC_FALSE; 1624 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1625 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1626 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1627 PetscFunctionReturn(0); 1628 } 1629 EXTERN_C_END 1630 1631 #undef __FUNCT__ 1632 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1633 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1634 { 1635 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1636 PetscErrorCode ierr; 1637 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1638 int fd; 1639 PetscScalar *aa; 1640 FILE *file; 1641 1642 PetscFunctionBegin; 1643 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1644 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1645 col_lens[0] = MAT_FILE_CLASSID; 1646 1647 col_lens[1] = A->rmap->N; 1648 col_lens[2] = A->cmap->n; 1649 col_lens[3] = a->nz*bs2; 1650 1651 /* store lengths of each row and write (including header) to file */ 1652 count = 0; 1653 for (i=0; i<a->mbs; i++) { 1654 for (j=0; j<bs; j++) { 1655 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1656 } 1657 } 1658 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1659 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1660 1661 /* store column indices (zero start index) */ 1662 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1663 count = 0; 1664 for (i=0; i<a->mbs; i++) { 1665 for (j=0; j<bs; j++) { 1666 for (k=a->i[i]; k<a->i[i+1]; k++) { 1667 for (l=0; l<bs; l++) { 1668 jj[count++] = bs*a->j[k] + l; 1669 } 1670 } 1671 } 1672 } 1673 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1674 ierr = PetscFree(jj);CHKERRQ(ierr); 1675 1676 /* store nonzero values */ 1677 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1678 count = 0; 1679 for (i=0; i<a->mbs; i++) { 1680 for (j=0; j<bs; j++) { 1681 for (k=a->i[i]; k<a->i[i+1]; k++) { 1682 for (l=0; l<bs; l++) { 1683 aa[count++] = a->a[bs2*k + l*bs + j]; 1684 } 1685 } 1686 } 1687 } 1688 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1689 ierr = PetscFree(aa);CHKERRQ(ierr); 1690 1691 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1692 if (file) { 1693 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1694 } 1695 PetscFunctionReturn(0); 1696 } 1697 1698 #undef __FUNCT__ 1699 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1700 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1701 { 1702 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1703 PetscErrorCode ierr; 1704 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1705 PetscViewerFormat format; 1706 1707 PetscFunctionBegin; 1708 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1709 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1710 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1711 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1712 Mat aij; 1713 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1714 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1715 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1716 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1717 PetscFunctionReturn(0); 1718 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1719 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1720 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1721 for (i=0; i<a->mbs; i++) { 1722 for (j=0; j<bs; j++) { 1723 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1724 for (k=a->i[i]; k<a->i[i+1]; k++) { 1725 for (l=0; l<bs; l++) { 1726 #if defined(PETSC_USE_COMPLEX) 1727 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1728 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1729 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1730 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1731 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1732 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1733 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1734 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1735 } 1736 #else 1737 if (a->a[bs2*k + l*bs + j] != 0.0) { 1738 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1739 } 1740 #endif 1741 } 1742 } 1743 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1744 } 1745 } 1746 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1747 } else { 1748 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1749 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1750 for (i=0; i<a->mbs; i++) { 1751 for (j=0; j<bs; j++) { 1752 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1753 for (k=a->i[i]; k<a->i[i+1]; k++) { 1754 for (l=0; l<bs; l++) { 1755 #if defined(PETSC_USE_COMPLEX) 1756 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1757 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1758 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1759 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1760 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1761 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1762 } else { 1763 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1764 } 1765 #else 1766 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1767 #endif 1768 } 1769 } 1770 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1771 } 1772 } 1773 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1774 } 1775 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1776 PetscFunctionReturn(0); 1777 } 1778 1779 #undef __FUNCT__ 1780 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1781 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1782 { 1783 Mat A = (Mat) Aa; 1784 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1785 PetscErrorCode ierr; 1786 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1787 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1788 MatScalar *aa; 1789 PetscViewer viewer; 1790 PetscViewerFormat format; 1791 1792 PetscFunctionBegin; 1793 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1794 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1795 1796 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1797 1798 /* loop over matrix elements drawing boxes */ 1799 1800 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1801 color = PETSC_DRAW_BLUE; 1802 for (i=0,row=0; i<mbs; i++,row+=bs) { 1803 for (j=a->i[i]; j<a->i[i+1]; j++) { 1804 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1805 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1806 aa = a->a + j*bs2; 1807 for (k=0; k<bs; k++) { 1808 for (l=0; l<bs; l++) { 1809 if (PetscRealPart(*aa++) >= 0.) continue; 1810 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1811 } 1812 } 1813 } 1814 } 1815 color = PETSC_DRAW_CYAN; 1816 for (i=0,row=0; i<mbs; i++,row+=bs) { 1817 for (j=a->i[i]; j<a->i[i+1]; j++) { 1818 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1819 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1820 aa = a->a + j*bs2; 1821 for (k=0; k<bs; k++) { 1822 for (l=0; l<bs; l++) { 1823 if (PetscRealPart(*aa++) != 0.) continue; 1824 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1825 } 1826 } 1827 } 1828 } 1829 color = PETSC_DRAW_RED; 1830 for (i=0,row=0; i<mbs; i++,row+=bs) { 1831 for (j=a->i[i]; j<a->i[i+1]; j++) { 1832 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1833 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1834 aa = a->a + j*bs2; 1835 for (k=0; k<bs; k++) { 1836 for (l=0; l<bs; l++) { 1837 if (PetscRealPart(*aa++) <= 0.) continue; 1838 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1839 } 1840 } 1841 } 1842 } 1843 } else { 1844 /* use contour shading to indicate magnitude of values */ 1845 /* first determine max of all nonzero values */ 1846 PetscDraw popup; 1847 PetscReal scale,maxv = 0.0; 1848 1849 for (i=0; i<a->nz*a->bs2; i++) { 1850 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1851 } 1852 scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv; 1853 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1854 if (popup) {ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);} 1855 for (i=0,row=0; i<mbs; i++,row+=bs) { 1856 for (j=a->i[i]; j<a->i[i+1]; j++) { 1857 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1858 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1859 aa = a->a + j*bs2; 1860 for (k=0; k<bs; k++) { 1861 for (l=0; l<bs; l++) { 1862 color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++)); 1863 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1864 } 1865 } 1866 } 1867 } 1868 } 1869 PetscFunctionReturn(0); 1870 } 1871 1872 #undef __FUNCT__ 1873 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1874 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1875 { 1876 PetscErrorCode ierr; 1877 PetscReal xl,yl,xr,yr,w,h; 1878 PetscDraw draw; 1879 PetscBool isnull; 1880 1881 PetscFunctionBegin; 1882 1883 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1884 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1885 1886 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1887 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1888 xr += w; yr += h; xl = -w; yl = -h; 1889 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1890 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1891 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1892 PetscFunctionReturn(0); 1893 } 1894 1895 #undef __FUNCT__ 1896 #define __FUNCT__ "MatView_SeqBAIJ" 1897 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1898 { 1899 PetscErrorCode ierr; 1900 PetscBool iascii,isbinary,isdraw; 1901 1902 PetscFunctionBegin; 1903 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1904 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1905 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1906 if (iascii){ 1907 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1908 } else if (isbinary) { 1909 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1910 } else if (isdraw) { 1911 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1912 } else { 1913 Mat B; 1914 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1915 ierr = MatView(B,viewer);CHKERRQ(ierr); 1916 ierr = MatDestroy(&B);CHKERRQ(ierr); 1917 } 1918 PetscFunctionReturn(0); 1919 } 1920 1921 1922 #undef __FUNCT__ 1923 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1924 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1925 { 1926 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1927 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1928 PetscInt *ai = a->i,*ailen = a->ilen; 1929 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1930 MatScalar *ap,*aa = a->a; 1931 1932 PetscFunctionBegin; 1933 for (k=0; k<m; k++) { /* loop over rows */ 1934 row = im[k]; brow = row/bs; 1935 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1936 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1937 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1938 nrow = ailen[brow]; 1939 for (l=0; l<n; l++) { /* loop over columns */ 1940 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1941 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1942 col = in[l] ; 1943 bcol = col/bs; 1944 cidx = col%bs; 1945 ridx = row%bs; 1946 high = nrow; 1947 low = 0; /* assume unsorted */ 1948 while (high-low > 5) { 1949 t = (low+high)/2; 1950 if (rp[t] > bcol) high = t; 1951 else low = t; 1952 } 1953 for (i=low; i<high; i++) { 1954 if (rp[i] > bcol) break; 1955 if (rp[i] == bcol) { 1956 *v++ = ap[bs2*i+bs*cidx+ridx]; 1957 goto finished; 1958 } 1959 } 1960 *v++ = 0.0; 1961 finished:; 1962 } 1963 } 1964 PetscFunctionReturn(0); 1965 } 1966 1967 #undef __FUNCT__ 1968 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1969 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1970 { 1971 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1972 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1973 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1974 PetscErrorCode ierr; 1975 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1976 PetscBool roworiented=a->roworiented; 1977 const PetscScalar *value = v; 1978 MatScalar *ap,*aa = a->a,*bap; 1979 1980 PetscFunctionBegin; 1981 if (roworiented) { 1982 stepval = (n-1)*bs; 1983 } else { 1984 stepval = (m-1)*bs; 1985 } 1986 for (k=0; k<m; k++) { /* loop over added rows */ 1987 row = im[k]; 1988 if (row < 0) continue; 1989 #if defined(PETSC_USE_DEBUG) 1990 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1991 #endif 1992 rp = aj + ai[row]; 1993 ap = aa + bs2*ai[row]; 1994 rmax = imax[row]; 1995 nrow = ailen[row]; 1996 low = 0; 1997 high = nrow; 1998 for (l=0; l<n; l++) { /* loop over added columns */ 1999 if (in[l] < 0) continue; 2000 #if defined(PETSC_USE_DEBUG) 2001 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 2002 #endif 2003 col = in[l]; 2004 if (roworiented) { 2005 value = v + (k*(stepval+bs) + l)*bs; 2006 } else { 2007 value = v + (l*(stepval+bs) + k)*bs; 2008 } 2009 if (col <= lastcol) low = 0; else high = nrow; 2010 lastcol = col; 2011 while (high-low > 7) { 2012 t = (low+high)/2; 2013 if (rp[t] > col) high = t; 2014 else low = t; 2015 } 2016 for (i=low; i<high; i++) { 2017 if (rp[i] > col) break; 2018 if (rp[i] == col) { 2019 bap = ap + bs2*i; 2020 if (roworiented) { 2021 if (is == ADD_VALUES) { 2022 for (ii=0; ii<bs; ii++,value+=stepval) { 2023 for (jj=ii; jj<bs2; jj+=bs) { 2024 bap[jj] += *value++; 2025 } 2026 } 2027 } else { 2028 for (ii=0; ii<bs; ii++,value+=stepval) { 2029 for (jj=ii; jj<bs2; jj+=bs) { 2030 bap[jj] = *value++; 2031 } 2032 } 2033 } 2034 } else { 2035 if (is == ADD_VALUES) { 2036 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2037 for (jj=0; jj<bs; jj++) { 2038 bap[jj] += value[jj]; 2039 } 2040 bap += bs; 2041 } 2042 } else { 2043 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2044 for (jj=0; jj<bs; jj++) { 2045 bap[jj] = value[jj]; 2046 } 2047 bap += bs; 2048 } 2049 } 2050 } 2051 goto noinsert2; 2052 } 2053 } 2054 if (nonew == 1) goto noinsert2; 2055 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2056 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2057 N = nrow++ - 1; high++; 2058 /* shift up all the later entries in this row */ 2059 for (ii=N; ii>=i; ii--) { 2060 rp[ii+1] = rp[ii]; 2061 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2062 } 2063 if (N >= i) { 2064 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2065 } 2066 rp[i] = col; 2067 bap = ap + bs2*i; 2068 if (roworiented) { 2069 for (ii=0; ii<bs; ii++,value+=stepval) { 2070 for (jj=ii; jj<bs2; jj+=bs) { 2071 bap[jj] = *value++; 2072 } 2073 } 2074 } else { 2075 for (ii=0; ii<bs; ii++,value+=stepval) { 2076 for (jj=0; jj<bs; jj++) { 2077 *bap++ = *value++; 2078 } 2079 } 2080 } 2081 noinsert2:; 2082 low = i; 2083 } 2084 ailen[row] = nrow; 2085 } 2086 PetscFunctionReturn(0); 2087 } 2088 2089 #undef __FUNCT__ 2090 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 2091 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 2092 { 2093 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2094 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 2095 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 2096 PetscErrorCode ierr; 2097 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 2098 MatScalar *aa = a->a,*ap; 2099 PetscReal ratio=0.6; 2100 2101 PetscFunctionBegin; 2102 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 2103 2104 if (m) rmax = ailen[0]; 2105 for (i=1; i<mbs; i++) { 2106 /* move each row back by the amount of empty slots (fshift) before it*/ 2107 fshift += imax[i-1] - ailen[i-1]; 2108 rmax = PetscMax(rmax,ailen[i]); 2109 if (fshift) { 2110 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 2111 N = ailen[i]; 2112 for (j=0; j<N; j++) { 2113 ip[j-fshift] = ip[j]; 2114 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2115 } 2116 } 2117 ai[i] = ai[i-1] + ailen[i-1]; 2118 } 2119 if (mbs) { 2120 fshift += imax[mbs-1] - ailen[mbs-1]; 2121 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 2122 } 2123 /* reset ilen and imax for each row */ 2124 for (i=0; i<mbs; i++) { 2125 ailen[i] = imax[i] = ai[i+1] - ai[i]; 2126 } 2127 a->nz = ai[mbs]; 2128 2129 /* diagonals may have moved, so kill the diagonal pointers */ 2130 a->idiagvalid = PETSC_FALSE; 2131 if (fshift && a->diag) { 2132 ierr = PetscFree(a->diag);CHKERRQ(ierr); 2133 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2134 a->diag = 0; 2135 } 2136 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 2137 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 2138 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 2139 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 2140 A->info.mallocs += a->reallocs; 2141 a->reallocs = 0; 2142 A->info.nz_unneeded = (PetscReal)fshift*bs2; 2143 2144 ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 2145 A->same_nonzero = PETSC_TRUE; 2146 PetscFunctionReturn(0); 2147 } 2148 2149 /* 2150 This function returns an array of flags which indicate the locations of contiguous 2151 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2152 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2153 Assume: sizes should be long enough to hold all the values. 2154 */ 2155 #undef __FUNCT__ 2156 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 2157 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2158 { 2159 PetscInt i,j,k,row; 2160 PetscBool flg; 2161 2162 PetscFunctionBegin; 2163 for (i=0,j=0; i<n; j++) { 2164 row = idx[i]; 2165 if (row%bs!=0) { /* Not the begining of a block */ 2166 sizes[j] = 1; 2167 i++; 2168 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2169 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 2170 i++; 2171 } else { /* Begining of the block, so check if the complete block exists */ 2172 flg = PETSC_TRUE; 2173 for (k=1; k<bs; k++) { 2174 if (row+k != idx[i+k]) { /* break in the block */ 2175 flg = PETSC_FALSE; 2176 break; 2177 } 2178 } 2179 if (flg) { /* No break in the bs */ 2180 sizes[j] = bs; 2181 i+= bs; 2182 } else { 2183 sizes[j] = 1; 2184 i++; 2185 } 2186 } 2187 } 2188 *bs_max = j; 2189 PetscFunctionReturn(0); 2190 } 2191 2192 #undef __FUNCT__ 2193 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2194 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2195 { 2196 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2197 PetscErrorCode ierr; 2198 PetscInt i,j,k,count,*rows; 2199 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2200 PetscScalar zero = 0.0; 2201 MatScalar *aa; 2202 const PetscScalar *xx; 2203 PetscScalar *bb; 2204 2205 PetscFunctionBegin; 2206 /* fix right hand side if needed */ 2207 if (x && b) { 2208 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2209 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2210 for (i=0; i<is_n; i++) { 2211 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2212 } 2213 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2214 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2215 } 2216 2217 /* Make a copy of the IS and sort it */ 2218 /* allocate memory for rows,sizes */ 2219 ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2220 2221 /* copy IS values to rows, and sort them */ 2222 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2223 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2224 2225 if (baij->keepnonzeropattern) { 2226 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2227 bs_max = is_n; 2228 A->same_nonzero = PETSC_TRUE; 2229 } else { 2230 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2231 A->same_nonzero = PETSC_FALSE; 2232 } 2233 2234 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2235 row = rows[j]; 2236 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2237 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2238 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2239 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2240 if (diag != (PetscScalar)0.0) { 2241 if (baij->ilen[row/bs] > 0) { 2242 baij->ilen[row/bs] = 1; 2243 baij->j[baij->i[row/bs]] = row/bs; 2244 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2245 } 2246 /* Now insert all the diagonal values for this bs */ 2247 for (k=0; k<bs; k++) { 2248 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2249 } 2250 } else { /* (diag == 0.0) */ 2251 baij->ilen[row/bs] = 0; 2252 } /* end (diag == 0.0) */ 2253 } else { /* (sizes[i] != bs) */ 2254 #if defined (PETSC_USE_DEBUG) 2255 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2256 #endif 2257 for (k=0; k<count; k++) { 2258 aa[0] = zero; 2259 aa += bs; 2260 } 2261 if (diag != (PetscScalar)0.0) { 2262 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2263 } 2264 } 2265 } 2266 2267 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2268 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2269 PetscFunctionReturn(0); 2270 } 2271 2272 #undef __FUNCT__ 2273 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2274 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2275 { 2276 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2277 PetscErrorCode ierr; 2278 PetscInt i,j,k,count; 2279 PetscInt bs=A->rmap->bs,bs2=baij->bs2,row,col; 2280 PetscScalar zero = 0.0; 2281 MatScalar *aa; 2282 const PetscScalar *xx; 2283 PetscScalar *bb; 2284 PetscBool *zeroed,vecs = PETSC_FALSE; 2285 2286 PetscFunctionBegin; 2287 /* fix right hand side if needed */ 2288 if (x && b) { 2289 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2290 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2291 vecs = PETSC_TRUE; 2292 } 2293 A->same_nonzero = PETSC_TRUE; 2294 2295 /* zero the columns */ 2296 ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr); 2297 ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 2298 for (i=0; i<is_n; i++) { 2299 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2300 zeroed[is_idx[i]] = PETSC_TRUE; 2301 } 2302 for (i=0; i<A->rmap->N; i++) { 2303 if (!zeroed[i]) { 2304 row = i/bs; 2305 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2306 for (k=0; k<bs; k++) { 2307 col = bs*baij->j[j] + k; 2308 if (zeroed[col]) { 2309 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2310 if (vecs) bb[i] -= aa[0]*xx[col]; 2311 aa[0] = 0.0; 2312 } 2313 } 2314 } 2315 } else if (vecs) bb[i] = diag*xx[i]; 2316 } 2317 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2318 if (vecs) { 2319 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2320 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2321 } 2322 2323 /* zero the rows */ 2324 for (i=0; i<is_n; i++) { 2325 row = is_idx[i]; 2326 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2327 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2328 for (k=0; k<count; k++) { 2329 aa[0] = zero; 2330 aa += bs; 2331 } 2332 if (diag != (PetscScalar)0.0) { 2333 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2334 } 2335 } 2336 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2337 PetscFunctionReturn(0); 2338 } 2339 2340 #undef __FUNCT__ 2341 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2342 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2343 { 2344 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2345 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2346 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2347 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2348 PetscErrorCode ierr; 2349 PetscInt ridx,cidx,bs2=a->bs2; 2350 PetscBool roworiented=a->roworiented; 2351 MatScalar *ap,value,*aa=a->a,*bap; 2352 2353 PetscFunctionBegin; 2354 if (v) PetscValidScalarPointer(v,6); 2355 for (k=0; k<m; k++) { /* loop over added rows */ 2356 row = im[k]; 2357 brow = row/bs; 2358 if (row < 0) continue; 2359 #if defined(PETSC_USE_DEBUG) 2360 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2361 #endif 2362 rp = aj + ai[brow]; 2363 ap = aa + bs2*ai[brow]; 2364 rmax = imax[brow]; 2365 nrow = ailen[brow]; 2366 low = 0; 2367 high = nrow; 2368 for (l=0; l<n; l++) { /* loop over added columns */ 2369 if (in[l] < 0) continue; 2370 #if defined(PETSC_USE_DEBUG) 2371 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2372 #endif 2373 col = in[l]; bcol = col/bs; 2374 ridx = row % bs; cidx = col % bs; 2375 if (roworiented) { 2376 value = v[l + k*n]; 2377 } else { 2378 value = v[k + l*m]; 2379 } 2380 if (col <= lastcol) low = 0; else high = nrow; 2381 lastcol = col; 2382 while (high-low > 7) { 2383 t = (low+high)/2; 2384 if (rp[t] > bcol) high = t; 2385 else low = t; 2386 } 2387 for (i=low; i<high; i++) { 2388 if (rp[i] > bcol) break; 2389 if (rp[i] == bcol) { 2390 bap = ap + bs2*i + bs*cidx + ridx; 2391 if (is == ADD_VALUES) *bap += value; 2392 else *bap = value; 2393 goto noinsert1; 2394 } 2395 } 2396 if (nonew == 1) goto noinsert1; 2397 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2398 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2399 N = nrow++ - 1; high++; 2400 /* shift up all the later entries in this row */ 2401 for (ii=N; ii>=i; ii--) { 2402 rp[ii+1] = rp[ii]; 2403 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2404 } 2405 if (N>=i) { 2406 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2407 } 2408 rp[i] = bcol; 2409 ap[bs2*i + bs*cidx + ridx] = value; 2410 a->nz++; 2411 noinsert1:; 2412 low = i; 2413 } 2414 ailen[brow] = nrow; 2415 } 2416 A->same_nonzero = PETSC_FALSE; 2417 PetscFunctionReturn(0); 2418 } 2419 2420 #undef __FUNCT__ 2421 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2422 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2423 { 2424 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2425 Mat outA; 2426 PetscErrorCode ierr; 2427 PetscBool row_identity,col_identity; 2428 2429 PetscFunctionBegin; 2430 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2431 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2432 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2433 if (!row_identity || !col_identity) { 2434 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2435 } 2436 2437 outA = inA; 2438 inA->factortype = MAT_FACTOR_LU; 2439 2440 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2441 2442 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2443 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2444 a->row = row; 2445 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2446 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2447 a->col = col; 2448 2449 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2450 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2451 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2452 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2453 2454 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2455 if (!a->solve_work) { 2456 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2457 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2458 } 2459 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2460 2461 PetscFunctionReturn(0); 2462 } 2463 2464 EXTERN_C_BEGIN 2465 #undef __FUNCT__ 2466 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2467 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2468 { 2469 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2470 PetscInt i,nz,mbs; 2471 2472 PetscFunctionBegin; 2473 nz = baij->maxnz; 2474 mbs = baij->mbs; 2475 for (i=0; i<nz; i++) { 2476 baij->j[i] = indices[i]; 2477 } 2478 baij->nz = nz; 2479 for (i=0; i<mbs; i++) { 2480 baij->ilen[i] = baij->imax[i]; 2481 } 2482 PetscFunctionReturn(0); 2483 } 2484 EXTERN_C_END 2485 2486 #undef __FUNCT__ 2487 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2488 /*@ 2489 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2490 in the matrix. 2491 2492 Input Parameters: 2493 + mat - the SeqBAIJ matrix 2494 - indices - the column indices 2495 2496 Level: advanced 2497 2498 Notes: 2499 This can be called if you have precomputed the nonzero structure of the 2500 matrix and want to provide it to the matrix object to improve the performance 2501 of the MatSetValues() operation. 2502 2503 You MUST have set the correct numbers of nonzeros per row in the call to 2504 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2505 2506 MUST be called before any calls to MatSetValues(); 2507 2508 @*/ 2509 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2510 { 2511 PetscErrorCode ierr; 2512 2513 PetscFunctionBegin; 2514 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2515 PetscValidPointer(indices,2); 2516 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr); 2517 PetscFunctionReturn(0); 2518 } 2519 2520 #undef __FUNCT__ 2521 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2522 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2523 { 2524 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2525 PetscErrorCode ierr; 2526 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2527 PetscReal atmp; 2528 PetscScalar *x,zero = 0.0; 2529 MatScalar *aa; 2530 PetscInt ncols,brow,krow,kcol; 2531 2532 PetscFunctionBegin; 2533 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2534 bs = A->rmap->bs; 2535 aa = a->a; 2536 ai = a->i; 2537 aj = a->j; 2538 mbs = a->mbs; 2539 2540 ierr = VecSet(v,zero);CHKERRQ(ierr); 2541 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2542 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2543 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2544 for (i=0; i<mbs; i++) { 2545 ncols = ai[1] - ai[0]; ai++; 2546 brow = bs*i; 2547 for (j=0; j<ncols; j++){ 2548 for (kcol=0; kcol<bs; kcol++){ 2549 for (krow=0; krow<bs; krow++){ 2550 atmp = PetscAbsScalar(*aa);aa++; 2551 row = brow + krow; /* row index */ 2552 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2553 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2554 } 2555 } 2556 aj++; 2557 } 2558 } 2559 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2560 PetscFunctionReturn(0); 2561 } 2562 2563 #undef __FUNCT__ 2564 #define __FUNCT__ "MatCopy_SeqBAIJ" 2565 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2566 { 2567 PetscErrorCode ierr; 2568 2569 PetscFunctionBegin; 2570 /* If the two matrices have the same copy implementation, use fast copy. */ 2571 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2572 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2573 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2574 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2575 2576 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2577 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2578 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2579 } else { 2580 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2581 } 2582 PetscFunctionReturn(0); 2583 } 2584 2585 #undef __FUNCT__ 2586 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2587 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2588 { 2589 PetscErrorCode ierr; 2590 2591 PetscFunctionBegin; 2592 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2593 PetscFunctionReturn(0); 2594 } 2595 2596 #undef __FUNCT__ 2597 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2598 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2599 { 2600 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2601 PetscFunctionBegin; 2602 *array = a->a; 2603 PetscFunctionReturn(0); 2604 } 2605 2606 #undef __FUNCT__ 2607 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2608 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2609 { 2610 PetscFunctionBegin; 2611 PetscFunctionReturn(0); 2612 } 2613 2614 #undef __FUNCT__ 2615 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2616 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2617 { 2618 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2619 PetscErrorCode ierr; 2620 PetscInt i,bs=Y->rmap->bs,j,bs2=bs*bs; 2621 PetscBLASInt one=1; 2622 2623 PetscFunctionBegin; 2624 if (str == SAME_NONZERO_PATTERN) { 2625 PetscScalar alpha = a; 2626 PetscBLASInt bnz = PetscBLASIntCast(x->nz*bs2); 2627 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2628 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2629 if (y->xtoy && y->XtoY != X) { 2630 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2631 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2632 } 2633 if (!y->xtoy) { /* get xtoy */ 2634 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2635 y->XtoY = X; 2636 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2637 } 2638 for (i=0; i<x->nz; i++) { 2639 j = 0; 2640 while (j < bs2){ 2641 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2642 j++; 2643 } 2644 } 2645 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2646 } else { 2647 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2648 } 2649 PetscFunctionReturn(0); 2650 } 2651 2652 #undef __FUNCT__ 2653 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ" 2654 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs) 2655 { 2656 PetscInt rbs,cbs; 2657 PetscErrorCode ierr; 2658 2659 PetscFunctionBegin; 2660 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 2661 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 2662 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 2663 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 2664 PetscFunctionReturn(0); 2665 } 2666 2667 #undef __FUNCT__ 2668 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2669 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2670 { 2671 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2672 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2673 MatScalar *aa = a->a; 2674 2675 PetscFunctionBegin; 2676 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2677 PetscFunctionReturn(0); 2678 } 2679 2680 #undef __FUNCT__ 2681 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2682 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2683 { 2684 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2685 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2686 MatScalar *aa = a->a; 2687 2688 PetscFunctionBegin; 2689 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2690 PetscFunctionReturn(0); 2691 } 2692 2693 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2694 2695 #undef __FUNCT__ 2696 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2697 /* 2698 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2699 */ 2700 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 2701 { 2702 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2703 PetscErrorCode ierr; 2704 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2705 PetscInt nz = a->i[m],row,*jj,mr,col; 2706 2707 PetscFunctionBegin; 2708 *nn = n; 2709 if (!ia) PetscFunctionReturn(0); 2710 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2711 else { 2712 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2713 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2714 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2715 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2716 jj = a->j; 2717 for (i=0; i<nz; i++) { 2718 collengths[jj[i]]++; 2719 } 2720 cia[0] = oshift; 2721 for (i=0; i<n; i++) { 2722 cia[i+1] = cia[i] + collengths[i]; 2723 } 2724 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2725 jj = a->j; 2726 for (row=0; row<m; row++) { 2727 mr = a->i[row+1] - a->i[row]; 2728 for (i=0; i<mr; i++) { 2729 col = *jj++; 2730 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2731 } 2732 } 2733 ierr = PetscFree(collengths);CHKERRQ(ierr); 2734 *ia = cia; *ja = cja; 2735 } 2736 PetscFunctionReturn(0); 2737 } 2738 2739 #undef __FUNCT__ 2740 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2741 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 2742 { 2743 PetscErrorCode ierr; 2744 2745 PetscFunctionBegin; 2746 if (!ia) PetscFunctionReturn(0); 2747 ierr = PetscFree(*ia);CHKERRQ(ierr); 2748 ierr = PetscFree(*ja);CHKERRQ(ierr); 2749 PetscFunctionReturn(0); 2750 } 2751 2752 #undef __FUNCT__ 2753 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2754 PetscErrorCode MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2755 { 2756 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f; 2757 PetscErrorCode ierr; 2758 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2; 2759 PetscScalar dx,*y,*xx,*w3_array; 2760 PetscScalar *vscale_array; 2761 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2762 Vec w1=coloring->w1,w2=coloring->w2,w3; 2763 void *fctx = coloring->fctx; 2764 PetscBool flg = PETSC_FALSE; 2765 PetscInt ctype=coloring->ctype,N,col_start=0,col_end=0; 2766 Vec x1_tmp; 2767 2768 PetscFunctionBegin; 2769 PetscValidHeaderSpecific(J,MAT_CLASSID,1); 2770 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2); 2771 PetscValidHeaderSpecific(x1,VEC_CLASSID,3); 2772 if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2773 2774 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2775 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2776 ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr); 2777 if (flg) { 2778 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2779 } else { 2780 PetscBool assembled; 2781 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2782 if (assembled) { 2783 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2784 } 2785 } 2786 2787 x1_tmp = x1; 2788 if (!coloring->vscale){ 2789 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2790 } 2791 2792 /* 2793 This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets 2794 coloring->F for the coarser grids from the finest 2795 */ 2796 if (coloring->F) { 2797 ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr); 2798 ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr); 2799 if (m1 != m2) { 2800 coloring->F = 0; 2801 } 2802 } 2803 2804 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2805 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2806 } 2807 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2808 2809 /* Set w1 = F(x1) */ 2810 if (coloring->F) { 2811 w1 = coloring->F; /* use already computed value of function */ 2812 coloring->F = 0; 2813 } else { 2814 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2815 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2816 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2817 } 2818 2819 if (!coloring->w3) { 2820 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2821 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2822 } 2823 w3 = coloring->w3; 2824 2825 CHKMEMQ; 2826 /* Compute all the local scale factors, including ghost points */ 2827 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2828 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2829 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2830 if (ctype == IS_COLORING_GHOSTED){ 2831 col_start = 0; col_end = N; 2832 } else if (ctype == IS_COLORING_GLOBAL){ 2833 xx = xx - start; 2834 vscale_array = vscale_array - start; 2835 col_start = start; col_end = N + start; 2836 } CHKMEMQ; 2837 for (col=col_start; col<col_end; col++){ 2838 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2839 if (coloring->htype[0] == 'w') { 2840 dx = 1.0 + unorm; 2841 } else { 2842 dx = xx[col]; 2843 } 2844 if (dx == (PetscScalar)0.0) dx = 1.0; 2845 #if !defined(PETSC_USE_COMPLEX) 2846 if (dx < umin && dx >= 0.0) dx = umin; 2847 else if (dx < 0.0 && dx > -umin) dx = -umin; 2848 #else 2849 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2850 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2851 #endif 2852 dx *= epsilon; 2853 vscale_array[col] = (PetscScalar)1.0/dx; 2854 } CHKMEMQ; 2855 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2856 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2857 if (ctype == IS_COLORING_GLOBAL){ 2858 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2859 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2860 } 2861 CHKMEMQ; 2862 if (coloring->vscaleforrow) { 2863 vscaleforrow = coloring->vscaleforrow; 2864 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2865 2866 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2867 /* 2868 Loop over each color 2869 */ 2870 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2871 for (k=0; k<coloring->ncolors; k++) { 2872 coloring->currentcolor = k; 2873 for (i=0; i<bs; i++) { 2874 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2875 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2876 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2877 /* 2878 Loop over each column associated with color 2879 adding the perturbation to the vector w3. 2880 */ 2881 for (l=0; l<coloring->ncolumns[k]; l++) { 2882 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2883 if (coloring->htype[0] == 'w') { 2884 dx = 1.0 + unorm; 2885 } else { 2886 dx = xx[col]; 2887 } 2888 if (dx == (PetscScalar)0.0) dx = 1.0; 2889 #if !defined(PETSC_USE_COMPLEX) 2890 if (dx < umin && dx >= 0.0) dx = umin; 2891 else if (dx < 0.0 && dx > -umin) dx = -umin; 2892 #else 2893 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2894 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2895 #endif 2896 dx *= epsilon; 2897 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2898 w3_array[col] += dx; 2899 } 2900 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2901 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2902 2903 /* 2904 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2905 w2 = F(x1 + dx) - F(x1) 2906 */ 2907 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2908 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2909 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2910 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2911 2912 /* 2913 Loop over rows of vector, putting results into Jacobian matrix 2914 */ 2915 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2916 for (l=0; l<coloring->nrows[k]; l++) { 2917 row = bs*coloring->rows[k][l]; /* local row index */ 2918 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2919 for (j=0; j<bs; j++) { 2920 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2921 srows[j] = row + start + j; 2922 } 2923 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2924 } 2925 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2926 } 2927 } /* endof for each color */ 2928 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2929 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2930 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2931 ierr = PetscFree(srows);CHKERRQ(ierr); 2932 2933 coloring->currentcolor = -1; 2934 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2935 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2936 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2937 PetscFunctionReturn(0); 2938 } 2939 2940 /* -------------------------------------------------------------------*/ 2941 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2942 MatGetRow_SeqBAIJ, 2943 MatRestoreRow_SeqBAIJ, 2944 MatMult_SeqBAIJ_N, 2945 /* 4*/ MatMultAdd_SeqBAIJ_N, 2946 MatMultTranspose_SeqBAIJ, 2947 MatMultTransposeAdd_SeqBAIJ, 2948 0, 2949 0, 2950 0, 2951 /*10*/ 0, 2952 MatLUFactor_SeqBAIJ, 2953 0, 2954 0, 2955 MatTranspose_SeqBAIJ, 2956 /*15*/ MatGetInfo_SeqBAIJ, 2957 MatEqual_SeqBAIJ, 2958 MatGetDiagonal_SeqBAIJ, 2959 MatDiagonalScale_SeqBAIJ, 2960 MatNorm_SeqBAIJ, 2961 /*20*/ 0, 2962 MatAssemblyEnd_SeqBAIJ, 2963 MatSetOption_SeqBAIJ, 2964 MatZeroEntries_SeqBAIJ, 2965 /*24*/ MatZeroRows_SeqBAIJ, 2966 0, 2967 0, 2968 0, 2969 0, 2970 /*29*/ MatSetUpPreallocation_SeqBAIJ, 2971 0, 2972 0, 2973 MatGetArray_SeqBAIJ, 2974 MatRestoreArray_SeqBAIJ, 2975 /*34*/ MatDuplicate_SeqBAIJ, 2976 0, 2977 0, 2978 MatILUFactor_SeqBAIJ, 2979 0, 2980 /*39*/ MatAXPY_SeqBAIJ, 2981 MatGetSubMatrices_SeqBAIJ, 2982 MatIncreaseOverlap_SeqBAIJ, 2983 MatGetValues_SeqBAIJ, 2984 MatCopy_SeqBAIJ, 2985 /*44*/ 0, 2986 MatScale_SeqBAIJ, 2987 0, 2988 0, 2989 MatZeroRowsColumns_SeqBAIJ, 2990 /*49*/ MatSetBlockSize_SeqBAIJ, 2991 MatGetRowIJ_SeqBAIJ, 2992 MatRestoreRowIJ_SeqBAIJ, 2993 MatGetColumnIJ_SeqBAIJ, 2994 MatRestoreColumnIJ_SeqBAIJ, 2995 /*54*/ MatFDColoringCreate_SeqAIJ, 2996 0, 2997 0, 2998 0, 2999 MatSetValuesBlocked_SeqBAIJ, 3000 /*59*/ MatGetSubMatrix_SeqBAIJ, 3001 MatDestroy_SeqBAIJ, 3002 MatView_SeqBAIJ, 3003 0, 3004 0, 3005 /*64*/ 0, 3006 0, 3007 0, 3008 0, 3009 0, 3010 /*69*/ MatGetRowMaxAbs_SeqBAIJ, 3011 0, 3012 MatConvert_Basic, 3013 0, 3014 0, 3015 /*74*/ 0, 3016 MatFDColoringApply_BAIJ, 3017 0, 3018 0, 3019 0, 3020 /*79*/ 0, 3021 0, 3022 0, 3023 0, 3024 MatLoad_SeqBAIJ, 3025 /*84*/ 0, 3026 0, 3027 0, 3028 0, 3029 0, 3030 /*89*/ 0, 3031 0, 3032 0, 3033 0, 3034 0, 3035 /*94*/ 0, 3036 0, 3037 0, 3038 0, 3039 0, 3040 /*99*/0, 3041 0, 3042 0, 3043 0, 3044 0, 3045 /*104*/0, 3046 MatRealPart_SeqBAIJ, 3047 MatImaginaryPart_SeqBAIJ, 3048 0, 3049 0, 3050 /*109*/0, 3051 0, 3052 0, 3053 0, 3054 MatMissingDiagonal_SeqBAIJ, 3055 /*114*/0, 3056 0, 3057 0, 3058 0, 3059 0, 3060 /*119*/0, 3061 0, 3062 MatMultHermitianTranspose_SeqBAIJ, 3063 MatMultHermitianTransposeAdd_SeqBAIJ, 3064 0, 3065 /*124*/0, 3066 0, 3067 MatInvertBlockDiagonal_SeqBAIJ 3068 }; 3069 3070 EXTERN_C_BEGIN 3071 #undef __FUNCT__ 3072 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 3073 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3074 { 3075 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3076 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 3077 PetscErrorCode ierr; 3078 3079 PetscFunctionBegin; 3080 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3081 3082 /* allocate space for values if not already there */ 3083 if (!aij->saved_values) { 3084 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 3085 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 3086 } 3087 3088 /* copy values over */ 3089 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3090 PetscFunctionReturn(0); 3091 } 3092 EXTERN_C_END 3093 3094 EXTERN_C_BEGIN 3095 #undef __FUNCT__ 3096 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 3097 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3098 { 3099 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3100 PetscErrorCode ierr; 3101 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 3102 3103 PetscFunctionBegin; 3104 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3105 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 3106 3107 /* copy values over */ 3108 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3109 PetscFunctionReturn(0); 3110 } 3111 EXTERN_C_END 3112 3113 EXTERN_C_BEGIN 3114 extern PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 3115 extern PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 3116 EXTERN_C_END 3117 3118 EXTERN_C_BEGIN 3119 #undef __FUNCT__ 3120 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 3121 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 3122 { 3123 Mat_SeqBAIJ *b; 3124 PetscErrorCode ierr; 3125 PetscInt i,mbs,nbs,bs2,newbs = PetscAbs(bs); 3126 PetscBool flg,skipallocation = PETSC_FALSE; 3127 3128 PetscFunctionBegin; 3129 3130 if (nz == MAT_SKIP_ALLOCATION) { 3131 skipallocation = PETSC_TRUE; 3132 nz = 0; 3133 } 3134 3135 if (bs < 0) { 3136 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 3137 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 3138 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3139 bs = PetscAbs(bs); 3140 } 3141 if (nnz && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 3142 bs = newbs; 3143 3144 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3145 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3146 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3147 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3148 3149 B->preallocated = PETSC_TRUE; 3150 3151 mbs = B->rmap->n/bs; 3152 nbs = B->cmap->n/bs; 3153 bs2 = bs*bs; 3154 3155 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 3156 3157 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3158 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 3159 if (nnz) { 3160 for (i=0; i<mbs; i++) { 3161 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 3162 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 3163 } 3164 } 3165 3166 b = (Mat_SeqBAIJ*)B->data; 3167 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 3168 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3169 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3170 3171 if (!flg) { 3172 switch (bs) { 3173 case 1: 3174 B->ops->mult = MatMult_SeqBAIJ_1; 3175 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3176 B->ops->sor = MatSOR_SeqBAIJ_1; 3177 break; 3178 case 2: 3179 B->ops->mult = MatMult_SeqBAIJ_2; 3180 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3181 B->ops->sor = MatSOR_SeqBAIJ_2; 3182 break; 3183 case 3: 3184 B->ops->mult = MatMult_SeqBAIJ_3; 3185 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3186 B->ops->sor = MatSOR_SeqBAIJ_3; 3187 break; 3188 case 4: 3189 B->ops->mult = MatMult_SeqBAIJ_4; 3190 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3191 B->ops->sor = MatSOR_SeqBAIJ_4; 3192 break; 3193 case 5: 3194 B->ops->mult = MatMult_SeqBAIJ_5; 3195 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3196 B->ops->sor = MatSOR_SeqBAIJ_5; 3197 break; 3198 case 6: 3199 B->ops->mult = MatMult_SeqBAIJ_6; 3200 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3201 B->ops->sor = MatSOR_SeqBAIJ_6; 3202 break; 3203 case 7: 3204 B->ops->mult = MatMult_SeqBAIJ_7; 3205 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3206 B->ops->sor = MatSOR_SeqBAIJ_7; 3207 break; 3208 case 15: 3209 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3210 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3211 B->ops->sor = MatSOR_SeqBAIJ_N; 3212 break; 3213 default: 3214 B->ops->mult = MatMult_SeqBAIJ_N; 3215 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3216 B->ops->sor = MatSOR_SeqBAIJ_N; 3217 break; 3218 } 3219 } 3220 B->rmap->bs = bs; 3221 b->mbs = mbs; 3222 b->nbs = nbs; 3223 if (!skipallocation) { 3224 if (!b->imax) { 3225 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 3226 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt)); 3227 b->free_imax_ilen = PETSC_TRUE; 3228 } 3229 /* b->ilen will count nonzeros in each block row so far. */ 3230 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 3231 if (!nnz) { 3232 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3233 else if (nz < 0) nz = 1; 3234 for (i=0; i<mbs; i++) b->imax[i] = nz; 3235 nz = nz*mbs; 3236 } else { 3237 nz = 0; 3238 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 3239 } 3240 3241 /* allocate the matrix space */ 3242 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 3243 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 3244 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 3245 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 3246 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3247 b->singlemalloc = PETSC_TRUE; 3248 b->i[0] = 0; 3249 for (i=1; i<mbs+1; i++) { 3250 b->i[i] = b->i[i-1] + b->imax[i-1]; 3251 } 3252 b->free_a = PETSC_TRUE; 3253 b->free_ij = PETSC_TRUE; 3254 } else { 3255 b->free_a = PETSC_FALSE; 3256 b->free_ij = PETSC_FALSE; 3257 } 3258 3259 B->rmap->bs = bs; 3260 b->bs2 = bs2; 3261 b->mbs = mbs; 3262 b->nz = 0; 3263 b->maxnz = nz; 3264 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3265 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3266 PetscFunctionReturn(0); 3267 } 3268 EXTERN_C_END 3269 3270 EXTERN_C_BEGIN 3271 #undef __FUNCT__ 3272 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3273 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3274 { 3275 PetscInt i,m,nz,nz_max=0,*nnz; 3276 PetscScalar *values=0; 3277 PetscErrorCode ierr; 3278 3279 PetscFunctionBegin; 3280 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3281 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3282 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3283 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3284 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3285 m = B->rmap->n/bs; 3286 3287 if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 3288 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3289 for(i=0; i<m; i++) { 3290 nz = ii[i+1]- ii[i]; 3291 if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 3292 nz_max = PetscMax(nz_max, nz); 3293 nnz[i] = nz; 3294 } 3295 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3296 ierr = PetscFree(nnz);CHKERRQ(ierr); 3297 3298 values = (PetscScalar*)V; 3299 if (!values) { 3300 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3301 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3302 } 3303 for (i=0; i<m; i++) { 3304 PetscInt ncols = ii[i+1] - ii[i]; 3305 const PetscInt *icols = jj + ii[i]; 3306 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3307 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3308 } 3309 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3310 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3311 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3312 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3313 PetscFunctionReturn(0); 3314 } 3315 EXTERN_C_END 3316 3317 3318 EXTERN_C_BEGIN 3319 extern PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3320 extern PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*); 3321 #if defined(PETSC_HAVE_MUMPS) 3322 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3323 #endif 3324 extern PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 3325 EXTERN_C_END 3326 3327 /*MC 3328 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3329 block sparse compressed row format. 3330 3331 Options Database Keys: 3332 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3333 3334 Level: beginner 3335 3336 .seealso: MatCreateSeqBAIJ() 3337 M*/ 3338 3339 EXTERN_C_BEGIN 3340 extern PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3341 EXTERN_C_END 3342 3343 EXTERN_C_BEGIN 3344 #undef __FUNCT__ 3345 #define __FUNCT__ "MatCreate_SeqBAIJ" 3346 PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3347 { 3348 PetscErrorCode ierr; 3349 PetscMPIInt size; 3350 Mat_SeqBAIJ *b; 3351 3352 PetscFunctionBegin; 3353 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 3354 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3355 3356 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3357 B->data = (void*)b; 3358 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3359 b->row = 0; 3360 b->col = 0; 3361 b->icol = 0; 3362 b->reallocs = 0; 3363 b->saved_values = 0; 3364 3365 b->roworiented = PETSC_TRUE; 3366 b->nonew = 0; 3367 b->diag = 0; 3368 b->solve_work = 0; 3369 b->mult_work = 0; 3370 B->spptr = 0; 3371 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3372 b->keepnonzeropattern = PETSC_FALSE; 3373 b->xtoy = 0; 3374 b->XtoY = 0; 3375 B->same_nonzero = PETSC_FALSE; 3376 3377 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C", 3378 "MatGetFactorAvailable_seqbaij_petsc", 3379 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3380 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C", 3381 "MatGetFactor_seqbaij_petsc", 3382 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3383 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_bstrm_C", 3384 "MatGetFactor_seqbaij_bstrm", 3385 MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr); 3386 #if defined(PETSC_HAVE_MUMPS) 3387 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr); 3388 #endif 3389 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatInvertBlockDiagonal_C", 3390 "MatInvertBlockDiagonal_SeqBAIJ", 3391 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3392 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3393 "MatStoreValues_SeqBAIJ", 3394 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3395 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3396 "MatRetrieveValues_SeqBAIJ", 3397 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3398 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 3399 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 3400 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3401 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 3402 "MatConvert_SeqBAIJ_SeqAIJ", 3403 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3404 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 3405 "MatConvert_SeqBAIJ_SeqSBAIJ", 3406 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3407 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 3408 "MatSeqBAIJSetPreallocation_SeqBAIJ", 3409 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3410 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 3411 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 3412 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3413 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C", 3414 "MatConvert_SeqBAIJ_SeqBSTRM", 3415 MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3416 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3417 "MatIsTranspose_SeqBAIJ", 3418 MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3419 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3420 PetscFunctionReturn(0); 3421 } 3422 EXTERN_C_END 3423 3424 #undef __FUNCT__ 3425 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3426 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3427 { 3428 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3429 PetscErrorCode ierr; 3430 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3431 3432 PetscFunctionBegin; 3433 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3434 3435 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3436 c->imax = a->imax; 3437 c->ilen = a->ilen; 3438 c->free_imax_ilen = PETSC_FALSE; 3439 } else { 3440 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3441 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3442 for (i=0; i<mbs; i++) { 3443 c->imax[i] = a->imax[i]; 3444 c->ilen[i] = a->ilen[i]; 3445 } 3446 c->free_imax_ilen = PETSC_TRUE; 3447 } 3448 3449 /* allocate the matrix space */ 3450 if (mallocmatspace){ 3451 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3452 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3453 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3454 c->singlemalloc = PETSC_FALSE; 3455 c->free_ij = PETSC_FALSE; 3456 c->i = a->i; 3457 c->j = a->j; 3458 c->parent = A; 3459 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3460 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3461 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3462 } else { 3463 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3464 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3465 c->singlemalloc = PETSC_TRUE; 3466 c->free_ij = PETSC_TRUE; 3467 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3468 if (mbs > 0) { 3469 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3470 if (cpvalues == MAT_COPY_VALUES) { 3471 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3472 } else { 3473 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3474 } 3475 } 3476 } 3477 } 3478 3479 c->roworiented = a->roworiented; 3480 c->nonew = a->nonew; 3481 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3482 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3483 c->bs2 = a->bs2; 3484 c->mbs = a->mbs; 3485 c->nbs = a->nbs; 3486 3487 if (a->diag) { 3488 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3489 c->diag = a->diag; 3490 c->free_diag = PETSC_FALSE; 3491 } else { 3492 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3493 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3494 for (i=0; i<mbs; i++) { 3495 c->diag[i] = a->diag[i]; 3496 } 3497 c->free_diag = PETSC_TRUE; 3498 } 3499 } else c->diag = 0; 3500 c->nz = a->nz; 3501 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3502 c->solve_work = 0; 3503 c->mult_work = 0; 3504 c->free_a = PETSC_TRUE; 3505 c->free_ij = PETSC_TRUE; 3506 C->preallocated = PETSC_TRUE; 3507 C->assembled = PETSC_TRUE; 3508 3509 c->compressedrow.use = a->compressedrow.use; 3510 c->compressedrow.nrows = a->compressedrow.nrows; 3511 c->compressedrow.check = a->compressedrow.check; 3512 if (a->compressedrow.use){ 3513 i = a->compressedrow.nrows; 3514 ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 3515 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3516 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3517 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3518 } else { 3519 c->compressedrow.use = PETSC_FALSE; 3520 c->compressedrow.i = PETSC_NULL; 3521 c->compressedrow.rindex = PETSC_NULL; 3522 } 3523 C->same_nonzero = A->same_nonzero; 3524 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3525 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3526 PetscFunctionReturn(0); 3527 } 3528 3529 #undef __FUNCT__ 3530 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3531 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3532 { 3533 PetscErrorCode ierr; 3534 3535 PetscFunctionBegin; 3536 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 3537 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3538 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3539 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE); 3540 PetscFunctionReturn(0); 3541 } 3542 3543 #undef __FUNCT__ 3544 #define __FUNCT__ "MatLoad_SeqBAIJ" 3545 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3546 { 3547 Mat_SeqBAIJ *a; 3548 PetscErrorCode ierr; 3549 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3550 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3551 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3552 PetscInt *masked,nmask,tmp,bs2,ishift; 3553 PetscMPIInt size; 3554 int fd; 3555 PetscScalar *aa; 3556 MPI_Comm comm = ((PetscObject)viewer)->comm; 3557 3558 PetscFunctionBegin; 3559 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3560 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3561 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3562 bs2 = bs*bs; 3563 3564 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3565 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3566 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3567 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3568 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3569 M = header[1]; N = header[2]; nz = header[3]; 3570 3571 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3572 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3573 3574 /* 3575 This code adds extra rows to make sure the number of rows is 3576 divisible by the blocksize 3577 */ 3578 mbs = M/bs; 3579 extra_rows = bs - M + bs*(mbs); 3580 if (extra_rows == bs) extra_rows = 0; 3581 else mbs++; 3582 if (extra_rows) { 3583 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3584 } 3585 3586 /* Set global sizes if not already set */ 3587 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3588 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3589 } else { /* Check if the matrix global sizes are correct */ 3590 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3591 if (rows < 0 && cols < 0){ /* user might provide local size instead of global size */ 3592 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3593 } 3594 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3595 } 3596 3597 /* read in row lengths */ 3598 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3599 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3600 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3601 3602 /* read in column indices */ 3603 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3604 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3605 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3606 3607 /* loop over row lengths determining block row lengths */ 3608 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3609 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3610 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3611 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3612 rowcount = 0; 3613 nzcount = 0; 3614 for (i=0; i<mbs; i++) { 3615 nmask = 0; 3616 for (j=0; j<bs; j++) { 3617 kmax = rowlengths[rowcount]; 3618 for (k=0; k<kmax; k++) { 3619 tmp = jj[nzcount++]/bs; 3620 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3621 } 3622 rowcount++; 3623 } 3624 browlengths[i] += nmask; 3625 /* zero out the mask elements we set */ 3626 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3627 } 3628 3629 /* Do preallocation */ 3630 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3631 a = (Mat_SeqBAIJ*)newmat->data; 3632 3633 /* set matrix "i" values */ 3634 a->i[0] = 0; 3635 for (i=1; i<= mbs; i++) { 3636 a->i[i] = a->i[i-1] + browlengths[i-1]; 3637 a->ilen[i-1] = browlengths[i-1]; 3638 } 3639 a->nz = 0; 3640 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3641 3642 /* read in nonzero values */ 3643 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3644 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3645 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3646 3647 /* set "a" and "j" values into matrix */ 3648 nzcount = 0; jcount = 0; 3649 for (i=0; i<mbs; i++) { 3650 nzcountb = nzcount; 3651 nmask = 0; 3652 for (j=0; j<bs; j++) { 3653 kmax = rowlengths[i*bs+j]; 3654 for (k=0; k<kmax; k++) { 3655 tmp = jj[nzcount++]/bs; 3656 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3657 } 3658 } 3659 /* sort the masked values */ 3660 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3661 3662 /* set "j" values into matrix */ 3663 maskcount = 1; 3664 for (j=0; j<nmask; j++) { 3665 a->j[jcount++] = masked[j]; 3666 mask[masked[j]] = maskcount++; 3667 } 3668 /* set "a" values into matrix */ 3669 ishift = bs2*a->i[i]; 3670 for (j=0; j<bs; j++) { 3671 kmax = rowlengths[i*bs+j]; 3672 for (k=0; k<kmax; k++) { 3673 tmp = jj[nzcountb]/bs ; 3674 block = mask[tmp] - 1; 3675 point = jj[nzcountb] - bs*tmp; 3676 idx = ishift + bs2*block + j + bs*point; 3677 a->a[idx] = (MatScalar)aa[nzcountb++]; 3678 } 3679 } 3680 /* zero out the mask elements we set */ 3681 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3682 } 3683 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3684 3685 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3686 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3687 ierr = PetscFree(aa);CHKERRQ(ierr); 3688 ierr = PetscFree(jj);CHKERRQ(ierr); 3689 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3690 3691 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3692 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3693 ierr = MatView_Private(newmat);CHKERRQ(ierr); 3694 PetscFunctionReturn(0); 3695 } 3696 3697 #undef __FUNCT__ 3698 #define __FUNCT__ "MatCreateSeqBAIJ" 3699 /*@C 3700 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3701 compressed row) format. For good matrix assembly performance the 3702 user should preallocate the matrix storage by setting the parameter nz 3703 (or the array nnz). By setting these parameters accurately, performance 3704 during matrix assembly can be increased by more than a factor of 50. 3705 3706 Collective on MPI_Comm 3707 3708 Input Parameters: 3709 + comm - MPI communicator, set to PETSC_COMM_SELF 3710 . bs - size of block 3711 . m - number of rows 3712 . n - number of columns 3713 . nz - number of nonzero blocks per block row (same for all rows) 3714 - nnz - array containing the number of nonzero blocks in the various block rows 3715 (possibly different for each block row) or PETSC_NULL 3716 3717 Output Parameter: 3718 . A - the matrix 3719 3720 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3721 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3722 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3723 3724 Options Database Keys: 3725 . -mat_no_unroll - uses code that does not unroll the loops in the 3726 block calculations (much slower) 3727 . -mat_block_size - size of the blocks to use 3728 3729 Level: intermediate 3730 3731 Notes: 3732 The number of rows and columns must be divisible by blocksize. 3733 3734 If the nnz parameter is given then the nz parameter is ignored 3735 3736 A nonzero block is any block that as 1 or more nonzeros in it 3737 3738 The block AIJ format is fully compatible with standard Fortran 77 3739 storage. That is, the stored row and column indices can begin at 3740 either one (as in Fortran) or zero. See the users' manual for details. 3741 3742 Specify the preallocated storage with either nz or nnz (not both). 3743 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3744 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3745 matrices. 3746 3747 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3748 @*/ 3749 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3750 { 3751 PetscErrorCode ierr; 3752 3753 PetscFunctionBegin; 3754 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3755 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3756 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3757 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3758 PetscFunctionReturn(0); 3759 } 3760 3761 #undef __FUNCT__ 3762 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3763 /*@C 3764 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3765 per row in the matrix. For good matrix assembly performance the 3766 user should preallocate the matrix storage by setting the parameter nz 3767 (or the array nnz). By setting these parameters accurately, performance 3768 during matrix assembly can be increased by more than a factor of 50. 3769 3770 Collective on MPI_Comm 3771 3772 Input Parameters: 3773 + A - the matrix 3774 . bs - size of block 3775 . nz - number of block nonzeros per block row (same for all rows) 3776 - nnz - array containing the number of block nonzeros in the various block rows 3777 (possibly different for each block row) or PETSC_NULL 3778 3779 Options Database Keys: 3780 . -mat_no_unroll - uses code that does not unroll the loops in the 3781 block calculations (much slower) 3782 . -mat_block_size - size of the blocks to use 3783 3784 Level: intermediate 3785 3786 Notes: 3787 If the nnz parameter is given then the nz parameter is ignored 3788 3789 You can call MatGetInfo() to get information on how effective the preallocation was; 3790 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3791 You can also run with the option -info and look for messages with the string 3792 malloc in them to see if additional memory allocation was needed. 3793 3794 The block AIJ format is fully compatible with standard Fortran 77 3795 storage. That is, the stored row and column indices can begin at 3796 either one (as in Fortran) or zero. See the users' manual for details. 3797 3798 Specify the preallocated storage with either nz or nnz (not both). 3799 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3800 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3801 3802 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3803 @*/ 3804 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3805 { 3806 PetscErrorCode ierr; 3807 3808 PetscFunctionBegin; 3809 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3810 PetscValidType(B,1); 3811 PetscValidLogicalCollectiveInt(B,bs,2); 3812 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3813 PetscFunctionReturn(0); 3814 } 3815 3816 #undef __FUNCT__ 3817 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3818 /*@C 3819 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3820 (the default sequential PETSc format). 3821 3822 Collective on MPI_Comm 3823 3824 Input Parameters: 3825 + A - the matrix 3826 . i - the indices into j for the start of each local row (starts with zero) 3827 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3828 - v - optional values in the matrix 3829 3830 Level: developer 3831 3832 .keywords: matrix, aij, compressed row, sparse 3833 3834 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3835 @*/ 3836 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3837 { 3838 PetscErrorCode ierr; 3839 3840 PetscFunctionBegin; 3841 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3842 PetscValidType(B,1); 3843 PetscValidLogicalCollectiveInt(B,bs,2); 3844 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3845 PetscFunctionReturn(0); 3846 } 3847 3848 3849 #undef __FUNCT__ 3850 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3851 /*@ 3852 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3853 3854 Collective on MPI_Comm 3855 3856 Input Parameters: 3857 + comm - must be an MPI communicator of size 1 3858 . bs - size of block 3859 . m - number of rows 3860 . n - number of columns 3861 . i - row indices 3862 . j - column indices 3863 - a - matrix values 3864 3865 Output Parameter: 3866 . mat - the matrix 3867 3868 Level: advanced 3869 3870 Notes: 3871 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3872 once the matrix is destroyed 3873 3874 You cannot set new nonzero locations into this matrix, that will generate an error. 3875 3876 The i and j indices are 0 based 3877 3878 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3879 3880 3881 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3882 3883 @*/ 3884 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3885 { 3886 PetscErrorCode ierr; 3887 PetscInt ii; 3888 Mat_SeqBAIJ *baij; 3889 3890 PetscFunctionBegin; 3891 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3892 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3893 3894 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3895 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3896 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3897 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3898 baij = (Mat_SeqBAIJ*)(*mat)->data; 3899 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3900 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3901 3902 baij->i = i; 3903 baij->j = j; 3904 baij->a = a; 3905 baij->singlemalloc = PETSC_FALSE; 3906 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3907 baij->free_a = PETSC_FALSE; 3908 baij->free_ij = PETSC_FALSE; 3909 3910 for (ii=0; ii<m; ii++) { 3911 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3912 #if defined(PETSC_USE_DEBUG) 3913 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3914 #endif 3915 } 3916 #if defined(PETSC_USE_DEBUG) 3917 for (ii=0; ii<baij->i[m]; ii++) { 3918 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3919 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3920 } 3921 #endif 3922 3923 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3924 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3925 PetscFunctionReturn(0); 3926 } 3927