1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include "../src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 7 #include "petscblaslapack.h" 8 #include "../src/mat/blockinvert.h" 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 12 /*@ 13 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 14 15 Collective on Mat 16 17 Input Parameters: 18 . mat - the matrix 19 20 Level: advanced 21 @*/ 22 PetscErrorCode MatSeqBAIJInvertBlockDiagonal(Mat mat) 23 { 24 PetscErrorCode ierr; 25 26 PetscFunctionBegin; 27 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 28 if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 29 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 30 ierr = PetscUseMethod(mat,"MatSeqBAIJInvertBlockDiagonal_C",(Mat),(mat));CHKERRQ(ierr); 31 PetscFunctionReturn(0); 32 } 33 34 EXTERN_C_BEGIN 35 #undef __FUNCT__ 36 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 37 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A) 38 { 39 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 40 PetscErrorCode ierr; 41 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 42 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 43 PetscReal shift = 0.0; 44 45 PetscFunctionBegin; 46 if (a->idiagvalid) PetscFunctionReturn(0); 47 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 48 diag_offset = a->diag; 49 if (!a->idiag) { 50 ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 51 ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 52 } 53 diag = a->idiag; 54 mdiag = a->idiag+bs2*mbs; 55 /* factor and invert each block */ 56 switch (bs){ 57 case 1: 58 for (i=0; i<mbs; i++) { 59 odiag = v + 1*diag_offset[i]; 60 diag[0] = odiag[0]; 61 mdiag[0] = odiag[0]; 62 diag[0] = 1.0 / (diag[0] + shift); 63 diag += 1; 64 mdiag += 1; 65 } 66 break; 67 case 2: 68 for (i=0; i<mbs; i++) { 69 odiag = v + 4*diag_offset[i]; 70 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 71 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 72 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 73 diag += 4; 74 mdiag += 4; 75 } 76 break; 77 case 3: 78 for (i=0; i<mbs; i++) { 79 odiag = v + 9*diag_offset[i]; 80 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 81 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 82 diag[8] = odiag[8]; 83 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 84 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 85 mdiag[8] = odiag[8]; 86 ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 87 diag += 9; 88 mdiag += 9; 89 } 90 break; 91 case 4: 92 for (i=0; i<mbs; i++) { 93 odiag = v + 16*diag_offset[i]; 94 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 95 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 97 diag += 16; 98 mdiag += 16; 99 } 100 break; 101 case 5: 102 for (i=0; i<mbs; i++) { 103 odiag = v + 25*diag_offset[i]; 104 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 105 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 107 diag += 25; 108 mdiag += 25; 109 } 110 break; 111 case 6: 112 for (i=0; i<mbs; i++) { 113 odiag = v + 36*diag_offset[i]; 114 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 115 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 117 diag += 36; 118 mdiag += 36; 119 } 120 break; 121 case 7: 122 for (i=0; i<mbs; i++) { 123 odiag = v + 49*diag_offset[i]; 124 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 125 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 126 ierr = Kernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 127 diag += 49; 128 mdiag += 49; 129 } 130 break; 131 default: 132 ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr); 133 for (i=0; i<mbs; i++) { 134 odiag = v + bs2*diag_offset[i]; 135 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 136 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 137 ierr = Kernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 138 diag += bs2; 139 mdiag += bs2; 140 } 141 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 142 } 143 a->idiagvalid = PETSC_TRUE; 144 PetscFunctionReturn(0); 145 } 146 EXTERN_C_END 147 148 #undef __FUNCT__ 149 #define __FUNCT__ "MatSOR_SeqBAIJ_1" 150 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 151 { 152 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 153 PetscScalar *x,x1,s1; 154 const PetscScalar *b; 155 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 156 PetscErrorCode ierr; 157 PetscInt m = a->mbs,i,i2,nz,j; 158 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 159 160 PetscFunctionBegin; 161 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 162 its = its*lits; 163 if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 164 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 165 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 166 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 167 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 168 169 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 170 171 diag = a->diag; 172 idiag = a->idiag; 173 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 174 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 175 176 if (flag & SOR_ZERO_INITIAL_GUESS) { 177 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 178 x[0] = b[0]*idiag[0]; 179 i2 = 1; 180 idiag += 1; 181 for (i=1; i<m; i++) { 182 v = aa + ai[i]; 183 vi = aj + ai[i]; 184 nz = diag[i] - ai[i]; 185 s1 = b[i2]; 186 for (j=0; j<nz; j++) { 187 s1 -= v[j]*x[vi[j]]; 188 } 189 x[i2] = idiag[0]*s1; 190 idiag += 1; 191 i2 += 1; 192 } 193 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 194 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 195 } 196 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 197 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 198 i2 = 0; 199 mdiag = a->idiag+a->mbs; 200 for (i=0; i<m; i++) { 201 x1 = x[i2]; 202 x[i2] = mdiag[0]*x1; 203 mdiag += 1; 204 i2 += 1; 205 } 206 ierr = PetscLogFlops(m);CHKERRQ(ierr); 207 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 208 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 209 } 210 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 211 idiag = a->idiag+a->mbs - 1; 212 i2 = m - 1; 213 x1 = x[i2]; 214 x[i2] = idiag[0]*x1; 215 idiag -= 1; 216 i2 -= 1; 217 for (i=m-2; i>=0; i--) { 218 v = aa + (diag[i]+1); 219 vi = aj + diag[i] + 1; 220 nz = ai[i+1] - diag[i] - 1; 221 s1 = x[i2]; 222 for (j=0; j<nz; j++) { 223 s1 -= v[j]*x[vi[j]]; 224 } 225 x[i2] = idiag[0]*s1; 226 idiag -= 1; 227 i2 -= 1; 228 } 229 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 230 } 231 } else { 232 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 233 } 234 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 235 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 236 PetscFunctionReturn(0); 237 } 238 239 #undef __FUNCT__ 240 #define __FUNCT__ "MatSOR_SeqBAIJ_2" 241 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 242 { 243 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 244 PetscScalar *x,x1,x2,s1,s2; 245 const PetscScalar *b; 246 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 247 PetscErrorCode ierr; 248 PetscInt m = a->mbs,i,i2,nz,idx,j,it; 249 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 250 251 PetscFunctionBegin; 252 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 253 its = its*lits; 254 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 255 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 256 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 257 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 258 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 259 260 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 261 262 diag = a->diag; 263 idiag = a->idiag; 264 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 265 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 266 267 if (flag & SOR_ZERO_INITIAL_GUESS) { 268 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 269 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 270 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 271 i2 = 2; 272 idiag += 4; 273 for (i=1; i<m; i++) { 274 v = aa + 4*ai[i]; 275 vi = aj + ai[i]; 276 nz = diag[i] - ai[i]; 277 s1 = b[i2]; s2 = b[i2+1]; 278 for (j=0; j<nz; j++) { 279 idx = 2*vi[j]; 280 it = 4*j; 281 x1 = x[idx]; x2 = x[1+idx]; 282 s1 -= v[it]*x1 + v[it+2]*x2; 283 s2 -= v[it+1]*x1 + v[it+3]*x2; 284 } 285 x[i2] = idiag[0]*s1 + idiag[2]*s2; 286 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 287 idiag += 4; 288 i2 += 2; 289 } 290 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 291 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 292 } 293 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 294 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 295 i2 = 0; 296 mdiag = a->idiag+4*a->mbs; 297 for (i=0; i<m; i++) { 298 x1 = x[i2]; x2 = x[i2+1]; 299 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 300 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 301 mdiag += 4; 302 i2 += 2; 303 } 304 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 305 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 306 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 307 } 308 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 309 idiag = a->idiag+4*a->mbs - 4; 310 i2 = 2*m - 2; 311 x1 = x[i2]; x2 = x[i2+1]; 312 x[i2] = idiag[0]*x1 + idiag[2]*x2; 313 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 314 idiag -= 4; 315 i2 -= 2; 316 for (i=m-2; i>=0; i--) { 317 v = aa + 4*(diag[i]+1); 318 vi = aj + diag[i] + 1; 319 nz = ai[i+1] - diag[i] - 1; 320 s1 = x[i2]; s2 = x[i2+1]; 321 for (j=0; j<nz; j++) { 322 idx = 2*vi[j]; 323 it = 4*j; 324 x1 = x[idx]; x2 = x[1+idx]; 325 s1 -= v[it]*x1 + v[it+2]*x2; 326 s2 -= v[it+1]*x1 + v[it+3]*x2; 327 } 328 x[i2] = idiag[0]*s1 + idiag[2]*s2; 329 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 330 idiag -= 4; 331 i2 -= 2; 332 } 333 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 334 } 335 } else { 336 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 337 } 338 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 339 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 340 PetscFunctionReturn(0); 341 } 342 343 #undef __FUNCT__ 344 #define __FUNCT__ "MatSOR_SeqBAIJ_3" 345 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 346 { 347 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 348 PetscScalar *x,x1,x2,x3,s1,s2,s3; 349 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 350 const PetscScalar *b; 351 PetscErrorCode ierr; 352 PetscInt m = a->mbs,i,i2,nz,idx; 353 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 354 355 PetscFunctionBegin; 356 its = its*lits; 357 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 358 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 359 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 360 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 361 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 362 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 363 364 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 365 366 diag = a->diag; 367 idiag = a->idiag; 368 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 369 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 370 371 if (flag & SOR_ZERO_INITIAL_GUESS) { 372 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 373 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 374 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 375 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 376 i2 = 3; 377 idiag += 9; 378 for (i=1; i<m; i++) { 379 v = aa + 9*ai[i]; 380 vi = aj + ai[i]; 381 nz = diag[i] - ai[i]; 382 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 383 while (nz--) { 384 idx = 3*(*vi++); 385 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 386 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 387 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 388 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 389 v += 9; 390 } 391 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 392 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 393 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 394 idiag += 9; 395 i2 += 3; 396 } 397 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 398 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 399 } 400 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 401 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 402 i2 = 0; 403 mdiag = a->idiag+9*a->mbs; 404 for (i=0; i<m; i++) { 405 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 406 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 407 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 408 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 409 mdiag += 9; 410 i2 += 3; 411 } 412 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 413 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 414 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 415 } 416 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 417 idiag = a->idiag+9*a->mbs - 9; 418 i2 = 3*m - 3; 419 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 420 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 421 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 422 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 423 idiag -= 9; 424 i2 -= 3; 425 for (i=m-2; i>=0; i--) { 426 v = aa + 9*(diag[i]+1); 427 vi = aj + diag[i] + 1; 428 nz = ai[i+1] - diag[i] - 1; 429 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 430 while (nz--) { 431 idx = 3*(*vi++); 432 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 433 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 434 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 435 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 436 v += 9; 437 } 438 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 439 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 440 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 441 idiag -= 9; 442 i2 -= 3; 443 } 444 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 445 } 446 } else { 447 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 448 } 449 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 450 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 451 PetscFunctionReturn(0); 452 } 453 454 #undef __FUNCT__ 455 #define __FUNCT__ "MatSOR_SeqBAIJ_4" 456 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 457 { 458 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 459 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 460 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 461 const PetscScalar *b; 462 PetscErrorCode ierr; 463 PetscInt m = a->mbs,i,i2,nz,idx; 464 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 465 466 PetscFunctionBegin; 467 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 468 its = its*lits; 469 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 470 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 471 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 472 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 473 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 474 475 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 476 477 diag = a->diag; 478 idiag = a->idiag; 479 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 480 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 481 482 if (flag & SOR_ZERO_INITIAL_GUESS) { 483 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 484 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 485 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 486 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 487 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 488 i2 = 4; 489 idiag += 16; 490 for (i=1; i<m; i++) { 491 v = aa + 16*ai[i]; 492 vi = aj + ai[i]; 493 nz = diag[i] - ai[i]; 494 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 495 while (nz--) { 496 idx = 4*(*vi++); 497 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 498 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 499 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 500 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 501 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 502 v += 16; 503 } 504 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 505 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 506 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 507 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 508 idiag += 16; 509 i2 += 4; 510 } 511 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 512 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 513 } 514 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 515 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 516 i2 = 0; 517 mdiag = a->idiag+16*a->mbs; 518 for (i=0; i<m; i++) { 519 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 520 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 521 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 522 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 523 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 524 mdiag += 16; 525 i2 += 4; 526 } 527 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 528 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 529 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 530 } 531 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 532 idiag = a->idiag+16*a->mbs - 16; 533 i2 = 4*m - 4; 534 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 535 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 536 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 537 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 538 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 539 idiag -= 16; 540 i2 -= 4; 541 for (i=m-2; i>=0; i--) { 542 v = aa + 16*(diag[i]+1); 543 vi = aj + diag[i] + 1; 544 nz = ai[i+1] - diag[i] - 1; 545 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 546 while (nz--) { 547 idx = 4*(*vi++); 548 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 549 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 550 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 551 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 552 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 553 v += 16; 554 } 555 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 556 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 557 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 558 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 559 idiag -= 16; 560 i2 -= 4; 561 } 562 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 563 } 564 } else { 565 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 566 } 567 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 568 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 569 PetscFunctionReturn(0); 570 } 571 572 #undef __FUNCT__ 573 #define __FUNCT__ "MatSOR_SeqBAIJ_5" 574 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 575 { 576 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 577 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 578 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 579 const PetscScalar *b; 580 PetscErrorCode ierr; 581 PetscInt m = a->mbs,i,i2,nz,idx; 582 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 583 584 PetscFunctionBegin; 585 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 586 its = its*lits; 587 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 588 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 589 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 590 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 591 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 592 593 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 594 595 diag = a->diag; 596 idiag = a->idiag; 597 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 598 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 599 600 if (flag & SOR_ZERO_INITIAL_GUESS) { 601 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 602 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 603 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 604 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 605 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 606 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 607 i2 = 5; 608 idiag += 25; 609 for (i=1; i<m; i++) { 610 v = aa + 25*ai[i]; 611 vi = aj + ai[i]; 612 nz = diag[i] - ai[i]; 613 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 614 while (nz--) { 615 idx = 5*(*vi++); 616 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 617 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 618 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 619 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 620 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 621 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 622 v += 25; 623 } 624 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 625 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 626 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 627 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 628 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 629 idiag += 25; 630 i2 += 5; 631 } 632 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 633 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 634 } 635 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 636 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 637 i2 = 0; 638 mdiag = a->idiag+25*a->mbs; 639 for (i=0; i<m; i++) { 640 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 641 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 642 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 643 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 644 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 645 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 646 mdiag += 25; 647 i2 += 5; 648 } 649 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 650 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 651 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 652 } 653 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 654 idiag = a->idiag+25*a->mbs - 25; 655 i2 = 5*m - 5; 656 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 657 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 658 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 659 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 660 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 661 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 662 idiag -= 25; 663 i2 -= 5; 664 for (i=m-2; i>=0; i--) { 665 v = aa + 25*(diag[i]+1); 666 vi = aj + diag[i] + 1; 667 nz = ai[i+1] - diag[i] - 1; 668 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 669 while (nz--) { 670 idx = 5*(*vi++); 671 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 672 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 673 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 674 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 675 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 676 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 677 v += 25; 678 } 679 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 680 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 681 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 682 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 683 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 684 idiag -= 25; 685 i2 -= 5; 686 } 687 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 688 } 689 } else { 690 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 691 } 692 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 693 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 694 PetscFunctionReturn(0); 695 } 696 697 #undef __FUNCT__ 698 #define __FUNCT__ "MatSOR_SeqBAIJ_6" 699 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 700 { 701 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 702 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 703 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 704 const PetscScalar *b; 705 PetscErrorCode ierr; 706 PetscInt m = a->mbs,i,i2,nz,idx; 707 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 708 709 PetscFunctionBegin; 710 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 711 its = its*lits; 712 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 713 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 714 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 715 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 716 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 717 718 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 719 720 diag = a->diag; 721 idiag = a->idiag; 722 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 723 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 724 725 if (flag & SOR_ZERO_INITIAL_GUESS) { 726 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 727 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 728 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 729 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 730 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 731 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 732 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 733 i2 = 6; 734 idiag += 36; 735 for (i=1; i<m; i++) { 736 v = aa + 36*ai[i]; 737 vi = aj + ai[i]; 738 nz = diag[i] - ai[i]; 739 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 740 while (nz--) { 741 idx = 6*(*vi++); 742 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 743 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 744 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 745 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 746 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 747 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 748 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 749 v += 36; 750 } 751 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 752 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 753 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 754 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 755 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 756 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 757 idiag += 36; 758 i2 += 6; 759 } 760 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 761 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 762 } 763 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 764 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 765 i2 = 0; 766 mdiag = a->idiag+36*a->mbs; 767 for (i=0; i<m; i++) { 768 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 769 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 770 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 771 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 772 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 773 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 774 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 775 mdiag += 36; 776 i2 += 6; 777 } 778 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 779 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 780 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 781 } 782 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 783 idiag = a->idiag+36*a->mbs - 36; 784 i2 = 6*m - 6; 785 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 786 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 787 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 788 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 789 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 790 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 791 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 792 idiag -= 36; 793 i2 -= 6; 794 for (i=m-2; i>=0; i--) { 795 v = aa + 36*(diag[i]+1); 796 vi = aj + diag[i] + 1; 797 nz = ai[i+1] - diag[i] - 1; 798 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 799 while (nz--) { 800 idx = 6*(*vi++); 801 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 802 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 803 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 804 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 805 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 806 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 807 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 808 v += 36; 809 } 810 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 811 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 812 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 813 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 814 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 815 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 816 idiag -= 36; 817 i2 -= 6; 818 } 819 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 820 } 821 } else { 822 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 823 } 824 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 825 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 826 PetscFunctionReturn(0); 827 } 828 829 #undef __FUNCT__ 830 #define __FUNCT__ "MatSOR_SeqBAIJ_7" 831 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 832 { 833 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 834 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 835 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 836 const PetscScalar *b; 837 PetscErrorCode ierr; 838 PetscInt m = a->mbs,i,i2,nz,idx; 839 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 840 841 PetscFunctionBegin; 842 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 843 its = its*lits; 844 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 845 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 846 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 847 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 848 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 849 850 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 851 852 diag = a->diag; 853 idiag = a->idiag; 854 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 855 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 856 857 if (flag & SOR_ZERO_INITIAL_GUESS) { 858 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 859 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 860 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 861 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 862 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 863 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 864 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 865 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 866 i2 = 7; 867 idiag += 49; 868 for (i=1; i<m; i++) { 869 v = aa + 49*ai[i]; 870 vi = aj + ai[i]; 871 nz = diag[i] - ai[i]; 872 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 873 while (nz--) { 874 idx = 7*(*vi++); 875 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 876 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 877 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 878 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 879 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 880 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 881 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 882 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 883 v += 49; 884 } 885 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 886 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 887 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 888 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 889 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 890 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 891 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 892 idiag += 49; 893 i2 += 7; 894 } 895 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 896 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 897 } 898 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 899 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 900 i2 = 0; 901 mdiag = a->idiag+49*a->mbs; 902 for (i=0; i<m; i++) { 903 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 904 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7; 905 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7; 906 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7; 907 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7; 908 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7; 909 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7; 910 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7; 911 mdiag += 49; 912 i2 += 7; 913 } 914 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 915 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 916 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 917 } 918 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 919 idiag = a->idiag+49*a->mbs - 49; 920 i2 = 7*m - 7; 921 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 922 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 923 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 924 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 925 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 926 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 927 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 928 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 929 idiag -= 49; 930 i2 -= 7; 931 for (i=m-2; i>=0; i--) { 932 v = aa + 49*(diag[i]+1); 933 vi = aj + diag[i] + 1; 934 nz = ai[i+1] - diag[i] - 1; 935 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 936 while (nz--) { 937 idx = 7*(*vi++); 938 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 939 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 940 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 941 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 942 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 943 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 944 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 945 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 946 v += 49; 947 } 948 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 949 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 950 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 951 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 952 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 953 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 954 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 955 idiag -= 49; 956 i2 -= 7; 957 } 958 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 959 } 960 } else { 961 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 962 } 963 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 964 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 965 PetscFunctionReturn(0); 966 } 967 968 #undef __FUNCT__ 969 #define __FUNCT__ "MatSOR_SeqBAIJ_N" 970 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 971 { 972 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 973 PetscScalar *x,*work,*w,*workt; 974 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 975 const PetscScalar *b; 976 PetscErrorCode ierr; 977 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j; 978 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 979 980 PetscFunctionBegin; 981 its = its*lits; 982 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 983 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 984 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 985 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 986 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 987 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 988 989 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 990 991 diag = a->diag; 992 idiag = a->idiag; 993 if (!a->mult_work) { 994 k = PetscMax(A->rmap->n,A->cmap->n); 995 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 996 } 997 work = a->mult_work; 998 if (!a->sor_work) { 999 ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr); 1000 } 1001 w = a->sor_work; 1002 1003 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1004 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1005 1006 if (flag & SOR_ZERO_INITIAL_GUESS) { 1007 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1008 Kernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 1009 /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 1010 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 1011 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/ 1012 i2 = bs; 1013 idiag += bs2; 1014 for (i=1; i<m; i++) { 1015 v = aa + bs2*ai[i]; 1016 vi = aj + ai[i]; 1017 nz = diag[i] - ai[i]; 1018 1019 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1020 /* copy all rows of x that are needed into contiguous space */ 1021 workt = work; 1022 for (j=0; j<nz; j++) { 1023 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1024 workt += bs; 1025 } 1026 Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1027 /*s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 1028 while (nz--) { 1029 idx = N*(*vi++); 1030 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 1031 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1032 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1033 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1034 v += N2; 1035 } */ 1036 1037 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1038 /* x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1039 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1040 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/ 1041 1042 idiag += bs2; 1043 i2 += bs; 1044 } 1045 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 1046 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1047 } 1048 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 1049 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1050 i2 = 0; 1051 mdiag = a->idiag+bs2*a->mbs; 1052 ierr = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr); 1053 for (i=0; i<m; i++) { 1054 Kernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2); 1055 /* x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1056 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 1057 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 1058 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */ 1059 1060 mdiag += bs2; 1061 i2 += bs; 1062 } 1063 ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr); 1064 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1065 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 1066 } 1067 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1068 idiag = a->idiag+bs2*a->mbs - bs2; 1069 i2 = bs*m - bs; 1070 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1071 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1072 /*x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1073 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 1074 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 1075 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/ 1076 idiag -= bs2; 1077 i2 -= bs; 1078 for (i=m-2; i>=0; i--) { 1079 v = aa + bs2*(diag[i]+1); 1080 vi = aj + diag[i] + 1; 1081 nz = ai[i+1] - diag[i] - 1; 1082 1083 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1084 /* copy all rows of x that are needed into contiguous space */ 1085 workt = work; 1086 for (j=0; j<nz; j++) { 1087 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1088 workt += bs; 1089 } 1090 Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1091 /* s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 1092 while (nz--) { 1093 idx = N*(*vi++); 1094 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 1095 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1096 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1097 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1098 v += N2; 1099 } */ 1100 1101 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1102 /*x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1103 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1104 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */ 1105 idiag -= bs2; 1106 i2 -= bs; 1107 } 1108 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1109 } 1110 } else { 1111 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 1112 } 1113 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1114 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 /* 1119 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1120 */ 1121 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1122 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1123 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1124 #define matsetvaluesblocked4_ matsetvaluesblocked4 1125 #endif 1126 1127 EXTERN_C_BEGIN 1128 #undef __FUNCT__ 1129 #define __FUNCT__ "matsetvaluesblocked4_" 1130 void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 1131 { 1132 Mat A = *AA; 1133 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1134 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 1135 PetscInt *ai=a->i,*ailen=a->ilen; 1136 PetscInt *aj=a->j,stepval,lastcol = -1; 1137 const PetscScalar *value = v; 1138 MatScalar *ap,*aa = a->a,*bap; 1139 1140 PetscFunctionBegin; 1141 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 1142 stepval = (n-1)*4; 1143 for (k=0; k<m; k++) { /* loop over added rows */ 1144 row = im[k]; 1145 rp = aj + ai[row]; 1146 ap = aa + 16*ai[row]; 1147 nrow = ailen[row]; 1148 low = 0; 1149 high = nrow; 1150 for (l=0; l<n; l++) { /* loop over added columns */ 1151 col = in[l]; 1152 if (col <= lastcol) low = 0; else high = nrow; 1153 lastcol = col; 1154 value = v + k*(stepval+4 + l)*4; 1155 while (high-low > 7) { 1156 t = (low+high)/2; 1157 if (rp[t] > col) high = t; 1158 else low = t; 1159 } 1160 for (i=low; i<high; i++) { 1161 if (rp[i] > col) break; 1162 if (rp[i] == col) { 1163 bap = ap + 16*i; 1164 for (ii=0; ii<4; ii++,value+=stepval) { 1165 for (jj=ii; jj<16; jj+=4) { 1166 bap[jj] += *value++; 1167 } 1168 } 1169 goto noinsert2; 1170 } 1171 } 1172 N = nrow++ - 1; 1173 high++; /* added new column index thus must search to one higher than before */ 1174 /* shift up all the later entries in this row */ 1175 for (ii=N; ii>=i; ii--) { 1176 rp[ii+1] = rp[ii]; 1177 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1178 } 1179 if (N >= i) { 1180 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1181 } 1182 rp[i] = col; 1183 bap = ap + 16*i; 1184 for (ii=0; ii<4; ii++,value+=stepval) { 1185 for (jj=ii; jj<16; jj+=4) { 1186 bap[jj] = *value++; 1187 } 1188 } 1189 noinsert2:; 1190 low = i; 1191 } 1192 ailen[row] = nrow; 1193 } 1194 PetscFunctionReturnVoid(); 1195 } 1196 EXTERN_C_END 1197 1198 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1199 #define matsetvalues4_ MATSETVALUES4 1200 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1201 #define matsetvalues4_ matsetvalues4 1202 #endif 1203 1204 EXTERN_C_BEGIN 1205 #undef __FUNCT__ 1206 #define __FUNCT__ "MatSetValues4_" 1207 void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1208 { 1209 Mat A = *AA; 1210 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1211 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1212 PetscInt *ai=a->i,*ailen=a->ilen; 1213 PetscInt *aj=a->j,brow,bcol; 1214 PetscInt ridx,cidx,lastcol = -1; 1215 MatScalar *ap,value,*aa=a->a,*bap; 1216 1217 PetscFunctionBegin; 1218 for (k=0; k<m; k++) { /* loop over added rows */ 1219 row = im[k]; brow = row/4; 1220 rp = aj + ai[brow]; 1221 ap = aa + 16*ai[brow]; 1222 nrow = ailen[brow]; 1223 low = 0; 1224 high = nrow; 1225 for (l=0; l<n; l++) { /* loop over added columns */ 1226 col = in[l]; bcol = col/4; 1227 ridx = row % 4; cidx = col % 4; 1228 value = v[l + k*n]; 1229 if (col <= lastcol) low = 0; else high = nrow; 1230 lastcol = col; 1231 while (high-low > 7) { 1232 t = (low+high)/2; 1233 if (rp[t] > bcol) high = t; 1234 else low = t; 1235 } 1236 for (i=low; i<high; i++) { 1237 if (rp[i] > bcol) break; 1238 if (rp[i] == bcol) { 1239 bap = ap + 16*i + 4*cidx + ridx; 1240 *bap += value; 1241 goto noinsert1; 1242 } 1243 } 1244 N = nrow++ - 1; 1245 high++; /* added new column thus must search to one higher than before */ 1246 /* shift up all the later entries in this row */ 1247 for (ii=N; ii>=i; ii--) { 1248 rp[ii+1] = rp[ii]; 1249 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1250 } 1251 if (N>=i) { 1252 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1253 } 1254 rp[i] = bcol; 1255 ap[16*i + 4*cidx + ridx] = value; 1256 noinsert1:; 1257 low = i; 1258 } 1259 ailen[brow] = nrow; 1260 } 1261 PetscFunctionReturnVoid(); 1262 } 1263 EXTERN_C_END 1264 1265 /* 1266 Checks for missing diagonals 1267 */ 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1270 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1271 { 1272 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1273 PetscErrorCode ierr; 1274 PetscInt *diag,*jj = a->j,i; 1275 1276 PetscFunctionBegin; 1277 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1278 *missing = PETSC_FALSE; 1279 if (A->rmap->n > 0 && !jj) { 1280 *missing = PETSC_TRUE; 1281 if (d) *d = 0; 1282 PetscInfo(A,"Matrix has no entries therefor is missing diagonal"); 1283 } else { 1284 diag = a->diag; 1285 for (i=0; i<a->mbs; i++) { 1286 if (jj[diag[i]] != i) { 1287 *missing = PETSC_TRUE; 1288 if (d) *d = i; 1289 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1290 } 1291 } 1292 } 1293 PetscFunctionReturn(0); 1294 } 1295 1296 #undef __FUNCT__ 1297 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1298 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1299 { 1300 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1301 PetscErrorCode ierr; 1302 PetscInt i,j,m = a->mbs; 1303 1304 PetscFunctionBegin; 1305 if (!a->diag) { 1306 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1307 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1308 a->free_diag = PETSC_TRUE; 1309 } 1310 for (i=0; i<m; i++) { 1311 a->diag[i] = a->i[i+1]; 1312 for (j=a->i[i]; j<a->i[i+1]; j++) { 1313 if (a->j[j] == i) { 1314 a->diag[i] = j; 1315 break; 1316 } 1317 } 1318 } 1319 PetscFunctionReturn(0); 1320 } 1321 1322 1323 extern PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1324 1325 #undef __FUNCT__ 1326 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1327 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 1328 { 1329 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1330 PetscErrorCode ierr; 1331 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt; 1332 PetscInt *tia, *tja; 1333 1334 PetscFunctionBegin; 1335 *nn = n; 1336 if (!ia) PetscFunctionReturn(0); 1337 if (symmetric) { 1338 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1339 nz = tia[n]; 1340 } else { 1341 tia = a->i; tja = a->j; 1342 } 1343 1344 if (!blockcompressed && bs > 1) { 1345 (*nn) *= bs; 1346 /* malloc & create the natural set of indices */ 1347 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1348 if (n) { 1349 (*ia)[0] = 0; 1350 for (j=1; j<bs; j++) { 1351 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1352 } 1353 } 1354 1355 for (i=1; i<n; i++) { 1356 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1357 for (j=1; j<bs; j++) { 1358 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1359 } 1360 } 1361 if (n) { 1362 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1363 } 1364 1365 if (ja) { 1366 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1367 cnt = 0; 1368 for (i=0; i<n; i++) { 1369 for (j=0; j<bs; j++) { 1370 for (k=tia[i]; k<tia[i+1]; k++) { 1371 for (l=0; l<bs; l++) { 1372 (*ja)[cnt++] = bs*tja[k] + l; 1373 } 1374 } 1375 } 1376 } 1377 } 1378 1379 n *= bs; 1380 nz *= bs*bs; 1381 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1382 ierr = PetscFree(tia);CHKERRQ(ierr); 1383 ierr = PetscFree(tja);CHKERRQ(ierr); 1384 } 1385 } else if (oshift == 1) { 1386 if (symmetric) { 1387 PetscInt nz = tia[A->rmap->n/bs]; 1388 /* add 1 to i and j indices */ 1389 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1390 *ia = tia; 1391 if (ja) { 1392 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1393 *ja = tja; 1394 } 1395 } else { 1396 PetscInt nz = a->i[A->rmap->n/bs]; 1397 /* malloc space and add 1 to i and j indices */ 1398 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1399 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1400 if (ja) { 1401 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1402 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1403 } 1404 } 1405 } else { 1406 *ia = tia; 1407 if (ja) *ja = tja; 1408 } 1409 1410 PetscFunctionReturn(0); 1411 } 1412 1413 #undef __FUNCT__ 1414 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1415 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 1416 { 1417 PetscErrorCode ierr; 1418 1419 PetscFunctionBegin; 1420 if (!ia) PetscFunctionReturn(0); 1421 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1422 ierr = PetscFree(*ia);CHKERRQ(ierr); 1423 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1424 } 1425 PetscFunctionReturn(0); 1426 } 1427 1428 #undef __FUNCT__ 1429 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1430 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1431 { 1432 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1433 PetscErrorCode ierr; 1434 1435 PetscFunctionBegin; 1436 #if defined(PETSC_USE_LOG) 1437 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1438 #endif 1439 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1440 if (a->row) { 1441 ierr = ISDestroy(a->row);CHKERRQ(ierr); 1442 } 1443 if (a->col) { 1444 ierr = ISDestroy(a->col);CHKERRQ(ierr); 1445 } 1446 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1447 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1448 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1449 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1450 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1451 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1452 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 1453 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1454 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1455 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1456 1457 if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);} 1458 if (a->parent) {ierr = MatDestroy(a->parent);CHKERRQ(ierr);} 1459 ierr = PetscFree(a);CHKERRQ(ierr); 1460 1461 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1462 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1463 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1464 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1465 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1466 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1467 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1468 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1469 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1470 PetscFunctionReturn(0); 1471 } 1472 1473 #undef __FUNCT__ 1474 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1475 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1476 { 1477 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1478 PetscErrorCode ierr; 1479 1480 PetscFunctionBegin; 1481 switch (op) { 1482 case MAT_ROW_ORIENTED: 1483 a->roworiented = flg; 1484 break; 1485 case MAT_KEEP_NONZERO_PATTERN: 1486 a->keepnonzeropattern = flg; 1487 break; 1488 case MAT_NEW_NONZERO_LOCATIONS: 1489 a->nonew = (flg ? 0 : 1); 1490 break; 1491 case MAT_NEW_NONZERO_LOCATION_ERR: 1492 a->nonew = (flg ? -1 : 0); 1493 break; 1494 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1495 a->nonew = (flg ? -2 : 0); 1496 break; 1497 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1498 a->nounused = (flg ? -1 : 0); 1499 break; 1500 case MAT_CHECK_COMPRESSED_ROW: 1501 a->compressedrow.check = flg; 1502 break; 1503 case MAT_NEW_DIAGONALS: 1504 case MAT_IGNORE_OFF_PROC_ENTRIES: 1505 case MAT_USE_HASH_TABLE: 1506 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1507 break; 1508 case MAT_SYMMETRIC: 1509 case MAT_STRUCTURALLY_SYMMETRIC: 1510 case MAT_HERMITIAN: 1511 case MAT_SYMMETRY_ETERNAL: 1512 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1513 break; 1514 default: 1515 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1516 } 1517 PetscFunctionReturn(0); 1518 } 1519 1520 #undef __FUNCT__ 1521 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1522 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1523 { 1524 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1525 PetscErrorCode ierr; 1526 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1527 MatScalar *aa,*aa_i; 1528 PetscScalar *v_i; 1529 1530 PetscFunctionBegin; 1531 bs = A->rmap->bs; 1532 ai = a->i; 1533 aj = a->j; 1534 aa = a->a; 1535 bs2 = a->bs2; 1536 1537 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1538 1539 bn = row/bs; /* Block number */ 1540 bp = row % bs; /* Block Position */ 1541 M = ai[bn+1] - ai[bn]; 1542 *nz = bs*M; 1543 1544 if (v) { 1545 *v = 0; 1546 if (*nz) { 1547 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1548 for (i=0; i<M; i++) { /* for each block in the block row */ 1549 v_i = *v + i*bs; 1550 aa_i = aa + bs2*(ai[bn] + i); 1551 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1552 } 1553 } 1554 } 1555 1556 if (idx) { 1557 *idx = 0; 1558 if (*nz) { 1559 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1560 for (i=0; i<M; i++) { /* for each block in the block row */ 1561 idx_i = *idx + i*bs; 1562 itmp = bs*aj[ai[bn] + i]; 1563 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1564 } 1565 } 1566 } 1567 PetscFunctionReturn(0); 1568 } 1569 1570 #undef __FUNCT__ 1571 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1572 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1573 { 1574 PetscErrorCode ierr; 1575 1576 PetscFunctionBegin; 1577 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1578 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1579 PetscFunctionReturn(0); 1580 } 1581 1582 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1583 1584 #undef __FUNCT__ 1585 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1586 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1587 { 1588 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1589 Mat C; 1590 PetscErrorCode ierr; 1591 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1592 PetscInt *rows,*cols,bs2=a->bs2; 1593 MatScalar *array; 1594 1595 PetscFunctionBegin; 1596 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1597 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1598 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1599 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1600 1601 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1602 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1603 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1604 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1605 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1606 ierr = PetscFree(col);CHKERRQ(ierr); 1607 } else { 1608 C = *B; 1609 } 1610 1611 array = a->a; 1612 ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 1613 for (i=0; i<mbs; i++) { 1614 cols[0] = i*bs; 1615 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1616 len = ai[i+1] - ai[i]; 1617 for (j=0; j<len; j++) { 1618 rows[0] = (*aj++)*bs; 1619 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1620 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1621 array += bs2; 1622 } 1623 } 1624 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1625 1626 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1627 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1628 1629 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1630 *B = C; 1631 } else { 1632 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1633 } 1634 PetscFunctionReturn(0); 1635 } 1636 1637 #undef __FUNCT__ 1638 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1639 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1640 { 1641 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1642 PetscErrorCode ierr; 1643 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1644 int fd; 1645 PetscScalar *aa; 1646 FILE *file; 1647 1648 PetscFunctionBegin; 1649 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1650 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1651 col_lens[0] = MAT_FILE_CLASSID; 1652 1653 col_lens[1] = A->rmap->N; 1654 col_lens[2] = A->cmap->n; 1655 col_lens[3] = a->nz*bs2; 1656 1657 /* store lengths of each row and write (including header) to file */ 1658 count = 0; 1659 for (i=0; i<a->mbs; i++) { 1660 for (j=0; j<bs; j++) { 1661 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1662 } 1663 } 1664 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1665 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1666 1667 /* store column indices (zero start index) */ 1668 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1669 count = 0; 1670 for (i=0; i<a->mbs; i++) { 1671 for (j=0; j<bs; j++) { 1672 for (k=a->i[i]; k<a->i[i+1]; k++) { 1673 for (l=0; l<bs; l++) { 1674 jj[count++] = bs*a->j[k] + l; 1675 } 1676 } 1677 } 1678 } 1679 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1680 ierr = PetscFree(jj);CHKERRQ(ierr); 1681 1682 /* store nonzero values */ 1683 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1684 count = 0; 1685 for (i=0; i<a->mbs; i++) { 1686 for (j=0; j<bs; j++) { 1687 for (k=a->i[i]; k<a->i[i+1]; k++) { 1688 for (l=0; l<bs; l++) { 1689 aa[count++] = a->a[bs2*k + l*bs + j]; 1690 } 1691 } 1692 } 1693 } 1694 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1695 ierr = PetscFree(aa);CHKERRQ(ierr); 1696 1697 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1698 if (file) { 1699 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1700 } 1701 PetscFunctionReturn(0); 1702 } 1703 1704 #undef __FUNCT__ 1705 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1706 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1707 { 1708 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1709 PetscErrorCode ierr; 1710 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1711 PetscViewerFormat format; 1712 1713 PetscFunctionBegin; 1714 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1715 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1716 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1717 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1718 Mat aij; 1719 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1720 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1721 ierr = MatDestroy(aij);CHKERRQ(ierr); 1722 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1723 PetscFunctionReturn(0); 1724 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1725 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1726 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1727 for (i=0; i<a->mbs; i++) { 1728 for (j=0; j<bs; j++) { 1729 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1730 for (k=a->i[i]; k<a->i[i+1]; k++) { 1731 for (l=0; l<bs; l++) { 1732 #if defined(PETSC_USE_COMPLEX) 1733 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1734 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1735 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1736 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1737 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1738 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1739 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1740 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1741 } 1742 #else 1743 if (a->a[bs2*k + l*bs + j] != 0.0) { 1744 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1745 } 1746 #endif 1747 } 1748 } 1749 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1750 } 1751 } 1752 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1753 } else { 1754 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1755 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1756 for (i=0; i<a->mbs; i++) { 1757 for (j=0; j<bs; j++) { 1758 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1759 for (k=a->i[i]; k<a->i[i+1]; k++) { 1760 for (l=0; l<bs; l++) { 1761 #if defined(PETSC_USE_COMPLEX) 1762 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1763 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1764 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1765 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1766 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1767 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1768 } else { 1769 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1770 } 1771 #else 1772 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1773 #endif 1774 } 1775 } 1776 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1777 } 1778 } 1779 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1780 } 1781 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1782 PetscFunctionReturn(0); 1783 } 1784 1785 #undef __FUNCT__ 1786 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1787 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1788 { 1789 Mat A = (Mat) Aa; 1790 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1791 PetscErrorCode ierr; 1792 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1793 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1794 MatScalar *aa; 1795 PetscViewer viewer; 1796 1797 PetscFunctionBegin; 1798 1799 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1800 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1801 1802 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1803 1804 /* loop over matrix elements drawing boxes */ 1805 color = PETSC_DRAW_BLUE; 1806 for (i=0,row=0; i<mbs; i++,row+=bs) { 1807 for (j=a->i[i]; j<a->i[i+1]; j++) { 1808 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1809 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1810 aa = a->a + j*bs2; 1811 for (k=0; k<bs; k++) { 1812 for (l=0; l<bs; l++) { 1813 if (PetscRealPart(*aa++) >= 0.) continue; 1814 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1815 } 1816 } 1817 } 1818 } 1819 color = PETSC_DRAW_CYAN; 1820 for (i=0,row=0; i<mbs; i++,row+=bs) { 1821 for (j=a->i[i]; j<a->i[i+1]; j++) { 1822 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1823 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1824 aa = a->a + j*bs2; 1825 for (k=0; k<bs; k++) { 1826 for (l=0; l<bs; l++) { 1827 if (PetscRealPart(*aa++) != 0.) continue; 1828 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1829 } 1830 } 1831 } 1832 } 1833 1834 color = PETSC_DRAW_RED; 1835 for (i=0,row=0; i<mbs; i++,row+=bs) { 1836 for (j=a->i[i]; j<a->i[i+1]; j++) { 1837 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1838 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1839 aa = a->a + j*bs2; 1840 for (k=0; k<bs; k++) { 1841 for (l=0; l<bs; l++) { 1842 if (PetscRealPart(*aa++) <= 0.) continue; 1843 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1844 } 1845 } 1846 } 1847 } 1848 PetscFunctionReturn(0); 1849 } 1850 1851 #undef __FUNCT__ 1852 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1853 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1854 { 1855 PetscErrorCode ierr; 1856 PetscReal xl,yl,xr,yr,w,h; 1857 PetscDraw draw; 1858 PetscBool isnull; 1859 1860 PetscFunctionBegin; 1861 1862 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1863 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1864 1865 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1866 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1867 xr += w; yr += h; xl = -w; yl = -h; 1868 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1869 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1870 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1871 PetscFunctionReturn(0); 1872 } 1873 1874 #undef __FUNCT__ 1875 #define __FUNCT__ "MatView_SeqBAIJ" 1876 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1877 { 1878 PetscErrorCode ierr; 1879 PetscBool iascii,isbinary,isdraw; 1880 1881 PetscFunctionBegin; 1882 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1883 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1884 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1885 if (iascii){ 1886 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1887 } else if (isbinary) { 1888 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1889 } else if (isdraw) { 1890 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1891 } else { 1892 Mat B; 1893 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1894 ierr = MatView(B,viewer);CHKERRQ(ierr); 1895 ierr = MatDestroy(B);CHKERRQ(ierr); 1896 } 1897 PetscFunctionReturn(0); 1898 } 1899 1900 1901 #undef __FUNCT__ 1902 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1903 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1904 { 1905 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1906 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1907 PetscInt *ai = a->i,*ailen = a->ilen; 1908 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1909 MatScalar *ap,*aa = a->a; 1910 1911 PetscFunctionBegin; 1912 for (k=0; k<m; k++) { /* loop over rows */ 1913 row = im[k]; brow = row/bs; 1914 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1915 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1916 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1917 nrow = ailen[brow]; 1918 for (l=0; l<n; l++) { /* loop over columns */ 1919 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1920 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1921 col = in[l] ; 1922 bcol = col/bs; 1923 cidx = col%bs; 1924 ridx = row%bs; 1925 high = nrow; 1926 low = 0; /* assume unsorted */ 1927 while (high-low > 5) { 1928 t = (low+high)/2; 1929 if (rp[t] > bcol) high = t; 1930 else low = t; 1931 } 1932 for (i=low; i<high; i++) { 1933 if (rp[i] > bcol) break; 1934 if (rp[i] == bcol) { 1935 *v++ = ap[bs2*i+bs*cidx+ridx]; 1936 goto finished; 1937 } 1938 } 1939 *v++ = 0.0; 1940 finished:; 1941 } 1942 } 1943 PetscFunctionReturn(0); 1944 } 1945 1946 #undef __FUNCT__ 1947 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1948 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1949 { 1950 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1951 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1952 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1953 PetscErrorCode ierr; 1954 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1955 PetscBool roworiented=a->roworiented; 1956 const PetscScalar *value = v; 1957 MatScalar *ap,*aa = a->a,*bap; 1958 1959 PetscFunctionBegin; 1960 if (roworiented) { 1961 stepval = (n-1)*bs; 1962 } else { 1963 stepval = (m-1)*bs; 1964 } 1965 for (k=0; k<m; k++) { /* loop over added rows */ 1966 row = im[k]; 1967 if (row < 0) continue; 1968 #if defined(PETSC_USE_DEBUG) 1969 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1970 #endif 1971 rp = aj + ai[row]; 1972 ap = aa + bs2*ai[row]; 1973 rmax = imax[row]; 1974 nrow = ailen[row]; 1975 low = 0; 1976 high = nrow; 1977 for (l=0; l<n; l++) { /* loop over added columns */ 1978 if (in[l] < 0) continue; 1979 #if defined(PETSC_USE_DEBUG) 1980 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1981 #endif 1982 col = in[l]; 1983 if (roworiented) { 1984 value = v + (k*(stepval+bs) + l)*bs; 1985 } else { 1986 value = v + (l*(stepval+bs) + k)*bs; 1987 } 1988 if (col <= lastcol) low = 0; else high = nrow; 1989 lastcol = col; 1990 while (high-low > 7) { 1991 t = (low+high)/2; 1992 if (rp[t] > col) high = t; 1993 else low = t; 1994 } 1995 for (i=low; i<high; i++) { 1996 if (rp[i] > col) break; 1997 if (rp[i] == col) { 1998 bap = ap + bs2*i; 1999 if (roworiented) { 2000 if (is == ADD_VALUES) { 2001 for (ii=0; ii<bs; ii++,value+=stepval) { 2002 for (jj=ii; jj<bs2; jj+=bs) { 2003 bap[jj] += *value++; 2004 } 2005 } 2006 } else { 2007 for (ii=0; ii<bs; ii++,value+=stepval) { 2008 for (jj=ii; jj<bs2; jj+=bs) { 2009 bap[jj] = *value++; 2010 } 2011 } 2012 } 2013 } else { 2014 if (is == ADD_VALUES) { 2015 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2016 for (jj=0; jj<bs; jj++) { 2017 bap[jj] += value[jj]; 2018 } 2019 bap += bs; 2020 } 2021 } else { 2022 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2023 for (jj=0; jj<bs; jj++) { 2024 bap[jj] = value[jj]; 2025 } 2026 bap += bs; 2027 } 2028 } 2029 } 2030 goto noinsert2; 2031 } 2032 } 2033 if (nonew == 1) goto noinsert2; 2034 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2035 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2036 N = nrow++ - 1; high++; 2037 /* shift up all the later entries in this row */ 2038 for (ii=N; ii>=i; ii--) { 2039 rp[ii+1] = rp[ii]; 2040 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2041 } 2042 if (N >= i) { 2043 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2044 } 2045 rp[i] = col; 2046 bap = ap + bs2*i; 2047 if (roworiented) { 2048 for (ii=0; ii<bs; ii++,value+=stepval) { 2049 for (jj=ii; jj<bs2; jj+=bs) { 2050 bap[jj] = *value++; 2051 } 2052 } 2053 } else { 2054 for (ii=0; ii<bs; ii++,value+=stepval) { 2055 for (jj=0; jj<bs; jj++) { 2056 *bap++ = *value++; 2057 } 2058 } 2059 } 2060 noinsert2:; 2061 low = i; 2062 } 2063 ailen[row] = nrow; 2064 } 2065 PetscFunctionReturn(0); 2066 } 2067 2068 #undef __FUNCT__ 2069 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 2070 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 2071 { 2072 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2073 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 2074 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 2075 PetscErrorCode ierr; 2076 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 2077 MatScalar *aa = a->a,*ap; 2078 PetscReal ratio=0.6; 2079 2080 PetscFunctionBegin; 2081 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 2082 2083 if (m) rmax = ailen[0]; 2084 for (i=1; i<mbs; i++) { 2085 /* move each row back by the amount of empty slots (fshift) before it*/ 2086 fshift += imax[i-1] - ailen[i-1]; 2087 rmax = PetscMax(rmax,ailen[i]); 2088 if (fshift) { 2089 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 2090 N = ailen[i]; 2091 for (j=0; j<N; j++) { 2092 ip[j-fshift] = ip[j]; 2093 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2094 } 2095 } 2096 ai[i] = ai[i-1] + ailen[i-1]; 2097 } 2098 if (mbs) { 2099 fshift += imax[mbs-1] - ailen[mbs-1]; 2100 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 2101 } 2102 /* reset ilen and imax for each row */ 2103 for (i=0; i<mbs; i++) { 2104 ailen[i] = imax[i] = ai[i+1] - ai[i]; 2105 } 2106 a->nz = ai[mbs]; 2107 2108 /* diagonals may have moved, so kill the diagonal pointers */ 2109 a->idiagvalid = PETSC_FALSE; 2110 if (fshift && a->diag) { 2111 ierr = PetscFree(a->diag);CHKERRQ(ierr); 2112 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2113 a->diag = 0; 2114 } 2115 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 2116 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 2117 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 2118 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 2119 A->info.mallocs += a->reallocs; 2120 a->reallocs = 0; 2121 A->info.nz_unneeded = (PetscReal)fshift*bs2; 2122 2123 ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 2124 A->same_nonzero = PETSC_TRUE; 2125 PetscFunctionReturn(0); 2126 } 2127 2128 /* 2129 This function returns an array of flags which indicate the locations of contiguous 2130 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2131 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2132 Assume: sizes should be long enough to hold all the values. 2133 */ 2134 #undef __FUNCT__ 2135 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 2136 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2137 { 2138 PetscInt i,j,k,row; 2139 PetscBool flg; 2140 2141 PetscFunctionBegin; 2142 for (i=0,j=0; i<n; j++) { 2143 row = idx[i]; 2144 if (row%bs!=0) { /* Not the begining of a block */ 2145 sizes[j] = 1; 2146 i++; 2147 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2148 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 2149 i++; 2150 } else { /* Begining of the block, so check if the complete block exists */ 2151 flg = PETSC_TRUE; 2152 for (k=1; k<bs; k++) { 2153 if (row+k != idx[i+k]) { /* break in the block */ 2154 flg = PETSC_FALSE; 2155 break; 2156 } 2157 } 2158 if (flg) { /* No break in the bs */ 2159 sizes[j] = bs; 2160 i+= bs; 2161 } else { 2162 sizes[j] = 1; 2163 i++; 2164 } 2165 } 2166 } 2167 *bs_max = j; 2168 PetscFunctionReturn(0); 2169 } 2170 2171 #undef __FUNCT__ 2172 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2173 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2174 { 2175 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2176 PetscErrorCode ierr; 2177 PetscInt i,j,k,count,*rows; 2178 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2179 PetscScalar zero = 0.0; 2180 MatScalar *aa; 2181 const PetscScalar *xx; 2182 PetscScalar *bb; 2183 2184 PetscFunctionBegin; 2185 /* fix right hand side if needed */ 2186 if (x && b) { 2187 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2188 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2189 for (i=0; i<is_n; i++) { 2190 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2191 } 2192 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2193 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2194 } 2195 2196 /* Make a copy of the IS and sort it */ 2197 /* allocate memory for rows,sizes */ 2198 ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2199 2200 /* copy IS values to rows, and sort them */ 2201 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2202 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2203 2204 if (baij->keepnonzeropattern) { 2205 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2206 bs_max = is_n; 2207 A->same_nonzero = PETSC_TRUE; 2208 } else { 2209 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2210 A->same_nonzero = PETSC_FALSE; 2211 } 2212 2213 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2214 row = rows[j]; 2215 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2216 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2217 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2218 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2219 if (diag != 0.0) { 2220 if (baij->ilen[row/bs] > 0) { 2221 baij->ilen[row/bs] = 1; 2222 baij->j[baij->i[row/bs]] = row/bs; 2223 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2224 } 2225 /* Now insert all the diagonal values for this bs */ 2226 for (k=0; k<bs; k++) { 2227 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2228 } 2229 } else { /* (diag == 0.0) */ 2230 baij->ilen[row/bs] = 0; 2231 } /* end (diag == 0.0) */ 2232 } else { /* (sizes[i] != bs) */ 2233 #if defined (PETSC_USE_DEBUG) 2234 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2235 #endif 2236 for (k=0; k<count; k++) { 2237 aa[0] = zero; 2238 aa += bs; 2239 } 2240 if (diag != 0.0) { 2241 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2242 } 2243 } 2244 } 2245 2246 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2247 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2248 PetscFunctionReturn(0); 2249 } 2250 2251 #undef __FUNCT__ 2252 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2253 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2254 { 2255 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2256 PetscErrorCode ierr; 2257 PetscInt i,j,k,count; 2258 PetscInt bs=A->rmap->bs,bs2=baij->bs2,row,col; 2259 PetscScalar zero = 0.0; 2260 MatScalar *aa; 2261 const PetscScalar *xx; 2262 PetscScalar *bb; 2263 PetscBool *zeroed,vecs = PETSC_FALSE; 2264 2265 PetscFunctionBegin; 2266 /* fix right hand side if needed */ 2267 if (x && b) { 2268 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2269 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2270 vecs = PETSC_TRUE; 2271 } 2272 A->same_nonzero = PETSC_TRUE; 2273 2274 /* zero the columns */ 2275 ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr); 2276 ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 2277 for (i=0; i<is_n; i++) { 2278 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2279 zeroed[is_idx[i]] = PETSC_TRUE; 2280 } 2281 for (i=0; i<A->rmap->N; i++) { 2282 if (!zeroed[i]) { 2283 row = i/bs; 2284 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2285 for (k=0; k<bs; k++) { 2286 col = bs*baij->j[j] + k; 2287 if (zeroed[col]) { 2288 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2289 if (vecs) bb[i] -= aa[0]*xx[col]; 2290 aa[0] = 0.0; 2291 } 2292 } 2293 } 2294 } else if (vecs) bb[i] = diag*xx[i]; 2295 } 2296 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2297 if (vecs) { 2298 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2299 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2300 } 2301 2302 /* zero the rows */ 2303 for (i=0; i<is_n; i++) { 2304 row = is_idx[i]; 2305 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2306 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2307 for (k=0; k<count; k++) { 2308 aa[0] = zero; 2309 aa += bs; 2310 } 2311 if (diag != 0.0) { 2312 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2313 } 2314 } 2315 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2316 PetscFunctionReturn(0); 2317 } 2318 2319 #undef __FUNCT__ 2320 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2321 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2322 { 2323 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2324 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2325 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2326 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2327 PetscErrorCode ierr; 2328 PetscInt ridx,cidx,bs2=a->bs2; 2329 PetscBool roworiented=a->roworiented; 2330 MatScalar *ap,value,*aa=a->a,*bap; 2331 2332 PetscFunctionBegin; 2333 if (v) PetscValidScalarPointer(v,6); 2334 for (k=0; k<m; k++) { /* loop over added rows */ 2335 row = im[k]; 2336 brow = row/bs; 2337 if (row < 0) continue; 2338 #if defined(PETSC_USE_DEBUG) 2339 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2340 #endif 2341 rp = aj + ai[brow]; 2342 ap = aa + bs2*ai[brow]; 2343 rmax = imax[brow]; 2344 nrow = ailen[brow]; 2345 low = 0; 2346 high = nrow; 2347 for (l=0; l<n; l++) { /* loop over added columns */ 2348 if (in[l] < 0) continue; 2349 #if defined(PETSC_USE_DEBUG) 2350 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2351 #endif 2352 col = in[l]; bcol = col/bs; 2353 ridx = row % bs; cidx = col % bs; 2354 if (roworiented) { 2355 value = v[l + k*n]; 2356 } else { 2357 value = v[k + l*m]; 2358 } 2359 if (col <= lastcol) low = 0; else high = nrow; 2360 lastcol = col; 2361 while (high-low > 7) { 2362 t = (low+high)/2; 2363 if (rp[t] > bcol) high = t; 2364 else low = t; 2365 } 2366 for (i=low; i<high; i++) { 2367 if (rp[i] > bcol) break; 2368 if (rp[i] == bcol) { 2369 bap = ap + bs2*i + bs*cidx + ridx; 2370 if (is == ADD_VALUES) *bap += value; 2371 else *bap = value; 2372 goto noinsert1; 2373 } 2374 } 2375 if (nonew == 1) goto noinsert1; 2376 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2377 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2378 N = nrow++ - 1; high++; 2379 /* shift up all the later entries in this row */ 2380 for (ii=N; ii>=i; ii--) { 2381 rp[ii+1] = rp[ii]; 2382 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2383 } 2384 if (N>=i) { 2385 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2386 } 2387 rp[i] = bcol; 2388 ap[bs2*i + bs*cidx + ridx] = value; 2389 a->nz++; 2390 noinsert1:; 2391 low = i; 2392 } 2393 ailen[brow] = nrow; 2394 } 2395 A->same_nonzero = PETSC_FALSE; 2396 PetscFunctionReturn(0); 2397 } 2398 2399 #undef __FUNCT__ 2400 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2401 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2402 { 2403 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2404 Mat outA; 2405 PetscErrorCode ierr; 2406 PetscBool row_identity,col_identity; 2407 2408 PetscFunctionBegin; 2409 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2410 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2411 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2412 if (!row_identity || !col_identity) { 2413 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2414 } 2415 2416 outA = inA; 2417 inA->factortype = MAT_FACTOR_LU; 2418 2419 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2420 2421 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2422 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 2423 a->row = row; 2424 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2425 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 2426 a->col = col; 2427 2428 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2429 if (a->icol) { 2430 ierr = ISDestroy(a->icol);CHKERRQ(ierr); 2431 } 2432 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2433 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2434 2435 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2436 if (!a->solve_work) { 2437 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2438 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2439 } 2440 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2441 2442 PetscFunctionReturn(0); 2443 } 2444 2445 EXTERN_C_BEGIN 2446 #undef __FUNCT__ 2447 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2448 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2449 { 2450 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2451 PetscInt i,nz,mbs; 2452 2453 PetscFunctionBegin; 2454 nz = baij->maxnz; 2455 mbs = baij->mbs; 2456 for (i=0; i<nz; i++) { 2457 baij->j[i] = indices[i]; 2458 } 2459 baij->nz = nz; 2460 for (i=0; i<mbs; i++) { 2461 baij->ilen[i] = baij->imax[i]; 2462 } 2463 PetscFunctionReturn(0); 2464 } 2465 EXTERN_C_END 2466 2467 #undef __FUNCT__ 2468 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2469 /*@ 2470 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2471 in the matrix. 2472 2473 Input Parameters: 2474 + mat - the SeqBAIJ matrix 2475 - indices - the column indices 2476 2477 Level: advanced 2478 2479 Notes: 2480 This can be called if you have precomputed the nonzero structure of the 2481 matrix and want to provide it to the matrix object to improve the performance 2482 of the MatSetValues() operation. 2483 2484 You MUST have set the correct numbers of nonzeros per row in the call to 2485 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2486 2487 MUST be called before any calls to MatSetValues(); 2488 2489 @*/ 2490 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2491 { 2492 PetscErrorCode ierr; 2493 2494 PetscFunctionBegin; 2495 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2496 PetscValidPointer(indices,2); 2497 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr); 2498 PetscFunctionReturn(0); 2499 } 2500 2501 #undef __FUNCT__ 2502 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2503 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2504 { 2505 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2506 PetscErrorCode ierr; 2507 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2508 PetscReal atmp; 2509 PetscScalar *x,zero = 0.0; 2510 MatScalar *aa; 2511 PetscInt ncols,brow,krow,kcol; 2512 2513 PetscFunctionBegin; 2514 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2515 bs = A->rmap->bs; 2516 aa = a->a; 2517 ai = a->i; 2518 aj = a->j; 2519 mbs = a->mbs; 2520 2521 ierr = VecSet(v,zero);CHKERRQ(ierr); 2522 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2523 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2524 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2525 for (i=0; i<mbs; i++) { 2526 ncols = ai[1] - ai[0]; ai++; 2527 brow = bs*i; 2528 for (j=0; j<ncols; j++){ 2529 for (kcol=0; kcol<bs; kcol++){ 2530 for (krow=0; krow<bs; krow++){ 2531 atmp = PetscAbsScalar(*aa);aa++; 2532 row = brow + krow; /* row index */ 2533 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2534 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2535 } 2536 } 2537 aj++; 2538 } 2539 } 2540 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2541 PetscFunctionReturn(0); 2542 } 2543 2544 #undef __FUNCT__ 2545 #define __FUNCT__ "MatCopy_SeqBAIJ" 2546 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2547 { 2548 PetscErrorCode ierr; 2549 2550 PetscFunctionBegin; 2551 /* If the two matrices have the same copy implementation, use fast copy. */ 2552 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2553 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2554 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2555 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2556 2557 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2558 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2559 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2560 } else { 2561 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2562 } 2563 PetscFunctionReturn(0); 2564 } 2565 2566 #undef __FUNCT__ 2567 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2568 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2569 { 2570 PetscErrorCode ierr; 2571 2572 PetscFunctionBegin; 2573 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2574 PetscFunctionReturn(0); 2575 } 2576 2577 #undef __FUNCT__ 2578 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2579 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2580 { 2581 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2582 PetscFunctionBegin; 2583 *array = a->a; 2584 PetscFunctionReturn(0); 2585 } 2586 2587 #undef __FUNCT__ 2588 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2589 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2590 { 2591 PetscFunctionBegin; 2592 PetscFunctionReturn(0); 2593 } 2594 2595 #undef __FUNCT__ 2596 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2597 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2598 { 2599 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2600 PetscErrorCode ierr; 2601 PetscInt i,bs=Y->rmap->bs,j,bs2; 2602 PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz); 2603 2604 PetscFunctionBegin; 2605 if (str == SAME_NONZERO_PATTERN) { 2606 PetscScalar alpha = a; 2607 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2608 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2609 if (y->xtoy && y->XtoY != X) { 2610 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2611 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2612 } 2613 if (!y->xtoy) { /* get xtoy */ 2614 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2615 y->XtoY = X; 2616 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2617 } 2618 bs2 = bs*bs; 2619 for (i=0; i<x->nz; i++) { 2620 j = 0; 2621 while (j < bs2){ 2622 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2623 j++; 2624 } 2625 } 2626 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2627 } else { 2628 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2629 } 2630 PetscFunctionReturn(0); 2631 } 2632 2633 #undef __FUNCT__ 2634 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ" 2635 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs) 2636 { 2637 PetscInt rbs,cbs; 2638 PetscErrorCode ierr; 2639 2640 PetscFunctionBegin; 2641 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 2642 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 2643 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 2644 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 2645 PetscFunctionReturn(0); 2646 } 2647 2648 #undef __FUNCT__ 2649 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2650 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2651 { 2652 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2653 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2654 MatScalar *aa = a->a; 2655 2656 PetscFunctionBegin; 2657 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2658 PetscFunctionReturn(0); 2659 } 2660 2661 #undef __FUNCT__ 2662 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2663 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2664 { 2665 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2666 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2667 MatScalar *aa = a->a; 2668 2669 PetscFunctionBegin; 2670 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2671 PetscFunctionReturn(0); 2672 } 2673 2674 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2675 2676 #undef __FUNCT__ 2677 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2678 /* 2679 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2680 */ 2681 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 2682 { 2683 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2684 PetscErrorCode ierr; 2685 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2686 PetscInt nz = a->i[m],row,*jj,mr,col; 2687 2688 PetscFunctionBegin; 2689 *nn = n; 2690 if (!ia) PetscFunctionReturn(0); 2691 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2692 else { 2693 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2694 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2695 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2696 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2697 jj = a->j; 2698 for (i=0; i<nz; i++) { 2699 collengths[jj[i]]++; 2700 } 2701 cia[0] = oshift; 2702 for (i=0; i<n; i++) { 2703 cia[i+1] = cia[i] + collengths[i]; 2704 } 2705 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2706 jj = a->j; 2707 for (row=0; row<m; row++) { 2708 mr = a->i[row+1] - a->i[row]; 2709 for (i=0; i<mr; i++) { 2710 col = *jj++; 2711 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2712 } 2713 } 2714 ierr = PetscFree(collengths);CHKERRQ(ierr); 2715 *ia = cia; *ja = cja; 2716 } 2717 PetscFunctionReturn(0); 2718 } 2719 2720 #undef __FUNCT__ 2721 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2722 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 2723 { 2724 PetscErrorCode ierr; 2725 2726 PetscFunctionBegin; 2727 if (!ia) PetscFunctionReturn(0); 2728 ierr = PetscFree(*ia);CHKERRQ(ierr); 2729 ierr = PetscFree(*ja);CHKERRQ(ierr); 2730 PetscFunctionReturn(0); 2731 } 2732 2733 #undef __FUNCT__ 2734 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2735 PetscErrorCode MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2736 { 2737 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f; 2738 PetscErrorCode ierr; 2739 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2; 2740 PetscScalar dx,*y,*xx,*w3_array; 2741 PetscScalar *vscale_array; 2742 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2743 Vec w1=coloring->w1,w2=coloring->w2,w3; 2744 void *fctx = coloring->fctx; 2745 PetscBool flg = PETSC_FALSE; 2746 PetscInt ctype=coloring->ctype,N,col_start=0,col_end=0; 2747 Vec x1_tmp; 2748 2749 PetscFunctionBegin; 2750 PetscValidHeaderSpecific(J,MAT_CLASSID,1); 2751 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2); 2752 PetscValidHeaderSpecific(x1,VEC_CLASSID,3); 2753 if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2754 2755 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2756 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2757 ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr); 2758 if (flg) { 2759 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2760 } else { 2761 PetscBool assembled; 2762 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2763 if (assembled) { 2764 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2765 } 2766 } 2767 2768 x1_tmp = x1; 2769 if (!coloring->vscale){ 2770 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2771 } 2772 2773 /* 2774 This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets 2775 coloring->F for the coarser grids from the finest 2776 */ 2777 if (coloring->F) { 2778 ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr); 2779 ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr); 2780 if (m1 != m2) { 2781 coloring->F = 0; 2782 } 2783 } 2784 2785 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2786 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2787 } 2788 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2789 2790 /* Set w1 = F(x1) */ 2791 if (coloring->F) { 2792 w1 = coloring->F; /* use already computed value of function */ 2793 coloring->F = 0; 2794 } else { 2795 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2796 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2797 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2798 } 2799 2800 if (!coloring->w3) { 2801 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2802 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2803 } 2804 w3 = coloring->w3; 2805 2806 CHKMEMQ; 2807 /* Compute all the local scale factors, including ghost points */ 2808 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2809 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2810 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2811 if (ctype == IS_COLORING_GHOSTED){ 2812 col_start = 0; col_end = N; 2813 } else if (ctype == IS_COLORING_GLOBAL){ 2814 xx = xx - start; 2815 vscale_array = vscale_array - start; 2816 col_start = start; col_end = N + start; 2817 } CHKMEMQ; 2818 for (col=col_start; col<col_end; col++){ 2819 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2820 if (coloring->htype[0] == 'w') { 2821 dx = 1.0 + unorm; 2822 } else { 2823 dx = xx[col]; 2824 } 2825 if (dx == 0.0) dx = 1.0; 2826 #if !defined(PETSC_USE_COMPLEX) 2827 if (dx < umin && dx >= 0.0) dx = umin; 2828 else if (dx < 0.0 && dx > -umin) dx = -umin; 2829 #else 2830 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2831 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2832 #endif 2833 dx *= epsilon; 2834 vscale_array[col] = 1.0/dx; 2835 } CHKMEMQ; 2836 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2837 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2838 if (ctype == IS_COLORING_GLOBAL){ 2839 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2840 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2841 } 2842 CHKMEMQ; 2843 if (coloring->vscaleforrow) { 2844 vscaleforrow = coloring->vscaleforrow; 2845 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2846 2847 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2848 /* 2849 Loop over each color 2850 */ 2851 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2852 for (k=0; k<coloring->ncolors; k++) { 2853 coloring->currentcolor = k; 2854 for (i=0; i<bs; i++) { 2855 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2856 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2857 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2858 /* 2859 Loop over each column associated with color 2860 adding the perturbation to the vector w3. 2861 */ 2862 for (l=0; l<coloring->ncolumns[k]; l++) { 2863 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2864 if (coloring->htype[0] == 'w') { 2865 dx = 1.0 + unorm; 2866 } else { 2867 dx = xx[col]; 2868 } 2869 if (dx == 0.0) dx = 1.0; 2870 #if !defined(PETSC_USE_COMPLEX) 2871 if (dx < umin && dx >= 0.0) dx = umin; 2872 else if (dx < 0.0 && dx > -umin) dx = -umin; 2873 #else 2874 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2875 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2876 #endif 2877 dx *= epsilon; 2878 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2879 w3_array[col] += dx; 2880 } 2881 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2882 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2883 2884 /* 2885 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2886 w2 = F(x1 + dx) - F(x1) 2887 */ 2888 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2889 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2890 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2891 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2892 2893 /* 2894 Loop over rows of vector, putting results into Jacobian matrix 2895 */ 2896 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2897 for (l=0; l<coloring->nrows[k]; l++) { 2898 row = bs*coloring->rows[k][l]; /* local row index */ 2899 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2900 for (j=0; j<bs; j++) { 2901 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2902 srows[j] = row + start + j; 2903 } 2904 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2905 } 2906 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2907 } 2908 } /* endof for each color */ 2909 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2910 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2911 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2912 ierr = PetscFree(srows);CHKERRQ(ierr); 2913 2914 coloring->currentcolor = -1; 2915 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2916 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2917 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2918 PetscFunctionReturn(0); 2919 } 2920 2921 /* -------------------------------------------------------------------*/ 2922 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2923 MatGetRow_SeqBAIJ, 2924 MatRestoreRow_SeqBAIJ, 2925 MatMult_SeqBAIJ_N, 2926 /* 4*/ MatMultAdd_SeqBAIJ_N, 2927 MatMultTranspose_SeqBAIJ, 2928 MatMultTransposeAdd_SeqBAIJ, 2929 0, 2930 0, 2931 0, 2932 /*10*/ 0, 2933 MatLUFactor_SeqBAIJ, 2934 0, 2935 0, 2936 MatTranspose_SeqBAIJ, 2937 /*15*/ MatGetInfo_SeqBAIJ, 2938 MatEqual_SeqBAIJ, 2939 MatGetDiagonal_SeqBAIJ, 2940 MatDiagonalScale_SeqBAIJ, 2941 MatNorm_SeqBAIJ, 2942 /*20*/ 0, 2943 MatAssemblyEnd_SeqBAIJ, 2944 MatSetOption_SeqBAIJ, 2945 MatZeroEntries_SeqBAIJ, 2946 /*24*/ MatZeroRows_SeqBAIJ, 2947 0, 2948 0, 2949 0, 2950 0, 2951 /*29*/ MatSetUpPreallocation_SeqBAIJ, 2952 0, 2953 0, 2954 MatGetArray_SeqBAIJ, 2955 MatRestoreArray_SeqBAIJ, 2956 /*34*/ MatDuplicate_SeqBAIJ, 2957 0, 2958 0, 2959 MatILUFactor_SeqBAIJ, 2960 0, 2961 /*39*/ MatAXPY_SeqBAIJ, 2962 MatGetSubMatrices_SeqBAIJ, 2963 MatIncreaseOverlap_SeqBAIJ, 2964 MatGetValues_SeqBAIJ, 2965 MatCopy_SeqBAIJ, 2966 /*44*/ 0, 2967 MatScale_SeqBAIJ, 2968 0, 2969 0, 2970 MatZeroRowsColumns_SeqBAIJ, 2971 /*49*/ MatSetBlockSize_SeqBAIJ, 2972 MatGetRowIJ_SeqBAIJ, 2973 MatRestoreRowIJ_SeqBAIJ, 2974 MatGetColumnIJ_SeqBAIJ, 2975 MatRestoreColumnIJ_SeqBAIJ, 2976 /*54*/ MatFDColoringCreate_SeqAIJ, 2977 0, 2978 0, 2979 0, 2980 MatSetValuesBlocked_SeqBAIJ, 2981 /*59*/ MatGetSubMatrix_SeqBAIJ, 2982 MatDestroy_SeqBAIJ, 2983 MatView_SeqBAIJ, 2984 0, 2985 0, 2986 /*64*/ 0, 2987 0, 2988 0, 2989 0, 2990 0, 2991 /*69*/ MatGetRowMaxAbs_SeqBAIJ, 2992 0, 2993 MatConvert_Basic, 2994 0, 2995 0, 2996 /*74*/ 0, 2997 MatFDColoringApply_BAIJ, 2998 0, 2999 0, 3000 0, 3001 /*79*/ 0, 3002 0, 3003 0, 3004 0, 3005 MatLoad_SeqBAIJ, 3006 /*84*/ 0, 3007 0, 3008 0, 3009 0, 3010 0, 3011 /*89*/ 0, 3012 0, 3013 0, 3014 0, 3015 0, 3016 /*94*/ 0, 3017 0, 3018 0, 3019 0, 3020 0, 3021 /*99*/0, 3022 0, 3023 0, 3024 0, 3025 0, 3026 /*104*/0, 3027 MatRealPart_SeqBAIJ, 3028 MatImaginaryPart_SeqBAIJ, 3029 0, 3030 0, 3031 /*109*/0, 3032 0, 3033 0, 3034 0, 3035 MatMissingDiagonal_SeqBAIJ, 3036 /*114*/0, 3037 0, 3038 0, 3039 0, 3040 0, 3041 /*119*/0, 3042 0, 3043 MatMultHermitianTranspose_SeqBAIJ, 3044 MatMultHermitianTransposeAdd_SeqBAIJ, 3045 0, 3046 }; 3047 3048 EXTERN_C_BEGIN 3049 #undef __FUNCT__ 3050 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 3051 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3052 { 3053 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3054 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 3055 PetscErrorCode ierr; 3056 3057 PetscFunctionBegin; 3058 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3059 3060 /* allocate space for values if not already there */ 3061 if (!aij->saved_values) { 3062 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 3063 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 3064 } 3065 3066 /* copy values over */ 3067 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3068 PetscFunctionReturn(0); 3069 } 3070 EXTERN_C_END 3071 3072 EXTERN_C_BEGIN 3073 #undef __FUNCT__ 3074 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 3075 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3076 { 3077 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3078 PetscErrorCode ierr; 3079 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 3080 3081 PetscFunctionBegin; 3082 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3083 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 3084 3085 /* copy values over */ 3086 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3087 PetscFunctionReturn(0); 3088 } 3089 EXTERN_C_END 3090 3091 EXTERN_C_BEGIN 3092 extern PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 3093 extern PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 3094 EXTERN_C_END 3095 3096 EXTERN_C_BEGIN 3097 #undef __FUNCT__ 3098 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 3099 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 3100 { 3101 Mat_SeqBAIJ *b; 3102 PetscErrorCode ierr; 3103 PetscInt i,mbs,nbs,bs2,newbs = PetscAbs(bs); 3104 PetscBool flg,skipallocation = PETSC_FALSE; 3105 3106 PetscFunctionBegin; 3107 3108 if (nz == MAT_SKIP_ALLOCATION) { 3109 skipallocation = PETSC_TRUE; 3110 nz = 0; 3111 } 3112 3113 if (bs < 0) { 3114 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 3115 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 3116 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3117 bs = PetscAbs(bs); 3118 } 3119 if (nnz && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 3120 bs = newbs; 3121 3122 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3123 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3124 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3125 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3126 3127 B->preallocated = PETSC_TRUE; 3128 3129 mbs = B->rmap->n/bs; 3130 nbs = B->cmap->n/bs; 3131 bs2 = bs*bs; 3132 3133 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 3134 3135 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3136 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 3137 if (nnz) { 3138 for (i=0; i<mbs; i++) { 3139 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 3140 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 3141 } 3142 } 3143 3144 b = (Mat_SeqBAIJ*)B->data; 3145 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 3146 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3147 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3148 3149 if (!flg) { 3150 switch (bs) { 3151 case 1: 3152 B->ops->mult = MatMult_SeqBAIJ_1; 3153 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3154 B->ops->sor = MatSOR_SeqBAIJ_1; 3155 break; 3156 case 2: 3157 B->ops->mult = MatMult_SeqBAIJ_2; 3158 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3159 B->ops->sor = MatSOR_SeqBAIJ_2; 3160 break; 3161 case 3: 3162 B->ops->mult = MatMult_SeqBAIJ_3; 3163 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3164 B->ops->sor = MatSOR_SeqBAIJ_3; 3165 break; 3166 case 4: 3167 B->ops->mult = MatMult_SeqBAIJ_4; 3168 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3169 B->ops->sor = MatSOR_SeqBAIJ_4; 3170 break; 3171 case 5: 3172 B->ops->mult = MatMult_SeqBAIJ_5; 3173 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3174 B->ops->sor = MatSOR_SeqBAIJ_5; 3175 break; 3176 case 6: 3177 B->ops->mult = MatMult_SeqBAIJ_6; 3178 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3179 B->ops->sor = MatSOR_SeqBAIJ_6; 3180 break; 3181 case 7: 3182 B->ops->mult = MatMult_SeqBAIJ_7; 3183 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3184 B->ops->sor = MatSOR_SeqBAIJ_7; 3185 break; 3186 case 15: 3187 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3188 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3189 B->ops->sor = MatSOR_SeqBAIJ_N; 3190 break; 3191 default: 3192 B->ops->mult = MatMult_SeqBAIJ_N; 3193 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3194 B->ops->sor = MatSOR_SeqBAIJ_N; 3195 break; 3196 } 3197 } 3198 B->rmap->bs = bs; 3199 b->mbs = mbs; 3200 b->nbs = nbs; 3201 if (!skipallocation) { 3202 if (!b->imax) { 3203 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 3204 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt)); 3205 b->free_imax_ilen = PETSC_TRUE; 3206 } 3207 /* b->ilen will count nonzeros in each block row so far. */ 3208 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 3209 if (!nnz) { 3210 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3211 else if (nz < 0) nz = 1; 3212 for (i=0; i<mbs; i++) b->imax[i] = nz; 3213 nz = nz*mbs; 3214 } else { 3215 nz = 0; 3216 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 3217 } 3218 3219 /* allocate the matrix space */ 3220 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 3221 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 3222 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 3223 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 3224 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3225 b->singlemalloc = PETSC_TRUE; 3226 b->i[0] = 0; 3227 for (i=1; i<mbs+1; i++) { 3228 b->i[i] = b->i[i-1] + b->imax[i-1]; 3229 } 3230 b->free_a = PETSC_TRUE; 3231 b->free_ij = PETSC_TRUE; 3232 } else { 3233 b->free_a = PETSC_FALSE; 3234 b->free_ij = PETSC_FALSE; 3235 } 3236 3237 B->rmap->bs = bs; 3238 b->bs2 = bs2; 3239 b->mbs = mbs; 3240 b->nz = 0; 3241 b->maxnz = nz; 3242 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3243 PetscFunctionReturn(0); 3244 } 3245 EXTERN_C_END 3246 3247 EXTERN_C_BEGIN 3248 #undef __FUNCT__ 3249 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3250 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3251 { 3252 PetscInt i,m,nz,nz_max=0,*nnz; 3253 PetscScalar *values=0; 3254 PetscErrorCode ierr; 3255 3256 PetscFunctionBegin; 3257 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3258 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3259 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3260 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3261 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3262 m = B->rmap->n/bs; 3263 3264 if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 3265 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3266 for(i=0; i<m; i++) { 3267 nz = ii[i+1]- ii[i]; 3268 if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 3269 nz_max = PetscMax(nz_max, nz); 3270 nnz[i] = nz; 3271 } 3272 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3273 ierr = PetscFree(nnz);CHKERRQ(ierr); 3274 3275 values = (PetscScalar*)V; 3276 if (!values) { 3277 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3278 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3279 } 3280 for (i=0; i<m; i++) { 3281 PetscInt ncols = ii[i+1] - ii[i]; 3282 const PetscInt *icols = jj + ii[i]; 3283 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3284 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3285 } 3286 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3287 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3288 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3289 3290 PetscFunctionReturn(0); 3291 } 3292 EXTERN_C_END 3293 3294 3295 EXTERN_C_BEGIN 3296 extern PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3297 #if defined(PETSC_HAVE_MUMPS) 3298 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3299 #endif 3300 extern PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 3301 EXTERN_C_END 3302 3303 /*MC 3304 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3305 block sparse compressed row format. 3306 3307 Options Database Keys: 3308 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3309 3310 Level: beginner 3311 3312 .seealso: MatCreateSeqBAIJ() 3313 M*/ 3314 3315 3316 EXTERN_C_BEGIN 3317 #undef __FUNCT__ 3318 #define __FUNCT__ "MatCreate_SeqBAIJ" 3319 PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3320 { 3321 PetscErrorCode ierr; 3322 PetscMPIInt size; 3323 Mat_SeqBAIJ *b; 3324 3325 PetscFunctionBegin; 3326 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 3327 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3328 3329 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3330 B->data = (void*)b; 3331 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3332 b->row = 0; 3333 b->col = 0; 3334 b->icol = 0; 3335 b->reallocs = 0; 3336 b->saved_values = 0; 3337 3338 b->roworiented = PETSC_TRUE; 3339 b->nonew = 0; 3340 b->diag = 0; 3341 b->solve_work = 0; 3342 b->mult_work = 0; 3343 B->spptr = 0; 3344 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3345 b->keepnonzeropattern = PETSC_FALSE; 3346 b->xtoy = 0; 3347 b->XtoY = 0; 3348 B->same_nonzero = PETSC_FALSE; 3349 3350 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C", 3351 "MatGetFactorAvailable_seqbaij_petsc", 3352 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3353 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C", 3354 "MatGetFactor_seqbaij_petsc", 3355 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3356 #if defined(PETSC_HAVE_MUMPS) 3357 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr); 3358 #endif 3359 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 3360 "MatInvertBlockDiagonal_SeqBAIJ", 3361 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3362 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3363 "MatStoreValues_SeqBAIJ", 3364 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3365 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3366 "MatRetrieveValues_SeqBAIJ", 3367 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3368 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 3369 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 3370 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3371 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 3372 "MatConvert_SeqBAIJ_SeqAIJ", 3373 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3374 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 3375 "MatConvert_SeqBAIJ_SeqSBAIJ", 3376 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3377 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 3378 "MatSeqBAIJSetPreallocation_SeqBAIJ", 3379 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3380 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 3381 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 3382 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3383 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3384 PetscFunctionReturn(0); 3385 } 3386 EXTERN_C_END 3387 3388 #undef __FUNCT__ 3389 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3390 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3391 { 3392 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3393 PetscErrorCode ierr; 3394 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3395 3396 PetscFunctionBegin; 3397 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3398 3399 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3400 c->imax = a->imax; 3401 c->ilen = a->ilen; 3402 c->free_imax_ilen = PETSC_FALSE; 3403 } else { 3404 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3405 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3406 for (i=0; i<mbs; i++) { 3407 c->imax[i] = a->imax[i]; 3408 c->ilen[i] = a->ilen[i]; 3409 } 3410 c->free_imax_ilen = PETSC_TRUE; 3411 } 3412 3413 /* allocate the matrix space */ 3414 if (mallocmatspace){ 3415 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3416 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3417 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3418 c->singlemalloc = PETSC_FALSE; 3419 c->free_ij = PETSC_FALSE; 3420 c->i = a->i; 3421 c->j = a->j; 3422 c->parent = A; 3423 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3424 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3425 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3426 } else { 3427 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3428 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3429 c->singlemalloc = PETSC_TRUE; 3430 c->free_ij = PETSC_TRUE; 3431 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3432 if (mbs > 0) { 3433 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3434 if (cpvalues == MAT_COPY_VALUES) { 3435 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3436 } else { 3437 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3438 } 3439 } 3440 } 3441 } 3442 3443 c->roworiented = a->roworiented; 3444 c->nonew = a->nonew; 3445 ierr = PetscLayoutCopy(A->rmap,&C->rmap);CHKERRQ(ierr); 3446 ierr = PetscLayoutCopy(A->cmap,&C->cmap);CHKERRQ(ierr); 3447 c->bs2 = a->bs2; 3448 c->mbs = a->mbs; 3449 c->nbs = a->nbs; 3450 3451 if (a->diag) { 3452 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3453 c->diag = a->diag; 3454 c->free_diag = PETSC_FALSE; 3455 } else { 3456 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3457 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3458 for (i=0; i<mbs; i++) { 3459 c->diag[i] = a->diag[i]; 3460 } 3461 c->free_diag = PETSC_TRUE; 3462 } 3463 } else c->diag = 0; 3464 c->nz = a->nz; 3465 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3466 c->solve_work = 0; 3467 c->mult_work = 0; 3468 c->free_a = PETSC_TRUE; 3469 c->free_ij = PETSC_TRUE; 3470 C->preallocated = PETSC_TRUE; 3471 C->assembled = PETSC_TRUE; 3472 3473 c->compressedrow.use = a->compressedrow.use; 3474 c->compressedrow.nrows = a->compressedrow.nrows; 3475 c->compressedrow.check = a->compressedrow.check; 3476 if (a->compressedrow.use){ 3477 i = a->compressedrow.nrows; 3478 ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 3479 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3480 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3481 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3482 } else { 3483 c->compressedrow.use = PETSC_FALSE; 3484 c->compressedrow.i = PETSC_NULL; 3485 c->compressedrow.rindex = PETSC_NULL; 3486 } 3487 C->same_nonzero = A->same_nonzero; 3488 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3489 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3490 PetscFunctionReturn(0); 3491 } 3492 3493 #undef __FUNCT__ 3494 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3495 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3496 { 3497 PetscErrorCode ierr; 3498 3499 PetscFunctionBegin; 3500 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 3501 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3502 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3503 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE); 3504 PetscFunctionReturn(0); 3505 } 3506 3507 #undef __FUNCT__ 3508 #define __FUNCT__ "MatLoad_SeqBAIJ" 3509 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3510 { 3511 Mat_SeqBAIJ *a; 3512 PetscErrorCode ierr; 3513 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3514 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3515 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3516 PetscInt *masked,nmask,tmp,bs2,ishift; 3517 PetscMPIInt size; 3518 int fd; 3519 PetscScalar *aa; 3520 MPI_Comm comm = ((PetscObject)viewer)->comm; 3521 3522 PetscFunctionBegin; 3523 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3524 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3525 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3526 bs2 = bs*bs; 3527 3528 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3529 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3530 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3531 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3532 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3533 M = header[1]; N = header[2]; nz = header[3]; 3534 3535 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3536 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3537 3538 /* 3539 This code adds extra rows to make sure the number of rows is 3540 divisible by the blocksize 3541 */ 3542 mbs = M/bs; 3543 extra_rows = bs - M + bs*(mbs); 3544 if (extra_rows == bs) extra_rows = 0; 3545 else mbs++; 3546 if (extra_rows) { 3547 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3548 } 3549 3550 /* Set global sizes if not already set */ 3551 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3552 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3553 } else { /* Check if the matrix global sizes are correct */ 3554 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3555 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3556 } 3557 3558 /* read in row lengths */ 3559 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3560 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3561 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3562 3563 /* read in column indices */ 3564 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3565 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3566 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3567 3568 /* loop over row lengths determining block row lengths */ 3569 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3570 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3571 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3572 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3573 rowcount = 0; 3574 nzcount = 0; 3575 for (i=0; i<mbs; i++) { 3576 nmask = 0; 3577 for (j=0; j<bs; j++) { 3578 kmax = rowlengths[rowcount]; 3579 for (k=0; k<kmax; k++) { 3580 tmp = jj[nzcount++]/bs; 3581 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3582 } 3583 rowcount++; 3584 } 3585 browlengths[i] += nmask; 3586 /* zero out the mask elements we set */ 3587 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3588 } 3589 3590 /* Do preallocation */ 3591 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3592 a = (Mat_SeqBAIJ*)newmat->data; 3593 3594 /* set matrix "i" values */ 3595 a->i[0] = 0; 3596 for (i=1; i<= mbs; i++) { 3597 a->i[i] = a->i[i-1] + browlengths[i-1]; 3598 a->ilen[i-1] = browlengths[i-1]; 3599 } 3600 a->nz = 0; 3601 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3602 3603 /* read in nonzero values */ 3604 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3605 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3606 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3607 3608 /* set "a" and "j" values into matrix */ 3609 nzcount = 0; jcount = 0; 3610 for (i=0; i<mbs; i++) { 3611 nzcountb = nzcount; 3612 nmask = 0; 3613 for (j=0; j<bs; j++) { 3614 kmax = rowlengths[i*bs+j]; 3615 for (k=0; k<kmax; k++) { 3616 tmp = jj[nzcount++]/bs; 3617 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3618 } 3619 } 3620 /* sort the masked values */ 3621 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3622 3623 /* set "j" values into matrix */ 3624 maskcount = 1; 3625 for (j=0; j<nmask; j++) { 3626 a->j[jcount++] = masked[j]; 3627 mask[masked[j]] = maskcount++; 3628 } 3629 /* set "a" values into matrix */ 3630 ishift = bs2*a->i[i]; 3631 for (j=0; j<bs; j++) { 3632 kmax = rowlengths[i*bs+j]; 3633 for (k=0; k<kmax; k++) { 3634 tmp = jj[nzcountb]/bs ; 3635 block = mask[tmp] - 1; 3636 point = jj[nzcountb] - bs*tmp; 3637 idx = ishift + bs2*block + j + bs*point; 3638 a->a[idx] = (MatScalar)aa[nzcountb++]; 3639 } 3640 } 3641 /* zero out the mask elements we set */ 3642 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3643 } 3644 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3645 3646 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3647 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3648 ierr = PetscFree(aa);CHKERRQ(ierr); 3649 ierr = PetscFree(jj);CHKERRQ(ierr); 3650 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3651 3652 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3653 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3654 ierr = MatView_Private(newmat);CHKERRQ(ierr); 3655 PetscFunctionReturn(0); 3656 } 3657 3658 #undef __FUNCT__ 3659 #define __FUNCT__ "MatCreateSeqBAIJ" 3660 /*@C 3661 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3662 compressed row) format. For good matrix assembly performance the 3663 user should preallocate the matrix storage by setting the parameter nz 3664 (or the array nnz). By setting these parameters accurately, performance 3665 during matrix assembly can be increased by more than a factor of 50. 3666 3667 Collective on MPI_Comm 3668 3669 Input Parameters: 3670 + comm - MPI communicator, set to PETSC_COMM_SELF 3671 . bs - size of block 3672 . m - number of rows 3673 . n - number of columns 3674 . nz - number of nonzero blocks per block row (same for all rows) 3675 - nnz - array containing the number of nonzero blocks in the various block rows 3676 (possibly different for each block row) or PETSC_NULL 3677 3678 Output Parameter: 3679 . A - the matrix 3680 3681 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3682 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3683 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3684 3685 Options Database Keys: 3686 . -mat_no_unroll - uses code that does not unroll the loops in the 3687 block calculations (much slower) 3688 . -mat_block_size - size of the blocks to use 3689 3690 Level: intermediate 3691 3692 Notes: 3693 The number of rows and columns must be divisible by blocksize. 3694 3695 If the nnz parameter is given then the nz parameter is ignored 3696 3697 A nonzero block is any block that as 1 or more nonzeros in it 3698 3699 The block AIJ format is fully compatible with standard Fortran 77 3700 storage. That is, the stored row and column indices can begin at 3701 either one (as in Fortran) or zero. See the users' manual for details. 3702 3703 Specify the preallocated storage with either nz or nnz (not both). 3704 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3705 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3706 matrices. 3707 3708 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3709 @*/ 3710 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3711 { 3712 PetscErrorCode ierr; 3713 3714 PetscFunctionBegin; 3715 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3716 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3717 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3718 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3719 PetscFunctionReturn(0); 3720 } 3721 3722 #undef __FUNCT__ 3723 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3724 /*@C 3725 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3726 per row in the matrix. For good matrix assembly performance the 3727 user should preallocate the matrix storage by setting the parameter nz 3728 (or the array nnz). By setting these parameters accurately, performance 3729 during matrix assembly can be increased by more than a factor of 50. 3730 3731 Collective on MPI_Comm 3732 3733 Input Parameters: 3734 + A - the matrix 3735 . bs - size of block 3736 . nz - number of block nonzeros per block row (same for all rows) 3737 - nnz - array containing the number of block nonzeros in the various block rows 3738 (possibly different for each block row) or PETSC_NULL 3739 3740 Options Database Keys: 3741 . -mat_no_unroll - uses code that does not unroll the loops in the 3742 block calculations (much slower) 3743 . -mat_block_size - size of the blocks to use 3744 3745 Level: intermediate 3746 3747 Notes: 3748 If the nnz parameter is given then the nz parameter is ignored 3749 3750 You can call MatGetInfo() to get information on how effective the preallocation was; 3751 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3752 You can also run with the option -info and look for messages with the string 3753 malloc in them to see if additional memory allocation was needed. 3754 3755 The block AIJ format is fully compatible with standard Fortran 77 3756 storage. That is, the stored row and column indices can begin at 3757 either one (as in Fortran) or zero. See the users' manual for details. 3758 3759 Specify the preallocated storage with either nz or nnz (not both). 3760 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3761 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3762 3763 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3764 @*/ 3765 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3766 { 3767 PetscErrorCode ierr; 3768 3769 PetscFunctionBegin; 3770 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3771 PetscFunctionReturn(0); 3772 } 3773 3774 #undef __FUNCT__ 3775 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3776 /*@C 3777 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3778 (the default sequential PETSc format). 3779 3780 Collective on MPI_Comm 3781 3782 Input Parameters: 3783 + A - the matrix 3784 . i - the indices into j for the start of each local row (starts with zero) 3785 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3786 - v - optional values in the matrix 3787 3788 Level: developer 3789 3790 .keywords: matrix, aij, compressed row, sparse 3791 3792 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3793 @*/ 3794 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3795 { 3796 PetscErrorCode ierr; 3797 3798 PetscFunctionBegin; 3799 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3800 PetscFunctionReturn(0); 3801 } 3802 3803 3804 #undef __FUNCT__ 3805 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3806 /*@ 3807 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3808 3809 Collective on MPI_Comm 3810 3811 Input Parameters: 3812 + comm - must be an MPI communicator of size 1 3813 . bs - size of block 3814 . m - number of rows 3815 . n - number of columns 3816 . i - row indices 3817 . j - column indices 3818 - a - matrix values 3819 3820 Output Parameter: 3821 . mat - the matrix 3822 3823 Level: advanced 3824 3825 Notes: 3826 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3827 once the matrix is destroyed 3828 3829 You cannot set new nonzero locations into this matrix, that will generate an error. 3830 3831 The i and j indices are 0 based 3832 3833 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3834 3835 3836 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3837 3838 @*/ 3839 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3840 { 3841 PetscErrorCode ierr; 3842 PetscInt ii; 3843 Mat_SeqBAIJ *baij; 3844 3845 PetscFunctionBegin; 3846 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3847 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3848 3849 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3850 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3851 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3852 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3853 baij = (Mat_SeqBAIJ*)(*mat)->data; 3854 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3855 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3856 3857 baij->i = i; 3858 baij->j = j; 3859 baij->a = a; 3860 baij->singlemalloc = PETSC_FALSE; 3861 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3862 baij->free_a = PETSC_FALSE; 3863 baij->free_ij = PETSC_FALSE; 3864 3865 for (ii=0; ii<m; ii++) { 3866 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3867 #if defined(PETSC_USE_DEBUG) 3868 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3869 #endif 3870 } 3871 #if defined(PETSC_USE_DEBUG) 3872 for (ii=0; ii<baij->i[m]; ii++) { 3873 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3874 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3875 } 3876 #endif 3877 3878 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3879 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3880 PetscFunctionReturn(0); 3881 } 3882