1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "../src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "petscblaslapack.h" 9 #include "../src/mat/blockinvert.h" 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 13 /*@ 14 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 15 16 Collective on Mat 17 18 Input Parameters: 19 . mat - the matrix 20 21 Level: advanced 22 @*/ 23 PetscErrorCode MatSeqBAIJInvertBlockDiagonal(Mat mat) 24 { 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 29 if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 30 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 31 ierr = PetscUseMethod(mat,"MatSeqBAIJInvertBlockDiagonal_C",(Mat),(mat));CHKERRQ(ierr); 32 PetscFunctionReturn(0); 33 } 34 35 EXTERN_C_BEGIN 36 #undef __FUNCT__ 37 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 38 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A) 39 { 40 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 41 PetscErrorCode ierr; 42 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 43 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 44 PetscReal shift = 0.0; 45 46 PetscFunctionBegin; 47 if (a->idiagvalid) PetscFunctionReturn(0); 48 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 49 diag_offset = a->diag; 50 if (!a->idiag) { 51 ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 52 ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 53 } 54 diag = a->idiag; 55 mdiag = a->idiag+bs2*mbs; 56 /* factor and invert each block */ 57 switch (bs){ 58 case 1: 59 for (i=0; i<mbs; i++) { 60 odiag = v + 1*diag_offset[i]; 61 diag[0] = odiag[0]; 62 mdiag[0] = odiag[0]; 63 diag[0] = 1.0 / (diag[0] + shift); 64 diag += 1; 65 mdiag += 1; 66 } 67 break; 68 case 2: 69 for (i=0; i<mbs; i++) { 70 odiag = v + 4*diag_offset[i]; 71 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 72 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 73 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 74 diag += 4; 75 mdiag += 4; 76 } 77 break; 78 case 3: 79 for (i=0; i<mbs; i++) { 80 odiag = v + 9*diag_offset[i]; 81 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 82 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 83 diag[8] = odiag[8]; 84 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 85 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 86 mdiag[8] = odiag[8]; 87 ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 88 diag += 9; 89 mdiag += 9; 90 } 91 break; 92 case 4: 93 for (i=0; i<mbs; i++) { 94 odiag = v + 16*diag_offset[i]; 95 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 97 ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 98 diag += 16; 99 mdiag += 16; 100 } 101 break; 102 case 5: 103 for (i=0; i<mbs; i++) { 104 odiag = v + 25*diag_offset[i]; 105 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 107 ierr = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 108 diag += 25; 109 mdiag += 25; 110 } 111 break; 112 case 6: 113 for (i=0; i<mbs; i++) { 114 odiag = v + 36*diag_offset[i]; 115 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 117 ierr = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 118 diag += 36; 119 mdiag += 36; 120 } 121 break; 122 case 7: 123 for (i=0; i<mbs; i++) { 124 odiag = v + 49*diag_offset[i]; 125 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 126 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 127 ierr = Kernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 128 diag += 49; 129 mdiag += 49; 130 } 131 break; 132 default: 133 ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr); 134 for (i=0; i<mbs; i++) { 135 odiag = v + bs2*diag_offset[i]; 136 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 137 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 138 ierr = Kernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 139 diag += bs2; 140 mdiag += bs2; 141 } 142 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 143 } 144 a->idiagvalid = PETSC_TRUE; 145 PetscFunctionReturn(0); 146 } 147 EXTERN_C_END 148 149 #undef __FUNCT__ 150 #define __FUNCT__ "MatSOR_SeqBAIJ_1" 151 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 152 { 153 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 154 PetscScalar *x,x1,s1; 155 const PetscScalar *b; 156 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 157 PetscErrorCode ierr; 158 PetscInt m = a->mbs,i,i2,nz,j; 159 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 160 161 PetscFunctionBegin; 162 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 163 its = its*lits; 164 if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 165 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 166 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 167 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 168 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 169 170 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 171 172 diag = a->diag; 173 idiag = a->idiag; 174 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 175 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 176 177 if (flag & SOR_ZERO_INITIAL_GUESS) { 178 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 179 x[0] = b[0]*idiag[0]; 180 i2 = 1; 181 idiag += 1; 182 for (i=1; i<m; i++) { 183 v = aa + ai[i]; 184 vi = aj + ai[i]; 185 nz = diag[i] - ai[i]; 186 s1 = b[i2]; 187 for (j=0; j<nz; j++) { 188 s1 -= v[j]*x[vi[j]]; 189 } 190 x[i2] = idiag[0]*s1; 191 idiag += 1; 192 i2 += 1; 193 } 194 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 195 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 196 } 197 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 198 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 199 i2 = 0; 200 mdiag = a->idiag+a->mbs; 201 for (i=0; i<m; i++) { 202 x1 = x[i2]; 203 x[i2] = mdiag[0]*x1; 204 mdiag += 1; 205 i2 += 1; 206 } 207 ierr = PetscLogFlops(m);CHKERRQ(ierr); 208 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 209 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 210 } 211 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 212 idiag = a->idiag+a->mbs - 1; 213 i2 = m - 1; 214 x1 = x[i2]; 215 x[i2] = idiag[0]*x1; 216 idiag -= 1; 217 i2 -= 1; 218 for (i=m-2; i>=0; i--) { 219 v = aa + (diag[i]+1); 220 vi = aj + diag[i] + 1; 221 nz = ai[i+1] - diag[i] - 1; 222 s1 = x[i2]; 223 for (j=0; j<nz; j++) { 224 s1 -= v[j]*x[vi[j]]; 225 } 226 x[i2] = idiag[0]*s1; 227 idiag -= 1; 228 i2 -= 1; 229 } 230 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 231 } 232 } else { 233 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 234 } 235 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 236 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 237 PetscFunctionReturn(0); 238 } 239 240 #undef __FUNCT__ 241 #define __FUNCT__ "MatSOR_SeqBAIJ_2" 242 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 243 { 244 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 245 PetscScalar *x,x1,x2,s1,s2; 246 const PetscScalar *b; 247 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 248 PetscErrorCode ierr; 249 PetscInt m = a->mbs,i,i2,nz,idx,j,it; 250 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 251 252 PetscFunctionBegin; 253 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 254 its = its*lits; 255 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 256 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 257 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 258 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 259 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 260 261 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 262 263 diag = a->diag; 264 idiag = a->idiag; 265 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 266 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 267 268 if (flag & SOR_ZERO_INITIAL_GUESS) { 269 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 270 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 271 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 272 i2 = 2; 273 idiag += 4; 274 for (i=1; i<m; i++) { 275 v = aa + 4*ai[i]; 276 vi = aj + ai[i]; 277 nz = diag[i] - ai[i]; 278 s1 = b[i2]; s2 = b[i2+1]; 279 for (j=0; j<nz; j++) { 280 idx = 2*vi[j]; 281 it = 4*j; 282 x1 = x[idx]; x2 = x[1+idx]; 283 s1 -= v[it]*x1 + v[it+2]*x2; 284 s2 -= v[it+1]*x1 + v[it+3]*x2; 285 } 286 x[i2] = idiag[0]*s1 + idiag[2]*s2; 287 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 288 idiag += 4; 289 i2 += 2; 290 } 291 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 292 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 293 } 294 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 295 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 296 i2 = 0; 297 mdiag = a->idiag+4*a->mbs; 298 for (i=0; i<m; i++) { 299 x1 = x[i2]; x2 = x[i2+1]; 300 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 301 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 302 mdiag += 4; 303 i2 += 2; 304 } 305 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 306 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 307 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 308 } 309 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 310 idiag = a->idiag+4*a->mbs - 4; 311 i2 = 2*m - 2; 312 x1 = x[i2]; x2 = x[i2+1]; 313 x[i2] = idiag[0]*x1 + idiag[2]*x2; 314 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 315 idiag -= 4; 316 i2 -= 2; 317 for (i=m-2; i>=0; i--) { 318 v = aa + 4*(diag[i]+1); 319 vi = aj + diag[i] + 1; 320 nz = ai[i+1] - diag[i] - 1; 321 s1 = x[i2]; s2 = x[i2+1]; 322 for (j=0; j<nz; j++) { 323 idx = 2*vi[j]; 324 it = 4*j; 325 x1 = x[idx]; x2 = x[1+idx]; 326 s1 -= v[it]*x1 + v[it+2]*x2; 327 s2 -= v[it+1]*x1 + v[it+3]*x2; 328 } 329 x[i2] = idiag[0]*s1 + idiag[2]*s2; 330 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 331 idiag -= 4; 332 i2 -= 2; 333 } 334 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 335 } 336 } else { 337 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 338 } 339 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 340 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 341 PetscFunctionReturn(0); 342 } 343 344 #undef __FUNCT__ 345 #define __FUNCT__ "MatSOR_SeqBAIJ_3" 346 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 347 { 348 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 349 PetscScalar *x,x1,x2,x3,s1,s2,s3; 350 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 351 const PetscScalar *b; 352 PetscErrorCode ierr; 353 PetscInt m = a->mbs,i,i2,nz,idx; 354 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 355 356 PetscFunctionBegin; 357 its = its*lits; 358 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 359 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 360 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 361 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 362 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 363 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 364 365 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 366 367 diag = a->diag; 368 idiag = a->idiag; 369 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 370 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 371 372 if (flag & SOR_ZERO_INITIAL_GUESS) { 373 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 374 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 375 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 376 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 377 i2 = 3; 378 idiag += 9; 379 for (i=1; i<m; i++) { 380 v = aa + 9*ai[i]; 381 vi = aj + ai[i]; 382 nz = diag[i] - ai[i]; 383 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 384 while (nz--) { 385 idx = 3*(*vi++); 386 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 387 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 388 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 389 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 390 v += 9; 391 } 392 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 393 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 394 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 395 idiag += 9; 396 i2 += 3; 397 } 398 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 399 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 400 } 401 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 402 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 403 i2 = 0; 404 mdiag = a->idiag+9*a->mbs; 405 for (i=0; i<m; i++) { 406 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 407 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 408 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 409 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 410 mdiag += 9; 411 i2 += 3; 412 } 413 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 414 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 415 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 416 } 417 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 418 idiag = a->idiag+9*a->mbs - 9; 419 i2 = 3*m - 3; 420 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 421 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 422 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 423 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 424 idiag -= 9; 425 i2 -= 3; 426 for (i=m-2; i>=0; i--) { 427 v = aa + 9*(diag[i]+1); 428 vi = aj + diag[i] + 1; 429 nz = ai[i+1] - diag[i] - 1; 430 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 431 while (nz--) { 432 idx = 3*(*vi++); 433 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 434 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 435 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 436 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 437 v += 9; 438 } 439 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 440 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 441 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 442 idiag -= 9; 443 i2 -= 3; 444 } 445 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 446 } 447 } else { 448 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 449 } 450 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 451 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 452 PetscFunctionReturn(0); 453 } 454 455 #undef __FUNCT__ 456 #define __FUNCT__ "MatSOR_SeqBAIJ_4" 457 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 458 { 459 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 460 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 461 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 462 const PetscScalar *b; 463 PetscErrorCode ierr; 464 PetscInt m = a->mbs,i,i2,nz,idx; 465 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 466 467 PetscFunctionBegin; 468 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 469 its = its*lits; 470 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 471 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 472 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 473 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 474 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 475 476 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 477 478 diag = a->diag; 479 idiag = a->idiag; 480 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 481 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 482 483 if (flag & SOR_ZERO_INITIAL_GUESS) { 484 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 485 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 486 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 487 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 488 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 489 i2 = 4; 490 idiag += 16; 491 for (i=1; i<m; i++) { 492 v = aa + 16*ai[i]; 493 vi = aj + ai[i]; 494 nz = diag[i] - ai[i]; 495 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 496 while (nz--) { 497 idx = 4*(*vi++); 498 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 499 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 500 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 501 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 502 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 503 v += 16; 504 } 505 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 506 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 507 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 508 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 509 idiag += 16; 510 i2 += 4; 511 } 512 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 513 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 514 } 515 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 516 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 517 i2 = 0; 518 mdiag = a->idiag+16*a->mbs; 519 for (i=0; i<m; i++) { 520 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 521 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 522 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 523 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 524 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 525 mdiag += 16; 526 i2 += 4; 527 } 528 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 529 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 530 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 531 } 532 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 533 idiag = a->idiag+16*a->mbs - 16; 534 i2 = 4*m - 4; 535 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 536 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 537 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 538 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 539 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 540 idiag -= 16; 541 i2 -= 4; 542 for (i=m-2; i>=0; i--) { 543 v = aa + 16*(diag[i]+1); 544 vi = aj + diag[i] + 1; 545 nz = ai[i+1] - diag[i] - 1; 546 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 547 while (nz--) { 548 idx = 4*(*vi++); 549 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 550 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 551 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 552 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 553 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 554 v += 16; 555 } 556 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 557 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 558 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 559 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 560 idiag -= 16; 561 i2 -= 4; 562 } 563 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 564 } 565 } else { 566 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 567 } 568 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 569 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 570 PetscFunctionReturn(0); 571 } 572 573 #undef __FUNCT__ 574 #define __FUNCT__ "MatSOR_SeqBAIJ_5" 575 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 576 { 577 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 578 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 579 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 580 const PetscScalar *b; 581 PetscErrorCode ierr; 582 PetscInt m = a->mbs,i,i2,nz,idx; 583 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 584 585 PetscFunctionBegin; 586 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 587 its = its*lits; 588 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 589 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 590 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 591 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 592 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 593 594 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 595 596 diag = a->diag; 597 idiag = a->idiag; 598 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 599 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 600 601 if (flag & SOR_ZERO_INITIAL_GUESS) { 602 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 603 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 604 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 605 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 606 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 607 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 608 i2 = 5; 609 idiag += 25; 610 for (i=1; i<m; i++) { 611 v = aa + 25*ai[i]; 612 vi = aj + ai[i]; 613 nz = diag[i] - ai[i]; 614 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 615 while (nz--) { 616 idx = 5*(*vi++); 617 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 618 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 619 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 620 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 621 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 622 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 623 v += 25; 624 } 625 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 626 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 627 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 628 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 629 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 630 idiag += 25; 631 i2 += 5; 632 } 633 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 634 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 635 } 636 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 637 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 638 i2 = 0; 639 mdiag = a->idiag+25*a->mbs; 640 for (i=0; i<m; i++) { 641 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 642 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 643 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 644 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 645 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 646 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 647 mdiag += 25; 648 i2 += 5; 649 } 650 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 651 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 652 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 653 } 654 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 655 idiag = a->idiag+25*a->mbs - 25; 656 i2 = 5*m - 5; 657 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 658 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 659 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 660 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 661 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 662 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 663 idiag -= 25; 664 i2 -= 5; 665 for (i=m-2; i>=0; i--) { 666 v = aa + 25*(diag[i]+1); 667 vi = aj + diag[i] + 1; 668 nz = ai[i+1] - diag[i] - 1; 669 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 670 while (nz--) { 671 idx = 5*(*vi++); 672 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 673 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 674 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 675 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 676 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 677 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 678 v += 25; 679 } 680 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 681 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 682 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 683 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 684 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 685 idiag -= 25; 686 i2 -= 5; 687 } 688 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 689 } 690 } else { 691 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 692 } 693 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 694 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 695 PetscFunctionReturn(0); 696 } 697 698 #undef __FUNCT__ 699 #define __FUNCT__ "MatSOR_SeqBAIJ_6" 700 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 701 { 702 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 703 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 704 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 705 const PetscScalar *b; 706 PetscErrorCode ierr; 707 PetscInt m = a->mbs,i,i2,nz,idx; 708 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 709 710 PetscFunctionBegin; 711 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 712 its = its*lits; 713 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 714 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 715 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 716 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 717 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 718 719 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 720 721 diag = a->diag; 722 idiag = a->idiag; 723 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 724 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 725 726 if (flag & SOR_ZERO_INITIAL_GUESS) { 727 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 728 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 729 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 730 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 731 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 732 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 733 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 734 i2 = 6; 735 idiag += 36; 736 for (i=1; i<m; i++) { 737 v = aa + 36*ai[i]; 738 vi = aj + ai[i]; 739 nz = diag[i] - ai[i]; 740 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 741 while (nz--) { 742 idx = 6*(*vi++); 743 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 744 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 745 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 746 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 747 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 748 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 749 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 750 v += 36; 751 } 752 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 753 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 754 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 755 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 756 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 757 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 758 idiag += 36; 759 i2 += 6; 760 } 761 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 762 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 763 } 764 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 765 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 766 i2 = 0; 767 mdiag = a->idiag+36*a->mbs; 768 for (i=0; i<m; i++) { 769 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 770 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 771 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 772 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 773 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 774 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 775 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 776 mdiag += 36; 777 i2 += 6; 778 } 779 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 780 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 781 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 782 } 783 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 784 idiag = a->idiag+36*a->mbs - 36; 785 i2 = 6*m - 6; 786 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 787 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 788 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 789 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 790 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 791 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 792 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 793 idiag -= 36; 794 i2 -= 6; 795 for (i=m-2; i>=0; i--) { 796 v = aa + 36*(diag[i]+1); 797 vi = aj + diag[i] + 1; 798 nz = ai[i+1] - diag[i] - 1; 799 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 800 while (nz--) { 801 idx = 6*(*vi++); 802 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 803 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 804 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 805 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 806 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 807 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 808 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 809 v += 36; 810 } 811 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 812 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 813 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 814 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 815 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 816 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 817 idiag -= 36; 818 i2 -= 6; 819 } 820 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 821 } 822 } else { 823 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 824 } 825 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 826 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 827 PetscFunctionReturn(0); 828 } 829 830 #undef __FUNCT__ 831 #define __FUNCT__ "MatSOR_SeqBAIJ_7" 832 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 833 { 834 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 835 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 836 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 837 const PetscScalar *b; 838 PetscErrorCode ierr; 839 PetscInt m = a->mbs,i,i2,nz,idx; 840 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 841 842 PetscFunctionBegin; 843 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 844 its = its*lits; 845 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 846 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 847 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 848 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 849 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 850 851 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 852 853 diag = a->diag; 854 idiag = a->idiag; 855 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 856 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 857 858 if (flag & SOR_ZERO_INITIAL_GUESS) { 859 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 860 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 861 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 862 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 863 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 864 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 865 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 866 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 867 i2 = 7; 868 idiag += 49; 869 for (i=1; i<m; i++) { 870 v = aa + 49*ai[i]; 871 vi = aj + ai[i]; 872 nz = diag[i] - ai[i]; 873 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 874 while (nz--) { 875 idx = 7*(*vi++); 876 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 877 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 878 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 879 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 880 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 881 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 882 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 883 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 884 v += 49; 885 } 886 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 887 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 888 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 889 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 890 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 891 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 892 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 893 idiag += 49; 894 i2 += 7; 895 } 896 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 897 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 898 } 899 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 900 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 901 i2 = 0; 902 mdiag = a->idiag+49*a->mbs; 903 for (i=0; i<m; i++) { 904 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 905 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7; 906 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7; 907 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7; 908 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7; 909 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7; 910 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7; 911 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7; 912 mdiag += 49; 913 i2 += 7; 914 } 915 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 916 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 917 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 918 } 919 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 920 idiag = a->idiag+49*a->mbs - 49; 921 i2 = 7*m - 7; 922 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 923 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 924 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 925 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 926 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 927 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 928 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 929 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 930 idiag -= 49; 931 i2 -= 7; 932 for (i=m-2; i>=0; i--) { 933 v = aa + 49*(diag[i]+1); 934 vi = aj + diag[i] + 1; 935 nz = ai[i+1] - diag[i] - 1; 936 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 937 while (nz--) { 938 idx = 7*(*vi++); 939 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 940 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 941 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 942 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 943 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 944 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 945 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 946 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 947 v += 49; 948 } 949 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 950 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 951 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 952 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 953 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 954 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 955 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 956 idiag -= 49; 957 i2 -= 7; 958 } 959 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 960 } 961 } else { 962 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 963 } 964 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 965 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 966 PetscFunctionReturn(0); 967 } 968 969 #undef __FUNCT__ 970 #define __FUNCT__ "MatSOR_SeqBAIJ_N" 971 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 972 { 973 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 974 PetscScalar *x,*work,*w,*workt; 975 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 976 const PetscScalar *b; 977 PetscErrorCode ierr; 978 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j; 979 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 980 981 PetscFunctionBegin; 982 its = its*lits; 983 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 984 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 985 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 986 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 987 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 988 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 989 990 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 991 992 diag = a->diag; 993 idiag = a->idiag; 994 if (!a->mult_work) { 995 k = PetscMax(A->rmap->n,A->cmap->n); 996 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 997 } 998 work = a->mult_work; 999 if (!a->sor_work) { 1000 ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr); 1001 } 1002 w = a->sor_work; 1003 1004 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1005 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1006 1007 if (flag & SOR_ZERO_INITIAL_GUESS) { 1008 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1009 Kernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 1010 /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 1011 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 1012 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/ 1013 i2 = bs; 1014 idiag += bs2; 1015 for (i=1; i<m; i++) { 1016 v = aa + bs2*ai[i]; 1017 vi = aj + ai[i]; 1018 nz = diag[i] - ai[i]; 1019 1020 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1021 /* copy all rows of x that are needed into contiguous space */ 1022 workt = work; 1023 for (j=0; j<nz; j++) { 1024 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1025 workt += bs; 1026 } 1027 Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1028 /*s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 1029 while (nz--) { 1030 idx = N*(*vi++); 1031 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 1032 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1033 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1034 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1035 v += N2; 1036 } */ 1037 1038 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1039 /* x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1040 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1041 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/ 1042 1043 idiag += bs2; 1044 i2 += bs; 1045 } 1046 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 1047 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1048 } 1049 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 1050 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1051 i2 = 0; 1052 mdiag = a->idiag+bs2*a->mbs; 1053 ierr = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr); 1054 for (i=0; i<m; i++) { 1055 Kernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2); 1056 /* x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1057 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 1058 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 1059 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */ 1060 1061 mdiag += bs2; 1062 i2 += bs; 1063 } 1064 ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr); 1065 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1066 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 1067 } 1068 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1069 idiag = a->idiag+bs2*a->mbs - bs2; 1070 i2 = bs*m - bs; 1071 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1072 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1073 /*x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1074 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 1075 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 1076 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/ 1077 idiag -= bs2; 1078 i2 -= bs; 1079 for (i=m-2; i>=0; i--) { 1080 v = aa + bs2*(diag[i]+1); 1081 vi = aj + diag[i] + 1; 1082 nz = ai[i+1] - diag[i] - 1; 1083 1084 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1085 /* copy all rows of x that are needed into contiguous space */ 1086 workt = work; 1087 for (j=0; j<nz; j++) { 1088 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1089 workt += bs; 1090 } 1091 Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1092 /* s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 1093 while (nz--) { 1094 idx = N*(*vi++); 1095 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 1096 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1097 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1098 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1099 v += N2; 1100 } */ 1101 1102 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1103 /*x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1104 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1105 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */ 1106 idiag -= bs2; 1107 i2 -= bs; 1108 } 1109 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1110 } 1111 } else { 1112 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 1113 } 1114 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1115 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1116 PetscFunctionReturn(0); 1117 } 1118 1119 /* 1120 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1121 */ 1122 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1123 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1124 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1125 #define matsetvaluesblocked4_ matsetvaluesblocked4 1126 #endif 1127 1128 EXTERN_C_BEGIN 1129 #undef __FUNCT__ 1130 #define __FUNCT__ "matsetvaluesblocked4_" 1131 void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 1132 { 1133 Mat A = *AA; 1134 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1135 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 1136 PetscInt *ai=a->i,*ailen=a->ilen; 1137 PetscInt *aj=a->j,stepval,lastcol = -1; 1138 const PetscScalar *value = v; 1139 MatScalar *ap,*aa = a->a,*bap; 1140 1141 PetscFunctionBegin; 1142 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 1143 stepval = (n-1)*4; 1144 for (k=0; k<m; k++) { /* loop over added rows */ 1145 row = im[k]; 1146 rp = aj + ai[row]; 1147 ap = aa + 16*ai[row]; 1148 nrow = ailen[row]; 1149 low = 0; 1150 high = nrow; 1151 for (l=0; l<n; l++) { /* loop over added columns */ 1152 col = in[l]; 1153 if (col <= lastcol) low = 0; else high = nrow; 1154 lastcol = col; 1155 value = v + k*(stepval+4 + l)*4; 1156 while (high-low > 7) { 1157 t = (low+high)/2; 1158 if (rp[t] > col) high = t; 1159 else low = t; 1160 } 1161 for (i=low; i<high; i++) { 1162 if (rp[i] > col) break; 1163 if (rp[i] == col) { 1164 bap = ap + 16*i; 1165 for (ii=0; ii<4; ii++,value+=stepval) { 1166 for (jj=ii; jj<16; jj+=4) { 1167 bap[jj] += *value++; 1168 } 1169 } 1170 goto noinsert2; 1171 } 1172 } 1173 N = nrow++ - 1; 1174 high++; /* added new column index thus must search to one higher than before */ 1175 /* shift up all the later entries in this row */ 1176 for (ii=N; ii>=i; ii--) { 1177 rp[ii+1] = rp[ii]; 1178 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1179 } 1180 if (N >= i) { 1181 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1182 } 1183 rp[i] = col; 1184 bap = ap + 16*i; 1185 for (ii=0; ii<4; ii++,value+=stepval) { 1186 for (jj=ii; jj<16; jj+=4) { 1187 bap[jj] = *value++; 1188 } 1189 } 1190 noinsert2:; 1191 low = i; 1192 } 1193 ailen[row] = nrow; 1194 } 1195 PetscFunctionReturnVoid(); 1196 } 1197 EXTERN_C_END 1198 1199 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1200 #define matsetvalues4_ MATSETVALUES4 1201 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1202 #define matsetvalues4_ matsetvalues4 1203 #endif 1204 1205 EXTERN_C_BEGIN 1206 #undef __FUNCT__ 1207 #define __FUNCT__ "MatSetValues4_" 1208 void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1209 { 1210 Mat A = *AA; 1211 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1212 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1213 PetscInt *ai=a->i,*ailen=a->ilen; 1214 PetscInt *aj=a->j,brow,bcol; 1215 PetscInt ridx,cidx,lastcol = -1; 1216 MatScalar *ap,value,*aa=a->a,*bap; 1217 1218 PetscFunctionBegin; 1219 for (k=0; k<m; k++) { /* loop over added rows */ 1220 row = im[k]; brow = row/4; 1221 rp = aj + ai[brow]; 1222 ap = aa + 16*ai[brow]; 1223 nrow = ailen[brow]; 1224 low = 0; 1225 high = nrow; 1226 for (l=0; l<n; l++) { /* loop over added columns */ 1227 col = in[l]; bcol = col/4; 1228 ridx = row % 4; cidx = col % 4; 1229 value = v[l + k*n]; 1230 if (col <= lastcol) low = 0; else high = nrow; 1231 lastcol = col; 1232 while (high-low > 7) { 1233 t = (low+high)/2; 1234 if (rp[t] > bcol) high = t; 1235 else low = t; 1236 } 1237 for (i=low; i<high; i++) { 1238 if (rp[i] > bcol) break; 1239 if (rp[i] == bcol) { 1240 bap = ap + 16*i + 4*cidx + ridx; 1241 *bap += value; 1242 goto noinsert1; 1243 } 1244 } 1245 N = nrow++ - 1; 1246 high++; /* added new column thus must search to one higher than before */ 1247 /* shift up all the later entries in this row */ 1248 for (ii=N; ii>=i; ii--) { 1249 rp[ii+1] = rp[ii]; 1250 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1251 } 1252 if (N>=i) { 1253 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1254 } 1255 rp[i] = bcol; 1256 ap[16*i + 4*cidx + ridx] = value; 1257 noinsert1:; 1258 low = i; 1259 } 1260 ailen[brow] = nrow; 1261 } 1262 PetscFunctionReturnVoid(); 1263 } 1264 EXTERN_C_END 1265 1266 /* 1267 Checks for missing diagonals 1268 */ 1269 #undef __FUNCT__ 1270 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1271 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1272 { 1273 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1274 PetscErrorCode ierr; 1275 PetscInt *diag,*jj = a->j,i; 1276 1277 PetscFunctionBegin; 1278 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1279 *missing = PETSC_FALSE; 1280 if (A->rmap->n > 0 && !jj) { 1281 *missing = PETSC_TRUE; 1282 if (d) *d = 0; 1283 PetscInfo(A,"Matrix has no entries therefor is missing diagonal"); 1284 } else { 1285 diag = a->diag; 1286 for (i=0; i<a->mbs; i++) { 1287 if (jj[diag[i]] != i) { 1288 *missing = PETSC_TRUE; 1289 if (d) *d = i; 1290 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1291 } 1292 } 1293 } 1294 PetscFunctionReturn(0); 1295 } 1296 1297 #undef __FUNCT__ 1298 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1299 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1300 { 1301 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1302 PetscErrorCode ierr; 1303 PetscInt i,j,m = a->mbs; 1304 1305 PetscFunctionBegin; 1306 if (!a->diag) { 1307 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1308 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1309 a->free_diag = PETSC_TRUE; 1310 } 1311 for (i=0; i<m; i++) { 1312 a->diag[i] = a->i[i+1]; 1313 for (j=a->i[i]; j<a->i[i+1]; j++) { 1314 if (a->j[j] == i) { 1315 a->diag[i] = j; 1316 break; 1317 } 1318 } 1319 } 1320 PetscFunctionReturn(0); 1321 } 1322 1323 1324 extern PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1325 1326 #undef __FUNCT__ 1327 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1328 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 1329 { 1330 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1331 PetscErrorCode ierr; 1332 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt; 1333 PetscInt *tia, *tja; 1334 1335 PetscFunctionBegin; 1336 *nn = n; 1337 if (!ia) PetscFunctionReturn(0); 1338 if (symmetric) { 1339 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1340 nz = tia[n]; 1341 } else { 1342 tia = a->i; tja = a->j; 1343 } 1344 1345 if (!blockcompressed && bs > 1) { 1346 (*nn) *= bs; 1347 /* malloc & create the natural set of indices */ 1348 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1349 if (n) { 1350 (*ia)[0] = 0; 1351 for (j=1; j<bs; j++) { 1352 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1353 } 1354 } 1355 1356 for (i=1; i<n; i++) { 1357 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1358 for (j=1; j<bs; j++) { 1359 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1360 } 1361 } 1362 if (n) { 1363 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1364 } 1365 1366 if (ja) { 1367 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1368 cnt = 0; 1369 for (i=0; i<n; i++) { 1370 for (j=0; j<bs; j++) { 1371 for (k=tia[i]; k<tia[i+1]; k++) { 1372 for (l=0; l<bs; l++) { 1373 (*ja)[cnt++] = bs*tja[k] + l; 1374 } 1375 } 1376 } 1377 } 1378 } 1379 1380 n *= bs; 1381 nz *= bs*bs; 1382 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1383 ierr = PetscFree(tia);CHKERRQ(ierr); 1384 ierr = PetscFree(tja);CHKERRQ(ierr); 1385 } 1386 } else if (oshift == 1) { 1387 if (symmetric) { 1388 PetscInt nz = tia[A->rmap->n/bs]; 1389 /* add 1 to i and j indices */ 1390 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1391 *ia = tia; 1392 if (ja) { 1393 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1394 *ja = tja; 1395 } 1396 } else { 1397 PetscInt nz = a->i[A->rmap->n/bs]; 1398 /* malloc space and add 1 to i and j indices */ 1399 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1400 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1401 if (ja) { 1402 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1403 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1404 } 1405 } 1406 } else { 1407 *ia = tia; 1408 if (ja) *ja = tja; 1409 } 1410 1411 PetscFunctionReturn(0); 1412 } 1413 1414 #undef __FUNCT__ 1415 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1416 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 1417 { 1418 PetscErrorCode ierr; 1419 1420 PetscFunctionBegin; 1421 if (!ia) PetscFunctionReturn(0); 1422 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1423 ierr = PetscFree(*ia);CHKERRQ(ierr); 1424 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1425 } 1426 PetscFunctionReturn(0); 1427 } 1428 1429 #undef __FUNCT__ 1430 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1431 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1432 { 1433 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1434 PetscErrorCode ierr; 1435 1436 PetscFunctionBegin; 1437 #if defined(PETSC_USE_LOG) 1438 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1439 #endif 1440 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1441 if (a->row) { 1442 ierr = ISDestroy(a->row);CHKERRQ(ierr); 1443 } 1444 if (a->col) { 1445 ierr = ISDestroy(a->col);CHKERRQ(ierr); 1446 } 1447 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1448 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1449 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1450 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1451 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1452 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1453 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 1454 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1455 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1456 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1457 1458 if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);} 1459 if (a->parent) {ierr = MatDestroy(a->parent);CHKERRQ(ierr);} 1460 ierr = PetscFree(a);CHKERRQ(ierr); 1461 1462 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1463 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1464 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1465 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1466 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1467 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1468 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1469 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1470 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1471 PetscFunctionReturn(0); 1472 } 1473 1474 #undef __FUNCT__ 1475 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1476 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1477 { 1478 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1479 PetscErrorCode ierr; 1480 1481 PetscFunctionBegin; 1482 switch (op) { 1483 case MAT_ROW_ORIENTED: 1484 a->roworiented = flg; 1485 break; 1486 case MAT_KEEP_NONZERO_PATTERN: 1487 a->keepnonzeropattern = flg; 1488 break; 1489 case MAT_NEW_NONZERO_LOCATIONS: 1490 a->nonew = (flg ? 0 : 1); 1491 break; 1492 case MAT_NEW_NONZERO_LOCATION_ERR: 1493 a->nonew = (flg ? -1 : 0); 1494 break; 1495 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1496 a->nonew = (flg ? -2 : 0); 1497 break; 1498 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1499 a->nounused = (flg ? -1 : 0); 1500 break; 1501 case MAT_CHECK_COMPRESSED_ROW: 1502 a->compressedrow.check = flg; 1503 break; 1504 case MAT_NEW_DIAGONALS: 1505 case MAT_IGNORE_OFF_PROC_ENTRIES: 1506 case MAT_USE_HASH_TABLE: 1507 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1508 break; 1509 case MAT_SYMMETRIC: 1510 case MAT_STRUCTURALLY_SYMMETRIC: 1511 case MAT_HERMITIAN: 1512 case MAT_SYMMETRY_ETERNAL: 1513 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1514 break; 1515 default: 1516 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1517 } 1518 PetscFunctionReturn(0); 1519 } 1520 1521 #undef __FUNCT__ 1522 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1523 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1524 { 1525 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1526 PetscErrorCode ierr; 1527 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1528 MatScalar *aa,*aa_i; 1529 PetscScalar *v_i; 1530 1531 PetscFunctionBegin; 1532 bs = A->rmap->bs; 1533 ai = a->i; 1534 aj = a->j; 1535 aa = a->a; 1536 bs2 = a->bs2; 1537 1538 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1539 1540 bn = row/bs; /* Block number */ 1541 bp = row % bs; /* Block Position */ 1542 M = ai[bn+1] - ai[bn]; 1543 *nz = bs*M; 1544 1545 if (v) { 1546 *v = 0; 1547 if (*nz) { 1548 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1549 for (i=0; i<M; i++) { /* for each block in the block row */ 1550 v_i = *v + i*bs; 1551 aa_i = aa + bs2*(ai[bn] + i); 1552 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1553 } 1554 } 1555 } 1556 1557 if (idx) { 1558 *idx = 0; 1559 if (*nz) { 1560 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1561 for (i=0; i<M; i++) { /* for each block in the block row */ 1562 idx_i = *idx + i*bs; 1563 itmp = bs*aj[ai[bn] + i]; 1564 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1565 } 1566 } 1567 } 1568 PetscFunctionReturn(0); 1569 } 1570 1571 #undef __FUNCT__ 1572 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1573 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1574 { 1575 PetscErrorCode ierr; 1576 1577 PetscFunctionBegin; 1578 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1579 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1580 PetscFunctionReturn(0); 1581 } 1582 1583 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1584 1585 #undef __FUNCT__ 1586 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1587 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1588 { 1589 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1590 Mat C; 1591 PetscErrorCode ierr; 1592 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1593 PetscInt *rows,*cols,bs2=a->bs2; 1594 MatScalar *array; 1595 1596 PetscFunctionBegin; 1597 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1598 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1599 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1600 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1601 1602 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1603 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1604 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1605 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1606 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1607 ierr = PetscFree(col);CHKERRQ(ierr); 1608 } else { 1609 C = *B; 1610 } 1611 1612 array = a->a; 1613 ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 1614 for (i=0; i<mbs; i++) { 1615 cols[0] = i*bs; 1616 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1617 len = ai[i+1] - ai[i]; 1618 for (j=0; j<len; j++) { 1619 rows[0] = (*aj++)*bs; 1620 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1621 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1622 array += bs2; 1623 } 1624 } 1625 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1626 1627 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1628 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1629 1630 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1631 *B = C; 1632 } else { 1633 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1634 } 1635 PetscFunctionReturn(0); 1636 } 1637 1638 #undef __FUNCT__ 1639 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1640 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1641 { 1642 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1643 PetscErrorCode ierr; 1644 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1645 int fd; 1646 PetscScalar *aa; 1647 FILE *file; 1648 1649 PetscFunctionBegin; 1650 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1651 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1652 col_lens[0] = MAT_FILE_CLASSID; 1653 1654 col_lens[1] = A->rmap->N; 1655 col_lens[2] = A->cmap->n; 1656 col_lens[3] = a->nz*bs2; 1657 1658 /* store lengths of each row and write (including header) to file */ 1659 count = 0; 1660 for (i=0; i<a->mbs; i++) { 1661 for (j=0; j<bs; j++) { 1662 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1663 } 1664 } 1665 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1666 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1667 1668 /* store column indices (zero start index) */ 1669 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1670 count = 0; 1671 for (i=0; i<a->mbs; i++) { 1672 for (j=0; j<bs; j++) { 1673 for (k=a->i[i]; k<a->i[i+1]; k++) { 1674 for (l=0; l<bs; l++) { 1675 jj[count++] = bs*a->j[k] + l; 1676 } 1677 } 1678 } 1679 } 1680 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1681 ierr = PetscFree(jj);CHKERRQ(ierr); 1682 1683 /* store nonzero values */ 1684 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1685 count = 0; 1686 for (i=0; i<a->mbs; i++) { 1687 for (j=0; j<bs; j++) { 1688 for (k=a->i[i]; k<a->i[i+1]; k++) { 1689 for (l=0; l<bs; l++) { 1690 aa[count++] = a->a[bs2*k + l*bs + j]; 1691 } 1692 } 1693 } 1694 } 1695 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1696 ierr = PetscFree(aa);CHKERRQ(ierr); 1697 1698 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1699 if (file) { 1700 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1701 } 1702 PetscFunctionReturn(0); 1703 } 1704 1705 #undef __FUNCT__ 1706 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1707 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1708 { 1709 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1710 PetscErrorCode ierr; 1711 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1712 PetscViewerFormat format; 1713 1714 PetscFunctionBegin; 1715 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1716 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1717 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1718 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1719 Mat aij; 1720 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1721 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1722 ierr = MatDestroy(aij);CHKERRQ(ierr); 1723 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1724 PetscFunctionReturn(0); 1725 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1726 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1727 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1728 for (i=0; i<a->mbs; i++) { 1729 for (j=0; j<bs; j++) { 1730 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1731 for (k=a->i[i]; k<a->i[i+1]; k++) { 1732 for (l=0; l<bs; l++) { 1733 #if defined(PETSC_USE_COMPLEX) 1734 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1735 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1736 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1737 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1738 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1739 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1740 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1741 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1742 } 1743 #else 1744 if (a->a[bs2*k + l*bs + j] != 0.0) { 1745 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1746 } 1747 #endif 1748 } 1749 } 1750 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1751 } 1752 } 1753 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1754 } else { 1755 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1756 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1757 for (i=0; i<a->mbs; i++) { 1758 for (j=0; j<bs; j++) { 1759 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1760 for (k=a->i[i]; k<a->i[i+1]; k++) { 1761 for (l=0; l<bs; l++) { 1762 #if defined(PETSC_USE_COMPLEX) 1763 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1764 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1765 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1766 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1767 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1768 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1769 } else { 1770 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1771 } 1772 #else 1773 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1774 #endif 1775 } 1776 } 1777 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1778 } 1779 } 1780 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1781 } 1782 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1783 PetscFunctionReturn(0); 1784 } 1785 1786 #undef __FUNCT__ 1787 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1788 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1789 { 1790 Mat A = (Mat) Aa; 1791 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1792 PetscErrorCode ierr; 1793 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1794 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1795 MatScalar *aa; 1796 PetscViewer viewer; 1797 1798 PetscFunctionBegin; 1799 1800 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1801 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1802 1803 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1804 1805 /* loop over matrix elements drawing boxes */ 1806 color = PETSC_DRAW_BLUE; 1807 for (i=0,row=0; i<mbs; i++,row+=bs) { 1808 for (j=a->i[i]; j<a->i[i+1]; j++) { 1809 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1810 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1811 aa = a->a + j*bs2; 1812 for (k=0; k<bs; k++) { 1813 for (l=0; l<bs; l++) { 1814 if (PetscRealPart(*aa++) >= 0.) continue; 1815 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1816 } 1817 } 1818 } 1819 } 1820 color = PETSC_DRAW_CYAN; 1821 for (i=0,row=0; i<mbs; i++,row+=bs) { 1822 for (j=a->i[i]; j<a->i[i+1]; j++) { 1823 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1824 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1825 aa = a->a + j*bs2; 1826 for (k=0; k<bs; k++) { 1827 for (l=0; l<bs; l++) { 1828 if (PetscRealPart(*aa++) != 0.) continue; 1829 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1830 } 1831 } 1832 } 1833 } 1834 1835 color = PETSC_DRAW_RED; 1836 for (i=0,row=0; i<mbs; i++,row+=bs) { 1837 for (j=a->i[i]; j<a->i[i+1]; j++) { 1838 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1839 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1840 aa = a->a + j*bs2; 1841 for (k=0; k<bs; k++) { 1842 for (l=0; l<bs; l++) { 1843 if (PetscRealPart(*aa++) <= 0.) continue; 1844 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1845 } 1846 } 1847 } 1848 } 1849 PetscFunctionReturn(0); 1850 } 1851 1852 #undef __FUNCT__ 1853 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1854 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1855 { 1856 PetscErrorCode ierr; 1857 PetscReal xl,yl,xr,yr,w,h; 1858 PetscDraw draw; 1859 PetscBool isnull; 1860 1861 PetscFunctionBegin; 1862 1863 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1864 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1865 1866 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1867 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1868 xr += w; yr += h; xl = -w; yl = -h; 1869 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1870 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1871 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1872 PetscFunctionReturn(0); 1873 } 1874 1875 #undef __FUNCT__ 1876 #define __FUNCT__ "MatView_SeqBAIJ" 1877 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1878 { 1879 PetscErrorCode ierr; 1880 PetscBool iascii,isbinary,isdraw; 1881 1882 PetscFunctionBegin; 1883 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1884 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1885 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1886 if (iascii){ 1887 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1888 } else if (isbinary) { 1889 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1890 } else if (isdraw) { 1891 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1892 } else { 1893 Mat B; 1894 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1895 ierr = MatView(B,viewer);CHKERRQ(ierr); 1896 ierr = MatDestroy(B);CHKERRQ(ierr); 1897 } 1898 PetscFunctionReturn(0); 1899 } 1900 1901 1902 #undef __FUNCT__ 1903 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1904 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1905 { 1906 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1907 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1908 PetscInt *ai = a->i,*ailen = a->ilen; 1909 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1910 MatScalar *ap,*aa = a->a; 1911 1912 PetscFunctionBegin; 1913 for (k=0; k<m; k++) { /* loop over rows */ 1914 row = im[k]; brow = row/bs; 1915 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1916 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1917 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1918 nrow = ailen[brow]; 1919 for (l=0; l<n; l++) { /* loop over columns */ 1920 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1921 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1922 col = in[l] ; 1923 bcol = col/bs; 1924 cidx = col%bs; 1925 ridx = row%bs; 1926 high = nrow; 1927 low = 0; /* assume unsorted */ 1928 while (high-low > 5) { 1929 t = (low+high)/2; 1930 if (rp[t] > bcol) high = t; 1931 else low = t; 1932 } 1933 for (i=low; i<high; i++) { 1934 if (rp[i] > bcol) break; 1935 if (rp[i] == bcol) { 1936 *v++ = ap[bs2*i+bs*cidx+ridx]; 1937 goto finished; 1938 } 1939 } 1940 *v++ = 0.0; 1941 finished:; 1942 } 1943 } 1944 PetscFunctionReturn(0); 1945 } 1946 1947 #undef __FUNCT__ 1948 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1949 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1950 { 1951 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1952 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1953 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1954 PetscErrorCode ierr; 1955 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1956 PetscBool roworiented=a->roworiented; 1957 const PetscScalar *value = v; 1958 MatScalar *ap,*aa = a->a,*bap; 1959 1960 PetscFunctionBegin; 1961 if (roworiented) { 1962 stepval = (n-1)*bs; 1963 } else { 1964 stepval = (m-1)*bs; 1965 } 1966 for (k=0; k<m; k++) { /* loop over added rows */ 1967 row = im[k]; 1968 if (row < 0) continue; 1969 #if defined(PETSC_USE_DEBUG) 1970 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1971 #endif 1972 rp = aj + ai[row]; 1973 ap = aa + bs2*ai[row]; 1974 rmax = imax[row]; 1975 nrow = ailen[row]; 1976 low = 0; 1977 high = nrow; 1978 for (l=0; l<n; l++) { /* loop over added columns */ 1979 if (in[l] < 0) continue; 1980 #if defined(PETSC_USE_DEBUG) 1981 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1982 #endif 1983 col = in[l]; 1984 if (roworiented) { 1985 value = v + (k*(stepval+bs) + l)*bs; 1986 } else { 1987 value = v + (l*(stepval+bs) + k)*bs; 1988 } 1989 if (col <= lastcol) low = 0; else high = nrow; 1990 lastcol = col; 1991 while (high-low > 7) { 1992 t = (low+high)/2; 1993 if (rp[t] > col) high = t; 1994 else low = t; 1995 } 1996 for (i=low; i<high; i++) { 1997 if (rp[i] > col) break; 1998 if (rp[i] == col) { 1999 bap = ap + bs2*i; 2000 if (roworiented) { 2001 if (is == ADD_VALUES) { 2002 for (ii=0; ii<bs; ii++,value+=stepval) { 2003 for (jj=ii; jj<bs2; jj+=bs) { 2004 bap[jj] += *value++; 2005 } 2006 } 2007 } else { 2008 for (ii=0; ii<bs; ii++,value+=stepval) { 2009 for (jj=ii; jj<bs2; jj+=bs) { 2010 bap[jj] = *value++; 2011 } 2012 } 2013 } 2014 } else { 2015 if (is == ADD_VALUES) { 2016 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2017 for (jj=0; jj<bs; jj++) { 2018 bap[jj] += value[jj]; 2019 } 2020 bap += bs; 2021 } 2022 } else { 2023 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2024 for (jj=0; jj<bs; jj++) { 2025 bap[jj] = value[jj]; 2026 } 2027 bap += bs; 2028 } 2029 } 2030 } 2031 goto noinsert2; 2032 } 2033 } 2034 if (nonew == 1) goto noinsert2; 2035 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2036 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2037 N = nrow++ - 1; high++; 2038 /* shift up all the later entries in this row */ 2039 for (ii=N; ii>=i; ii--) { 2040 rp[ii+1] = rp[ii]; 2041 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2042 } 2043 if (N >= i) { 2044 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2045 } 2046 rp[i] = col; 2047 bap = ap + bs2*i; 2048 if (roworiented) { 2049 for (ii=0; ii<bs; ii++,value+=stepval) { 2050 for (jj=ii; jj<bs2; jj+=bs) { 2051 bap[jj] = *value++; 2052 } 2053 } 2054 } else { 2055 for (ii=0; ii<bs; ii++,value+=stepval) { 2056 for (jj=0; jj<bs; jj++) { 2057 *bap++ = *value++; 2058 } 2059 } 2060 } 2061 noinsert2:; 2062 low = i; 2063 } 2064 ailen[row] = nrow; 2065 } 2066 PetscFunctionReturn(0); 2067 } 2068 2069 #undef __FUNCT__ 2070 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 2071 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 2072 { 2073 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2074 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 2075 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 2076 PetscErrorCode ierr; 2077 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 2078 MatScalar *aa = a->a,*ap; 2079 PetscReal ratio=0.6; 2080 2081 PetscFunctionBegin; 2082 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 2083 2084 if (m) rmax = ailen[0]; 2085 for (i=1; i<mbs; i++) { 2086 /* move each row back by the amount of empty slots (fshift) before it*/ 2087 fshift += imax[i-1] - ailen[i-1]; 2088 rmax = PetscMax(rmax,ailen[i]); 2089 if (fshift) { 2090 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 2091 N = ailen[i]; 2092 for (j=0; j<N; j++) { 2093 ip[j-fshift] = ip[j]; 2094 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2095 } 2096 } 2097 ai[i] = ai[i-1] + ailen[i-1]; 2098 } 2099 if (mbs) { 2100 fshift += imax[mbs-1] - ailen[mbs-1]; 2101 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 2102 } 2103 /* reset ilen and imax for each row */ 2104 for (i=0; i<mbs; i++) { 2105 ailen[i] = imax[i] = ai[i+1] - ai[i]; 2106 } 2107 a->nz = ai[mbs]; 2108 2109 /* diagonals may have moved, so kill the diagonal pointers */ 2110 a->idiagvalid = PETSC_FALSE; 2111 if (fshift && a->diag) { 2112 ierr = PetscFree(a->diag);CHKERRQ(ierr); 2113 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2114 a->diag = 0; 2115 } 2116 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 2117 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 2118 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 2119 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 2120 A->info.mallocs += a->reallocs; 2121 a->reallocs = 0; 2122 A->info.nz_unneeded = (PetscReal)fshift*bs2; 2123 2124 ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 2125 A->same_nonzero = PETSC_TRUE; 2126 PetscFunctionReturn(0); 2127 } 2128 2129 /* 2130 This function returns an array of flags which indicate the locations of contiguous 2131 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2132 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2133 Assume: sizes should be long enough to hold all the values. 2134 */ 2135 #undef __FUNCT__ 2136 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 2137 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2138 { 2139 PetscInt i,j,k,row; 2140 PetscBool flg; 2141 2142 PetscFunctionBegin; 2143 for (i=0,j=0; i<n; j++) { 2144 row = idx[i]; 2145 if (row%bs!=0) { /* Not the begining of a block */ 2146 sizes[j] = 1; 2147 i++; 2148 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2149 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 2150 i++; 2151 } else { /* Begining of the block, so check if the complete block exists */ 2152 flg = PETSC_TRUE; 2153 for (k=1; k<bs; k++) { 2154 if (row+k != idx[i+k]) { /* break in the block */ 2155 flg = PETSC_FALSE; 2156 break; 2157 } 2158 } 2159 if (flg) { /* No break in the bs */ 2160 sizes[j] = bs; 2161 i+= bs; 2162 } else { 2163 sizes[j] = 1; 2164 i++; 2165 } 2166 } 2167 } 2168 *bs_max = j; 2169 PetscFunctionReturn(0); 2170 } 2171 2172 #undef __FUNCT__ 2173 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2174 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2175 { 2176 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2177 PetscErrorCode ierr; 2178 PetscInt i,j,k,count,*rows; 2179 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2180 PetscScalar zero = 0.0; 2181 MatScalar *aa; 2182 const PetscScalar *xx; 2183 PetscScalar *bb; 2184 2185 PetscFunctionBegin; 2186 /* fix right hand side if needed */ 2187 if (x && b) { 2188 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2189 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2190 for (i=0; i<is_n; i++) { 2191 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2192 } 2193 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2194 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2195 } 2196 2197 /* Make a copy of the IS and sort it */ 2198 /* allocate memory for rows,sizes */ 2199 ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2200 2201 /* copy IS values to rows, and sort them */ 2202 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2203 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2204 2205 if (baij->keepnonzeropattern) { 2206 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2207 bs_max = is_n; 2208 A->same_nonzero = PETSC_TRUE; 2209 } else { 2210 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2211 A->same_nonzero = PETSC_FALSE; 2212 } 2213 2214 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2215 row = rows[j]; 2216 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2217 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2218 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2219 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2220 if (diag != 0.0) { 2221 if (baij->ilen[row/bs] > 0) { 2222 baij->ilen[row/bs] = 1; 2223 baij->j[baij->i[row/bs]] = row/bs; 2224 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2225 } 2226 /* Now insert all the diagonal values for this bs */ 2227 for (k=0; k<bs; k++) { 2228 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2229 } 2230 } else { /* (diag == 0.0) */ 2231 baij->ilen[row/bs] = 0; 2232 } /* end (diag == 0.0) */ 2233 } else { /* (sizes[i] != bs) */ 2234 #if defined (PETSC_USE_DEBUG) 2235 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2236 #endif 2237 for (k=0; k<count; k++) { 2238 aa[0] = zero; 2239 aa += bs; 2240 } 2241 if (diag != 0.0) { 2242 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2243 } 2244 } 2245 } 2246 2247 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2248 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2249 PetscFunctionReturn(0); 2250 } 2251 2252 #undef __FUNCT__ 2253 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2254 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2255 { 2256 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2257 PetscErrorCode ierr; 2258 PetscInt i,j,k,count; 2259 PetscInt bs=A->rmap->bs,bs2=baij->bs2,row,col; 2260 PetscScalar zero = 0.0; 2261 MatScalar *aa; 2262 const PetscScalar *xx; 2263 PetscScalar *bb; 2264 PetscBool *zeroed,vecs = PETSC_FALSE; 2265 2266 PetscFunctionBegin; 2267 /* fix right hand side if needed */ 2268 if (x && b) { 2269 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2270 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2271 vecs = PETSC_TRUE; 2272 } 2273 A->same_nonzero = PETSC_TRUE; 2274 2275 /* zero the columns */ 2276 ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr); 2277 ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 2278 for (i=0; i<is_n; i++) { 2279 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2280 zeroed[is_idx[i]] = PETSC_TRUE; 2281 } 2282 for (i=0; i<A->rmap->N; i++) { 2283 if (!zeroed[i]) { 2284 row = i/bs; 2285 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2286 for (k=0; k<bs; k++) { 2287 col = bs*baij->j[j] + k; 2288 if (zeroed[col]) { 2289 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2290 if (vecs) bb[i] -= aa[0]*xx[col]; 2291 aa[0] = 0.0; 2292 } 2293 } 2294 } 2295 } else if (vecs) bb[i] = diag*xx[i]; 2296 } 2297 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2298 if (vecs) { 2299 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2300 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2301 } 2302 2303 /* zero the rows */ 2304 for (i=0; i<is_n; i++) { 2305 row = is_idx[i]; 2306 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2307 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2308 for (k=0; k<count; k++) { 2309 aa[0] = zero; 2310 aa += bs; 2311 } 2312 if (diag != 0.0) { 2313 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2314 } 2315 } 2316 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2317 PetscFunctionReturn(0); 2318 } 2319 2320 #undef __FUNCT__ 2321 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2322 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2323 { 2324 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2325 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2326 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2327 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2328 PetscErrorCode ierr; 2329 PetscInt ridx,cidx,bs2=a->bs2; 2330 PetscBool roworiented=a->roworiented; 2331 MatScalar *ap,value,*aa=a->a,*bap; 2332 2333 PetscFunctionBegin; 2334 if (v) PetscValidScalarPointer(v,6); 2335 for (k=0; k<m; k++) { /* loop over added rows */ 2336 row = im[k]; 2337 brow = row/bs; 2338 if (row < 0) continue; 2339 #if defined(PETSC_USE_DEBUG) 2340 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2341 #endif 2342 rp = aj + ai[brow]; 2343 ap = aa + bs2*ai[brow]; 2344 rmax = imax[brow]; 2345 nrow = ailen[brow]; 2346 low = 0; 2347 high = nrow; 2348 for (l=0; l<n; l++) { /* loop over added columns */ 2349 if (in[l] < 0) continue; 2350 #if defined(PETSC_USE_DEBUG) 2351 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2352 #endif 2353 col = in[l]; bcol = col/bs; 2354 ridx = row % bs; cidx = col % bs; 2355 if (roworiented) { 2356 value = v[l + k*n]; 2357 } else { 2358 value = v[k + l*m]; 2359 } 2360 if (col <= lastcol) low = 0; else high = nrow; 2361 lastcol = col; 2362 while (high-low > 7) { 2363 t = (low+high)/2; 2364 if (rp[t] > bcol) high = t; 2365 else low = t; 2366 } 2367 for (i=low; i<high; i++) { 2368 if (rp[i] > bcol) break; 2369 if (rp[i] == bcol) { 2370 bap = ap + bs2*i + bs*cidx + ridx; 2371 if (is == ADD_VALUES) *bap += value; 2372 else *bap = value; 2373 goto noinsert1; 2374 } 2375 } 2376 if (nonew == 1) goto noinsert1; 2377 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2378 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2379 N = nrow++ - 1; high++; 2380 /* shift up all the later entries in this row */ 2381 for (ii=N; ii>=i; ii--) { 2382 rp[ii+1] = rp[ii]; 2383 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2384 } 2385 if (N>=i) { 2386 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2387 } 2388 rp[i] = bcol; 2389 ap[bs2*i + bs*cidx + ridx] = value; 2390 a->nz++; 2391 noinsert1:; 2392 low = i; 2393 } 2394 ailen[brow] = nrow; 2395 } 2396 A->same_nonzero = PETSC_FALSE; 2397 PetscFunctionReturn(0); 2398 } 2399 2400 #undef __FUNCT__ 2401 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2402 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2403 { 2404 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2405 Mat outA; 2406 PetscErrorCode ierr; 2407 PetscBool row_identity,col_identity; 2408 2409 PetscFunctionBegin; 2410 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2411 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2412 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2413 if (!row_identity || !col_identity) { 2414 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2415 } 2416 2417 outA = inA; 2418 inA->factortype = MAT_FACTOR_LU; 2419 2420 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2421 2422 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2423 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 2424 a->row = row; 2425 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2426 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 2427 a->col = col; 2428 2429 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2430 if (a->icol) { 2431 ierr = ISDestroy(a->icol);CHKERRQ(ierr); 2432 } 2433 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2434 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2435 2436 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2437 if (!a->solve_work) { 2438 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2439 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2440 } 2441 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2442 2443 PetscFunctionReturn(0); 2444 } 2445 2446 EXTERN_C_BEGIN 2447 #undef __FUNCT__ 2448 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2449 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2450 { 2451 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2452 PetscInt i,nz,mbs; 2453 2454 PetscFunctionBegin; 2455 nz = baij->maxnz; 2456 mbs = baij->mbs; 2457 for (i=0; i<nz; i++) { 2458 baij->j[i] = indices[i]; 2459 } 2460 baij->nz = nz; 2461 for (i=0; i<mbs; i++) { 2462 baij->ilen[i] = baij->imax[i]; 2463 } 2464 PetscFunctionReturn(0); 2465 } 2466 EXTERN_C_END 2467 2468 #undef __FUNCT__ 2469 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2470 /*@ 2471 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2472 in the matrix. 2473 2474 Input Parameters: 2475 + mat - the SeqBAIJ matrix 2476 - indices - the column indices 2477 2478 Level: advanced 2479 2480 Notes: 2481 This can be called if you have precomputed the nonzero structure of the 2482 matrix and want to provide it to the matrix object to improve the performance 2483 of the MatSetValues() operation. 2484 2485 You MUST have set the correct numbers of nonzeros per row in the call to 2486 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2487 2488 MUST be called before any calls to MatSetValues(); 2489 2490 @*/ 2491 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2492 { 2493 PetscErrorCode ierr; 2494 2495 PetscFunctionBegin; 2496 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2497 PetscValidPointer(indices,2); 2498 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr); 2499 PetscFunctionReturn(0); 2500 } 2501 2502 #undef __FUNCT__ 2503 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2504 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2505 { 2506 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2507 PetscErrorCode ierr; 2508 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2509 PetscReal atmp; 2510 PetscScalar *x,zero = 0.0; 2511 MatScalar *aa; 2512 PetscInt ncols,brow,krow,kcol; 2513 2514 PetscFunctionBegin; 2515 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2516 bs = A->rmap->bs; 2517 aa = a->a; 2518 ai = a->i; 2519 aj = a->j; 2520 mbs = a->mbs; 2521 2522 ierr = VecSet(v,zero);CHKERRQ(ierr); 2523 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2524 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2525 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2526 for (i=0; i<mbs; i++) { 2527 ncols = ai[1] - ai[0]; ai++; 2528 brow = bs*i; 2529 for (j=0; j<ncols; j++){ 2530 for (kcol=0; kcol<bs; kcol++){ 2531 for (krow=0; krow<bs; krow++){ 2532 atmp = PetscAbsScalar(*aa);aa++; 2533 row = brow + krow; /* row index */ 2534 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2535 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2536 } 2537 } 2538 aj++; 2539 } 2540 } 2541 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2542 PetscFunctionReturn(0); 2543 } 2544 2545 #undef __FUNCT__ 2546 #define __FUNCT__ "MatCopy_SeqBAIJ" 2547 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2548 { 2549 PetscErrorCode ierr; 2550 2551 PetscFunctionBegin; 2552 /* If the two matrices have the same copy implementation, use fast copy. */ 2553 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2554 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2555 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2556 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2557 2558 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2559 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2560 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2561 } else { 2562 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2563 } 2564 PetscFunctionReturn(0); 2565 } 2566 2567 #undef __FUNCT__ 2568 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2569 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2570 { 2571 PetscErrorCode ierr; 2572 2573 PetscFunctionBegin; 2574 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2575 PetscFunctionReturn(0); 2576 } 2577 2578 #undef __FUNCT__ 2579 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2580 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2581 { 2582 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2583 PetscFunctionBegin; 2584 *array = a->a; 2585 PetscFunctionReturn(0); 2586 } 2587 2588 #undef __FUNCT__ 2589 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2590 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2591 { 2592 PetscFunctionBegin; 2593 PetscFunctionReturn(0); 2594 } 2595 2596 #undef __FUNCT__ 2597 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2598 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2599 { 2600 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2601 PetscErrorCode ierr; 2602 PetscInt i,bs=Y->rmap->bs,j,bs2; 2603 PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz); 2604 2605 PetscFunctionBegin; 2606 if (str == SAME_NONZERO_PATTERN) { 2607 PetscScalar alpha = a; 2608 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2609 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2610 if (y->xtoy && y->XtoY != X) { 2611 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2612 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2613 } 2614 if (!y->xtoy) { /* get xtoy */ 2615 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2616 y->XtoY = X; 2617 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2618 } 2619 bs2 = bs*bs; 2620 for (i=0; i<x->nz; i++) { 2621 j = 0; 2622 while (j < bs2){ 2623 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2624 j++; 2625 } 2626 } 2627 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2628 } else { 2629 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2630 } 2631 PetscFunctionReturn(0); 2632 } 2633 2634 #undef __FUNCT__ 2635 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ" 2636 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs) 2637 { 2638 PetscInt rbs,cbs; 2639 PetscErrorCode ierr; 2640 2641 PetscFunctionBegin; 2642 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 2643 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 2644 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 2645 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 2646 PetscFunctionReturn(0); 2647 } 2648 2649 #undef __FUNCT__ 2650 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2651 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2652 { 2653 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2654 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2655 MatScalar *aa = a->a; 2656 2657 PetscFunctionBegin; 2658 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2659 PetscFunctionReturn(0); 2660 } 2661 2662 #undef __FUNCT__ 2663 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2664 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2665 { 2666 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2667 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2668 MatScalar *aa = a->a; 2669 2670 PetscFunctionBegin; 2671 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2672 PetscFunctionReturn(0); 2673 } 2674 2675 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2676 2677 #undef __FUNCT__ 2678 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2679 /* 2680 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2681 */ 2682 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 2683 { 2684 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2685 PetscErrorCode ierr; 2686 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2687 PetscInt nz = a->i[m],row,*jj,mr,col; 2688 2689 PetscFunctionBegin; 2690 *nn = n; 2691 if (!ia) PetscFunctionReturn(0); 2692 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2693 else { 2694 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2695 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2696 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2697 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2698 jj = a->j; 2699 for (i=0; i<nz; i++) { 2700 collengths[jj[i]]++; 2701 } 2702 cia[0] = oshift; 2703 for (i=0; i<n; i++) { 2704 cia[i+1] = cia[i] + collengths[i]; 2705 } 2706 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2707 jj = a->j; 2708 for (row=0; row<m; row++) { 2709 mr = a->i[row+1] - a->i[row]; 2710 for (i=0; i<mr; i++) { 2711 col = *jj++; 2712 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2713 } 2714 } 2715 ierr = PetscFree(collengths);CHKERRQ(ierr); 2716 *ia = cia; *ja = cja; 2717 } 2718 PetscFunctionReturn(0); 2719 } 2720 2721 #undef __FUNCT__ 2722 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2723 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 2724 { 2725 PetscErrorCode ierr; 2726 2727 PetscFunctionBegin; 2728 if (!ia) PetscFunctionReturn(0); 2729 ierr = PetscFree(*ia);CHKERRQ(ierr); 2730 ierr = PetscFree(*ja);CHKERRQ(ierr); 2731 PetscFunctionReturn(0); 2732 } 2733 2734 #undef __FUNCT__ 2735 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2736 PetscErrorCode MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2737 { 2738 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f; 2739 PetscErrorCode ierr; 2740 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2; 2741 PetscScalar dx,*y,*xx,*w3_array; 2742 PetscScalar *vscale_array; 2743 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2744 Vec w1=coloring->w1,w2=coloring->w2,w3; 2745 void *fctx = coloring->fctx; 2746 PetscBool flg = PETSC_FALSE; 2747 PetscInt ctype=coloring->ctype,N,col_start=0,col_end=0; 2748 Vec x1_tmp; 2749 2750 PetscFunctionBegin; 2751 PetscValidHeaderSpecific(J,MAT_CLASSID,1); 2752 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2); 2753 PetscValidHeaderSpecific(x1,VEC_CLASSID,3); 2754 if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2755 2756 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2757 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2758 ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr); 2759 if (flg) { 2760 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2761 } else { 2762 PetscBool assembled; 2763 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2764 if (assembled) { 2765 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2766 } 2767 } 2768 2769 x1_tmp = x1; 2770 if (!coloring->vscale){ 2771 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2772 } 2773 2774 /* 2775 This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets 2776 coloring->F for the coarser grids from the finest 2777 */ 2778 if (coloring->F) { 2779 ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr); 2780 ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr); 2781 if (m1 != m2) { 2782 coloring->F = 0; 2783 } 2784 } 2785 2786 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2787 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2788 } 2789 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2790 2791 /* Set w1 = F(x1) */ 2792 if (coloring->F) { 2793 w1 = coloring->F; /* use already computed value of function */ 2794 coloring->F = 0; 2795 } else { 2796 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2797 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2798 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2799 } 2800 2801 if (!coloring->w3) { 2802 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2803 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2804 } 2805 w3 = coloring->w3; 2806 2807 CHKMEMQ; 2808 /* Compute all the local scale factors, including ghost points */ 2809 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2810 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2811 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2812 if (ctype == IS_COLORING_GHOSTED){ 2813 col_start = 0; col_end = N; 2814 } else if (ctype == IS_COLORING_GLOBAL){ 2815 xx = xx - start; 2816 vscale_array = vscale_array - start; 2817 col_start = start; col_end = N + start; 2818 } CHKMEMQ; 2819 for (col=col_start; col<col_end; col++){ 2820 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2821 if (coloring->htype[0] == 'w') { 2822 dx = 1.0 + unorm; 2823 } else { 2824 dx = xx[col]; 2825 } 2826 if (dx == 0.0) dx = 1.0; 2827 #if !defined(PETSC_USE_COMPLEX) 2828 if (dx < umin && dx >= 0.0) dx = umin; 2829 else if (dx < 0.0 && dx > -umin) dx = -umin; 2830 #else 2831 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2832 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2833 #endif 2834 dx *= epsilon; 2835 vscale_array[col] = 1.0/dx; 2836 } CHKMEMQ; 2837 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2838 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2839 if (ctype == IS_COLORING_GLOBAL){ 2840 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2841 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2842 } 2843 CHKMEMQ; 2844 if (coloring->vscaleforrow) { 2845 vscaleforrow = coloring->vscaleforrow; 2846 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2847 2848 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2849 /* 2850 Loop over each color 2851 */ 2852 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2853 for (k=0; k<coloring->ncolors; k++) { 2854 coloring->currentcolor = k; 2855 for (i=0; i<bs; i++) { 2856 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2857 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2858 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2859 /* 2860 Loop over each column associated with color 2861 adding the perturbation to the vector w3. 2862 */ 2863 for (l=0; l<coloring->ncolumns[k]; l++) { 2864 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2865 if (coloring->htype[0] == 'w') { 2866 dx = 1.0 + unorm; 2867 } else { 2868 dx = xx[col]; 2869 } 2870 if (dx == 0.0) dx = 1.0; 2871 #if !defined(PETSC_USE_COMPLEX) 2872 if (dx < umin && dx >= 0.0) dx = umin; 2873 else if (dx < 0.0 && dx > -umin) dx = -umin; 2874 #else 2875 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2876 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2877 #endif 2878 dx *= epsilon; 2879 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2880 w3_array[col] += dx; 2881 } 2882 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2883 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2884 2885 /* 2886 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2887 w2 = F(x1 + dx) - F(x1) 2888 */ 2889 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2890 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2891 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2892 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2893 2894 /* 2895 Loop over rows of vector, putting results into Jacobian matrix 2896 */ 2897 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2898 for (l=0; l<coloring->nrows[k]; l++) { 2899 row = bs*coloring->rows[k][l]; /* local row index */ 2900 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2901 for (j=0; j<bs; j++) { 2902 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2903 srows[j] = row + start + j; 2904 } 2905 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2906 } 2907 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2908 } 2909 } /* endof for each color */ 2910 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2911 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2912 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2913 ierr = PetscFree(srows);CHKERRQ(ierr); 2914 2915 coloring->currentcolor = -1; 2916 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2917 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2918 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2919 PetscFunctionReturn(0); 2920 } 2921 2922 /* -------------------------------------------------------------------*/ 2923 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2924 MatGetRow_SeqBAIJ, 2925 MatRestoreRow_SeqBAIJ, 2926 MatMult_SeqBAIJ_N, 2927 /* 4*/ MatMultAdd_SeqBAIJ_N, 2928 MatMultTranspose_SeqBAIJ, 2929 MatMultTransposeAdd_SeqBAIJ, 2930 0, 2931 0, 2932 0, 2933 /*10*/ 0, 2934 MatLUFactor_SeqBAIJ, 2935 0, 2936 0, 2937 MatTranspose_SeqBAIJ, 2938 /*15*/ MatGetInfo_SeqBAIJ, 2939 MatEqual_SeqBAIJ, 2940 MatGetDiagonal_SeqBAIJ, 2941 MatDiagonalScale_SeqBAIJ, 2942 MatNorm_SeqBAIJ, 2943 /*20*/ 0, 2944 MatAssemblyEnd_SeqBAIJ, 2945 MatSetOption_SeqBAIJ, 2946 MatZeroEntries_SeqBAIJ, 2947 /*24*/ MatZeroRows_SeqBAIJ, 2948 0, 2949 0, 2950 0, 2951 0, 2952 /*29*/ MatSetUpPreallocation_SeqBAIJ, 2953 0, 2954 0, 2955 MatGetArray_SeqBAIJ, 2956 MatRestoreArray_SeqBAIJ, 2957 /*34*/ MatDuplicate_SeqBAIJ, 2958 0, 2959 0, 2960 MatILUFactor_SeqBAIJ, 2961 0, 2962 /*39*/ MatAXPY_SeqBAIJ, 2963 MatGetSubMatrices_SeqBAIJ, 2964 MatIncreaseOverlap_SeqBAIJ, 2965 MatGetValues_SeqBAIJ, 2966 MatCopy_SeqBAIJ, 2967 /*44*/ 0, 2968 MatScale_SeqBAIJ, 2969 0, 2970 0, 2971 MatZeroRowsColumns_SeqBAIJ, 2972 /*49*/ MatSetBlockSize_SeqBAIJ, 2973 MatGetRowIJ_SeqBAIJ, 2974 MatRestoreRowIJ_SeqBAIJ, 2975 MatGetColumnIJ_SeqBAIJ, 2976 MatRestoreColumnIJ_SeqBAIJ, 2977 /*54*/ MatFDColoringCreate_SeqAIJ, 2978 0, 2979 0, 2980 0, 2981 MatSetValuesBlocked_SeqBAIJ, 2982 /*59*/ MatGetSubMatrix_SeqBAIJ, 2983 MatDestroy_SeqBAIJ, 2984 MatView_SeqBAIJ, 2985 0, 2986 0, 2987 /*64*/ 0, 2988 0, 2989 0, 2990 0, 2991 0, 2992 /*69*/ MatGetRowMaxAbs_SeqBAIJ, 2993 0, 2994 MatConvert_Basic, 2995 0, 2996 0, 2997 /*74*/ 0, 2998 MatFDColoringApply_BAIJ, 2999 0, 3000 0, 3001 0, 3002 /*79*/ 0, 3003 0, 3004 0, 3005 0, 3006 MatLoad_SeqBAIJ, 3007 /*84*/ 0, 3008 0, 3009 0, 3010 0, 3011 0, 3012 /*89*/ 0, 3013 0, 3014 0, 3015 0, 3016 0, 3017 /*94*/ 0, 3018 0, 3019 0, 3020 0, 3021 0, 3022 /*99*/0, 3023 0, 3024 0, 3025 0, 3026 0, 3027 /*104*/0, 3028 MatRealPart_SeqBAIJ, 3029 MatImaginaryPart_SeqBAIJ, 3030 0, 3031 0, 3032 /*109*/0, 3033 0, 3034 0, 3035 0, 3036 MatMissingDiagonal_SeqBAIJ, 3037 /*114*/0, 3038 0, 3039 0, 3040 0, 3041 0, 3042 /*119*/0, 3043 0, 3044 MatMultHermitianTranspose_SeqBAIJ, 3045 MatMultHermitianTransposeAdd_SeqBAIJ, 3046 0, 3047 }; 3048 3049 EXTERN_C_BEGIN 3050 #undef __FUNCT__ 3051 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 3052 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3053 { 3054 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3055 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 3056 PetscErrorCode ierr; 3057 3058 PetscFunctionBegin; 3059 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3060 3061 /* allocate space for values if not already there */ 3062 if (!aij->saved_values) { 3063 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 3064 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 3065 } 3066 3067 /* copy values over */ 3068 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3069 PetscFunctionReturn(0); 3070 } 3071 EXTERN_C_END 3072 3073 EXTERN_C_BEGIN 3074 #undef __FUNCT__ 3075 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 3076 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3077 { 3078 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3079 PetscErrorCode ierr; 3080 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 3081 3082 PetscFunctionBegin; 3083 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3084 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 3085 3086 /* copy values over */ 3087 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3088 PetscFunctionReturn(0); 3089 } 3090 EXTERN_C_END 3091 3092 EXTERN_C_BEGIN 3093 extern PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 3094 extern PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 3095 EXTERN_C_END 3096 3097 EXTERN_C_BEGIN 3098 #undef __FUNCT__ 3099 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 3100 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 3101 { 3102 Mat_SeqBAIJ *b; 3103 PetscErrorCode ierr; 3104 PetscInt i,mbs,nbs,bs2,newbs = PetscAbs(bs); 3105 PetscBool flg,skipallocation = PETSC_FALSE; 3106 3107 PetscFunctionBegin; 3108 3109 if (nz == MAT_SKIP_ALLOCATION) { 3110 skipallocation = PETSC_TRUE; 3111 nz = 0; 3112 } 3113 3114 if (bs < 0) { 3115 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 3116 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 3117 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3118 bs = PetscAbs(bs); 3119 } 3120 if (nnz && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 3121 bs = newbs; 3122 3123 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3124 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3125 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3126 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3127 3128 B->preallocated = PETSC_TRUE; 3129 3130 mbs = B->rmap->n/bs; 3131 nbs = B->cmap->n/bs; 3132 bs2 = bs*bs; 3133 3134 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 3135 3136 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3137 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 3138 if (nnz) { 3139 for (i=0; i<mbs; i++) { 3140 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 3141 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 3142 } 3143 } 3144 3145 b = (Mat_SeqBAIJ*)B->data; 3146 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 3147 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3148 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3149 3150 if (!flg) { 3151 switch (bs) { 3152 case 1: 3153 B->ops->mult = MatMult_SeqBAIJ_1; 3154 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3155 B->ops->sor = MatSOR_SeqBAIJ_1; 3156 break; 3157 case 2: 3158 B->ops->mult = MatMult_SeqBAIJ_2; 3159 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3160 B->ops->sor = MatSOR_SeqBAIJ_2; 3161 break; 3162 case 3: 3163 B->ops->mult = MatMult_SeqBAIJ_3; 3164 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3165 B->ops->sor = MatSOR_SeqBAIJ_3; 3166 break; 3167 case 4: 3168 B->ops->mult = MatMult_SeqBAIJ_4; 3169 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3170 B->ops->sor = MatSOR_SeqBAIJ_4; 3171 break; 3172 case 5: 3173 B->ops->mult = MatMult_SeqBAIJ_5; 3174 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3175 B->ops->sor = MatSOR_SeqBAIJ_5; 3176 break; 3177 case 6: 3178 B->ops->mult = MatMult_SeqBAIJ_6; 3179 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3180 B->ops->sor = MatSOR_SeqBAIJ_6; 3181 break; 3182 case 7: 3183 B->ops->mult = MatMult_SeqBAIJ_7; 3184 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3185 B->ops->sor = MatSOR_SeqBAIJ_7; 3186 break; 3187 case 15: 3188 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3189 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3190 B->ops->sor = MatSOR_SeqBAIJ_N; 3191 break; 3192 default: 3193 B->ops->mult = MatMult_SeqBAIJ_N; 3194 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3195 B->ops->sor = MatSOR_SeqBAIJ_N; 3196 break; 3197 } 3198 } 3199 B->rmap->bs = bs; 3200 b->mbs = mbs; 3201 b->nbs = nbs; 3202 if (!skipallocation) { 3203 if (!b->imax) { 3204 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 3205 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt)); 3206 b->free_imax_ilen = PETSC_TRUE; 3207 } 3208 /* b->ilen will count nonzeros in each block row so far. */ 3209 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 3210 if (!nnz) { 3211 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3212 else if (nz < 0) nz = 1; 3213 for (i=0; i<mbs; i++) b->imax[i] = nz; 3214 nz = nz*mbs; 3215 } else { 3216 nz = 0; 3217 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 3218 } 3219 3220 /* allocate the matrix space */ 3221 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 3222 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 3223 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 3224 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 3225 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3226 b->singlemalloc = PETSC_TRUE; 3227 b->i[0] = 0; 3228 for (i=1; i<mbs+1; i++) { 3229 b->i[i] = b->i[i-1] + b->imax[i-1]; 3230 } 3231 b->free_a = PETSC_TRUE; 3232 b->free_ij = PETSC_TRUE; 3233 } else { 3234 b->free_a = PETSC_FALSE; 3235 b->free_ij = PETSC_FALSE; 3236 } 3237 3238 B->rmap->bs = bs; 3239 b->bs2 = bs2; 3240 b->mbs = mbs; 3241 b->nz = 0; 3242 b->maxnz = nz; 3243 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3244 PetscFunctionReturn(0); 3245 } 3246 EXTERN_C_END 3247 3248 EXTERN_C_BEGIN 3249 #undef __FUNCT__ 3250 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3251 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3252 { 3253 PetscInt i,m,nz,nz_max=0,*nnz; 3254 PetscScalar *values=0; 3255 PetscErrorCode ierr; 3256 3257 PetscFunctionBegin; 3258 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3259 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3260 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3261 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3262 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3263 m = B->rmap->n/bs; 3264 3265 if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 3266 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3267 for(i=0; i<m; i++) { 3268 nz = ii[i+1]- ii[i]; 3269 if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 3270 nz_max = PetscMax(nz_max, nz); 3271 nnz[i] = nz; 3272 } 3273 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3274 ierr = PetscFree(nnz);CHKERRQ(ierr); 3275 3276 values = (PetscScalar*)V; 3277 if (!values) { 3278 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3279 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3280 } 3281 for (i=0; i<m; i++) { 3282 PetscInt ncols = ii[i+1] - ii[i]; 3283 const PetscInt *icols = jj + ii[i]; 3284 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3285 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3286 } 3287 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3288 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3289 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3290 3291 PetscFunctionReturn(0); 3292 } 3293 EXTERN_C_END 3294 3295 3296 EXTERN_C_BEGIN 3297 extern PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3298 #if defined(PETSC_HAVE_MUMPS) 3299 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3300 #endif 3301 extern PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 3302 EXTERN_C_END 3303 3304 /*MC 3305 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3306 block sparse compressed row format. 3307 3308 Options Database Keys: 3309 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3310 3311 Level: beginner 3312 3313 .seealso: MatCreateSeqBAIJ() 3314 M*/ 3315 3316 3317 EXTERN_C_BEGIN 3318 #undef __FUNCT__ 3319 #define __FUNCT__ "MatCreate_SeqBAIJ" 3320 PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3321 { 3322 PetscErrorCode ierr; 3323 PetscMPIInt size; 3324 Mat_SeqBAIJ *b; 3325 3326 PetscFunctionBegin; 3327 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 3328 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3329 3330 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3331 B->data = (void*)b; 3332 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3333 b->row = 0; 3334 b->col = 0; 3335 b->icol = 0; 3336 b->reallocs = 0; 3337 b->saved_values = 0; 3338 3339 b->roworiented = PETSC_TRUE; 3340 b->nonew = 0; 3341 b->diag = 0; 3342 b->solve_work = 0; 3343 b->mult_work = 0; 3344 B->spptr = 0; 3345 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3346 b->keepnonzeropattern = PETSC_FALSE; 3347 b->xtoy = 0; 3348 b->XtoY = 0; 3349 B->same_nonzero = PETSC_FALSE; 3350 3351 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C", 3352 "MatGetFactorAvailable_seqbaij_petsc", 3353 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3354 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C", 3355 "MatGetFactor_seqbaij_petsc", 3356 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3357 #if defined(PETSC_HAVE_MUMPS) 3358 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr); 3359 #endif 3360 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 3361 "MatInvertBlockDiagonal_SeqBAIJ", 3362 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3363 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3364 "MatStoreValues_SeqBAIJ", 3365 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3366 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3367 "MatRetrieveValues_SeqBAIJ", 3368 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3369 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 3370 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 3371 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3372 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 3373 "MatConvert_SeqBAIJ_SeqAIJ", 3374 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3375 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 3376 "MatConvert_SeqBAIJ_SeqSBAIJ", 3377 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3378 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 3379 "MatSeqBAIJSetPreallocation_SeqBAIJ", 3380 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3381 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 3382 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 3383 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3384 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3385 PetscFunctionReturn(0); 3386 } 3387 EXTERN_C_END 3388 3389 #undef __FUNCT__ 3390 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3391 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3392 { 3393 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3394 PetscErrorCode ierr; 3395 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3396 3397 PetscFunctionBegin; 3398 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3399 3400 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3401 c->imax = a->imax; 3402 c->ilen = a->ilen; 3403 c->free_imax_ilen = PETSC_FALSE; 3404 } else { 3405 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3406 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3407 for (i=0; i<mbs; i++) { 3408 c->imax[i] = a->imax[i]; 3409 c->ilen[i] = a->ilen[i]; 3410 } 3411 c->free_imax_ilen = PETSC_TRUE; 3412 } 3413 3414 /* allocate the matrix space */ 3415 if (mallocmatspace){ 3416 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3417 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3418 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3419 c->singlemalloc = PETSC_FALSE; 3420 c->free_ij = PETSC_FALSE; 3421 c->i = a->i; 3422 c->j = a->j; 3423 c->parent = A; 3424 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3425 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3426 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3427 } else { 3428 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3429 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3430 c->singlemalloc = PETSC_TRUE; 3431 c->free_ij = PETSC_TRUE; 3432 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3433 if (mbs > 0) { 3434 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3435 if (cpvalues == MAT_COPY_VALUES) { 3436 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3437 } else { 3438 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3439 } 3440 } 3441 } 3442 } 3443 3444 c->roworiented = a->roworiented; 3445 c->nonew = a->nonew; 3446 ierr = PetscLayoutCopy(A->rmap,&C->rmap);CHKERRQ(ierr); 3447 ierr = PetscLayoutCopy(A->cmap,&C->cmap);CHKERRQ(ierr); 3448 c->bs2 = a->bs2; 3449 c->mbs = a->mbs; 3450 c->nbs = a->nbs; 3451 3452 if (a->diag) { 3453 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3454 c->diag = a->diag; 3455 c->free_diag = PETSC_FALSE; 3456 } else { 3457 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3458 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3459 for (i=0; i<mbs; i++) { 3460 c->diag[i] = a->diag[i]; 3461 } 3462 c->free_diag = PETSC_TRUE; 3463 } 3464 } else c->diag = 0; 3465 c->nz = a->nz; 3466 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3467 c->solve_work = 0; 3468 c->mult_work = 0; 3469 c->free_a = PETSC_TRUE; 3470 c->free_ij = PETSC_TRUE; 3471 C->preallocated = PETSC_TRUE; 3472 C->assembled = PETSC_TRUE; 3473 3474 c->compressedrow.use = a->compressedrow.use; 3475 c->compressedrow.nrows = a->compressedrow.nrows; 3476 c->compressedrow.check = a->compressedrow.check; 3477 if (a->compressedrow.use){ 3478 i = a->compressedrow.nrows; 3479 ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 3480 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3481 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3482 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3483 } else { 3484 c->compressedrow.use = PETSC_FALSE; 3485 c->compressedrow.i = PETSC_NULL; 3486 c->compressedrow.rindex = PETSC_NULL; 3487 } 3488 C->same_nonzero = A->same_nonzero; 3489 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3490 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3491 PetscFunctionReturn(0); 3492 } 3493 3494 #undef __FUNCT__ 3495 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3496 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3497 { 3498 PetscErrorCode ierr; 3499 3500 PetscFunctionBegin; 3501 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 3502 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3503 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3504 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE); 3505 PetscFunctionReturn(0); 3506 } 3507 3508 #undef __FUNCT__ 3509 #define __FUNCT__ "MatLoad_SeqBAIJ" 3510 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3511 { 3512 Mat_SeqBAIJ *a; 3513 PetscErrorCode ierr; 3514 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3515 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3516 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3517 PetscInt *masked,nmask,tmp,bs2,ishift; 3518 PetscMPIInt size; 3519 int fd; 3520 PetscScalar *aa; 3521 MPI_Comm comm = ((PetscObject)viewer)->comm; 3522 3523 PetscFunctionBegin; 3524 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3525 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3526 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3527 bs2 = bs*bs; 3528 3529 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3530 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3531 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3532 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3533 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3534 M = header[1]; N = header[2]; nz = header[3]; 3535 3536 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3537 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3538 3539 /* 3540 This code adds extra rows to make sure the number of rows is 3541 divisible by the blocksize 3542 */ 3543 mbs = M/bs; 3544 extra_rows = bs - M + bs*(mbs); 3545 if (extra_rows == bs) extra_rows = 0; 3546 else mbs++; 3547 if (extra_rows) { 3548 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3549 } 3550 3551 /* Set global sizes if not already set */ 3552 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3553 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3554 } else { /* Check if the matrix global sizes are correct */ 3555 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3556 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3557 } 3558 3559 /* read in row lengths */ 3560 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3561 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3562 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3563 3564 /* read in column indices */ 3565 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3566 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3567 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3568 3569 /* loop over row lengths determining block row lengths */ 3570 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3571 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3572 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3573 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3574 rowcount = 0; 3575 nzcount = 0; 3576 for (i=0; i<mbs; i++) { 3577 nmask = 0; 3578 for (j=0; j<bs; j++) { 3579 kmax = rowlengths[rowcount]; 3580 for (k=0; k<kmax; k++) { 3581 tmp = jj[nzcount++]/bs; 3582 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3583 } 3584 rowcount++; 3585 } 3586 browlengths[i] += nmask; 3587 /* zero out the mask elements we set */ 3588 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3589 } 3590 3591 /* Do preallocation */ 3592 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3593 a = (Mat_SeqBAIJ*)newmat->data; 3594 3595 /* set matrix "i" values */ 3596 a->i[0] = 0; 3597 for (i=1; i<= mbs; i++) { 3598 a->i[i] = a->i[i-1] + browlengths[i-1]; 3599 a->ilen[i-1] = browlengths[i-1]; 3600 } 3601 a->nz = 0; 3602 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3603 3604 /* read in nonzero values */ 3605 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3606 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3607 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3608 3609 /* set "a" and "j" values into matrix */ 3610 nzcount = 0; jcount = 0; 3611 for (i=0; i<mbs; i++) { 3612 nzcountb = nzcount; 3613 nmask = 0; 3614 for (j=0; j<bs; j++) { 3615 kmax = rowlengths[i*bs+j]; 3616 for (k=0; k<kmax; k++) { 3617 tmp = jj[nzcount++]/bs; 3618 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3619 } 3620 } 3621 /* sort the masked values */ 3622 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3623 3624 /* set "j" values into matrix */ 3625 maskcount = 1; 3626 for (j=0; j<nmask; j++) { 3627 a->j[jcount++] = masked[j]; 3628 mask[masked[j]] = maskcount++; 3629 } 3630 /* set "a" values into matrix */ 3631 ishift = bs2*a->i[i]; 3632 for (j=0; j<bs; j++) { 3633 kmax = rowlengths[i*bs+j]; 3634 for (k=0; k<kmax; k++) { 3635 tmp = jj[nzcountb]/bs ; 3636 block = mask[tmp] - 1; 3637 point = jj[nzcountb] - bs*tmp; 3638 idx = ishift + bs2*block + j + bs*point; 3639 a->a[idx] = (MatScalar)aa[nzcountb++]; 3640 } 3641 } 3642 /* zero out the mask elements we set */ 3643 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3644 } 3645 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3646 3647 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3648 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3649 ierr = PetscFree(aa);CHKERRQ(ierr); 3650 ierr = PetscFree(jj);CHKERRQ(ierr); 3651 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3652 3653 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3654 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3655 ierr = MatView_Private(newmat);CHKERRQ(ierr); 3656 PetscFunctionReturn(0); 3657 } 3658 3659 #undef __FUNCT__ 3660 #define __FUNCT__ "MatCreateSeqBAIJ" 3661 /*@C 3662 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3663 compressed row) format. For good matrix assembly performance the 3664 user should preallocate the matrix storage by setting the parameter nz 3665 (or the array nnz). By setting these parameters accurately, performance 3666 during matrix assembly can be increased by more than a factor of 50. 3667 3668 Collective on MPI_Comm 3669 3670 Input Parameters: 3671 + comm - MPI communicator, set to PETSC_COMM_SELF 3672 . bs - size of block 3673 . m - number of rows 3674 . n - number of columns 3675 . nz - number of nonzero blocks per block row (same for all rows) 3676 - nnz - array containing the number of nonzero blocks in the various block rows 3677 (possibly different for each block row) or PETSC_NULL 3678 3679 Output Parameter: 3680 . A - the matrix 3681 3682 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3683 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3684 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3685 3686 Options Database Keys: 3687 . -mat_no_unroll - uses code that does not unroll the loops in the 3688 block calculations (much slower) 3689 . -mat_block_size - size of the blocks to use 3690 3691 Level: intermediate 3692 3693 Notes: 3694 The number of rows and columns must be divisible by blocksize. 3695 3696 If the nnz parameter is given then the nz parameter is ignored 3697 3698 A nonzero block is any block that as 1 or more nonzeros in it 3699 3700 The block AIJ format is fully compatible with standard Fortran 77 3701 storage. That is, the stored row and column indices can begin at 3702 either one (as in Fortran) or zero. See the users' manual for details. 3703 3704 Specify the preallocated storage with either nz or nnz (not both). 3705 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3706 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3707 matrices. 3708 3709 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3710 @*/ 3711 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3712 { 3713 PetscErrorCode ierr; 3714 3715 PetscFunctionBegin; 3716 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3717 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3718 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3719 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3720 PetscFunctionReturn(0); 3721 } 3722 3723 #undef __FUNCT__ 3724 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3725 /*@C 3726 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3727 per row in the matrix. For good matrix assembly performance the 3728 user should preallocate the matrix storage by setting the parameter nz 3729 (or the array nnz). By setting these parameters accurately, performance 3730 during matrix assembly can be increased by more than a factor of 50. 3731 3732 Collective on MPI_Comm 3733 3734 Input Parameters: 3735 + A - the matrix 3736 . bs - size of block 3737 . nz - number of block nonzeros per block row (same for all rows) 3738 - nnz - array containing the number of block nonzeros in the various block rows 3739 (possibly different for each block row) or PETSC_NULL 3740 3741 Options Database Keys: 3742 . -mat_no_unroll - uses code that does not unroll the loops in the 3743 block calculations (much slower) 3744 . -mat_block_size - size of the blocks to use 3745 3746 Level: intermediate 3747 3748 Notes: 3749 If the nnz parameter is given then the nz parameter is ignored 3750 3751 You can call MatGetInfo() to get information on how effective the preallocation was; 3752 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3753 You can also run with the option -info and look for messages with the string 3754 malloc in them to see if additional memory allocation was needed. 3755 3756 The block AIJ format is fully compatible with standard Fortran 77 3757 storage. That is, the stored row and column indices can begin at 3758 either one (as in Fortran) or zero. See the users' manual for details. 3759 3760 Specify the preallocated storage with either nz or nnz (not both). 3761 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3762 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3763 3764 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3765 @*/ 3766 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3767 { 3768 PetscErrorCode ierr; 3769 3770 PetscFunctionBegin; 3771 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3772 PetscFunctionReturn(0); 3773 } 3774 3775 #undef __FUNCT__ 3776 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3777 /*@C 3778 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3779 (the default sequential PETSc format). 3780 3781 Collective on MPI_Comm 3782 3783 Input Parameters: 3784 + A - the matrix 3785 . i - the indices into j for the start of each local row (starts with zero) 3786 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3787 - v - optional values in the matrix 3788 3789 Level: developer 3790 3791 .keywords: matrix, aij, compressed row, sparse 3792 3793 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3794 @*/ 3795 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3796 { 3797 PetscErrorCode ierr; 3798 3799 PetscFunctionBegin; 3800 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3801 PetscFunctionReturn(0); 3802 } 3803 3804 3805 #undef __FUNCT__ 3806 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3807 /*@ 3808 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3809 3810 Collective on MPI_Comm 3811 3812 Input Parameters: 3813 + comm - must be an MPI communicator of size 1 3814 . bs - size of block 3815 . m - number of rows 3816 . n - number of columns 3817 . i - row indices 3818 . j - column indices 3819 - a - matrix values 3820 3821 Output Parameter: 3822 . mat - the matrix 3823 3824 Level: advanced 3825 3826 Notes: 3827 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3828 once the matrix is destroyed 3829 3830 You cannot set new nonzero locations into this matrix, that will generate an error. 3831 3832 The i and j indices are 0 based 3833 3834 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3835 3836 3837 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3838 3839 @*/ 3840 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3841 { 3842 PetscErrorCode ierr; 3843 PetscInt ii; 3844 Mat_SeqBAIJ *baij; 3845 3846 PetscFunctionBegin; 3847 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3848 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3849 3850 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3851 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3852 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3853 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3854 baij = (Mat_SeqBAIJ*)(*mat)->data; 3855 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3856 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3857 3858 baij->i = i; 3859 baij->j = j; 3860 baij->a = a; 3861 baij->singlemalloc = PETSC_FALSE; 3862 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3863 baij->free_a = PETSC_FALSE; 3864 baij->free_ij = PETSC_FALSE; 3865 3866 for (ii=0; ii<m; ii++) { 3867 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3868 #if defined(PETSC_USE_DEBUG) 3869 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3870 #endif 3871 } 3872 #if defined(PETSC_USE_DEBUG) 3873 for (ii=0; ii<baij->i[m]; ii++) { 3874 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3875 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3876 } 3877 #endif 3878 3879 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3880 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3881 PetscFunctionReturn(0); 3882 } 3883