1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <../src/mat/blockinvert.h> 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 12 /*@ 13 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 14 15 Collective on Mat 16 17 Input Parameters: 18 . mat - the matrix 19 20 Level: advanced 21 @*/ 22 PetscErrorCode MatSeqBAIJInvertBlockDiagonal(Mat mat) 23 { 24 PetscErrorCode ierr; 25 26 PetscFunctionBegin; 27 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 28 if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 29 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 30 ierr = PetscUseMethod(mat,"MatSeqBAIJInvertBlockDiagonal_C",(Mat),(mat));CHKERRQ(ierr); 31 PetscFunctionReturn(0); 32 } 33 34 EXTERN_C_BEGIN 35 #undef __FUNCT__ 36 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 37 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A) 38 { 39 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 40 PetscErrorCode ierr; 41 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 42 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 43 PetscReal shift = 0.0; 44 45 PetscFunctionBegin; 46 if (a->idiagvalid) PetscFunctionReturn(0); 47 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 48 diag_offset = a->diag; 49 if (!a->idiag) { 50 ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 51 ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 52 } 53 diag = a->idiag; 54 mdiag = a->idiag+bs2*mbs; 55 /* factor and invert each block */ 56 switch (bs){ 57 case 1: 58 for (i=0; i<mbs; i++) { 59 odiag = v + 1*diag_offset[i]; 60 diag[0] = odiag[0]; 61 mdiag[0] = odiag[0]; 62 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 63 diag += 1; 64 mdiag += 1; 65 } 66 break; 67 case 2: 68 for (i=0; i<mbs; i++) { 69 odiag = v + 4*diag_offset[i]; 70 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 71 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 72 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 73 diag += 4; 74 mdiag += 4; 75 } 76 break; 77 case 3: 78 for (i=0; i<mbs; i++) { 79 odiag = v + 9*diag_offset[i]; 80 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 81 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 82 diag[8] = odiag[8]; 83 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 84 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 85 mdiag[8] = odiag[8]; 86 ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 87 diag += 9; 88 mdiag += 9; 89 } 90 break; 91 case 4: 92 for (i=0; i<mbs; i++) { 93 odiag = v + 16*diag_offset[i]; 94 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 95 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 97 diag += 16; 98 mdiag += 16; 99 } 100 break; 101 case 5: 102 for (i=0; i<mbs; i++) { 103 odiag = v + 25*diag_offset[i]; 104 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 105 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 107 diag += 25; 108 mdiag += 25; 109 } 110 break; 111 case 6: 112 for (i=0; i<mbs; i++) { 113 odiag = v + 36*diag_offset[i]; 114 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 115 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 117 diag += 36; 118 mdiag += 36; 119 } 120 break; 121 case 7: 122 for (i=0; i<mbs; i++) { 123 odiag = v + 49*diag_offset[i]; 124 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 125 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 126 ierr = Kernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 127 diag += 49; 128 mdiag += 49; 129 } 130 break; 131 default: 132 ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr); 133 for (i=0; i<mbs; i++) { 134 odiag = v + bs2*diag_offset[i]; 135 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 136 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 137 ierr = Kernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 138 diag += bs2; 139 mdiag += bs2; 140 } 141 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 142 } 143 a->idiagvalid = PETSC_TRUE; 144 PetscFunctionReturn(0); 145 } 146 EXTERN_C_END 147 148 #undef __FUNCT__ 149 #define __FUNCT__ "MatSOR_SeqBAIJ_1" 150 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 151 { 152 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 153 PetscScalar *x,x1,s1; 154 const PetscScalar *b; 155 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 156 PetscErrorCode ierr; 157 PetscInt m = a->mbs,i,i2,nz,j; 158 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 159 160 PetscFunctionBegin; 161 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 162 its = its*lits; 163 if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 164 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 165 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 166 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 167 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 168 169 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 170 171 diag = a->diag; 172 idiag = a->idiag; 173 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 174 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 175 176 if (flag & SOR_ZERO_INITIAL_GUESS) { 177 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 178 x[0] = b[0]*idiag[0]; 179 i2 = 1; 180 idiag += 1; 181 for (i=1; i<m; i++) { 182 v = aa + ai[i]; 183 vi = aj + ai[i]; 184 nz = diag[i] - ai[i]; 185 s1 = b[i2]; 186 for (j=0; j<nz; j++) { 187 s1 -= v[j]*x[vi[j]]; 188 } 189 x[i2] = idiag[0]*s1; 190 idiag += 1; 191 i2 += 1; 192 } 193 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 194 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 195 } 196 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 197 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 198 i2 = 0; 199 mdiag = a->idiag+a->mbs; 200 for (i=0; i<m; i++) { 201 x1 = x[i2]; 202 x[i2] = mdiag[0]*x1; 203 mdiag += 1; 204 i2 += 1; 205 } 206 ierr = PetscLogFlops(m);CHKERRQ(ierr); 207 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 208 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 209 } 210 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 211 idiag = a->idiag+a->mbs - 1; 212 i2 = m - 1; 213 x1 = x[i2]; 214 x[i2] = idiag[0]*x1; 215 idiag -= 1; 216 i2 -= 1; 217 for (i=m-2; i>=0; i--) { 218 v = aa + (diag[i]+1); 219 vi = aj + diag[i] + 1; 220 nz = ai[i+1] - diag[i] - 1; 221 s1 = x[i2]; 222 for (j=0; j<nz; j++) { 223 s1 -= v[j]*x[vi[j]]; 224 } 225 x[i2] = idiag[0]*s1; 226 idiag -= 1; 227 i2 -= 1; 228 } 229 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 230 } 231 } else { 232 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 233 } 234 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 235 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 236 PetscFunctionReturn(0); 237 } 238 239 #undef __FUNCT__ 240 #define __FUNCT__ "MatSOR_SeqBAIJ_2" 241 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 242 { 243 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 244 PetscScalar *x,x1,x2,s1,s2; 245 const PetscScalar *b; 246 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 247 PetscErrorCode ierr; 248 PetscInt m = a->mbs,i,i2,nz,idx,j,it; 249 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 250 251 PetscFunctionBegin; 252 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 253 its = its*lits; 254 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 255 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 256 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 257 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 258 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 259 260 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 261 262 diag = a->diag; 263 idiag = a->idiag; 264 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 265 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 266 267 if (flag & SOR_ZERO_INITIAL_GUESS) { 268 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 269 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 270 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 271 i2 = 2; 272 idiag += 4; 273 for (i=1; i<m; i++) { 274 v = aa + 4*ai[i]; 275 vi = aj + ai[i]; 276 nz = diag[i] - ai[i]; 277 s1 = b[i2]; s2 = b[i2+1]; 278 for (j=0; j<nz; j++) { 279 idx = 2*vi[j]; 280 it = 4*j; 281 x1 = x[idx]; x2 = x[1+idx]; 282 s1 -= v[it]*x1 + v[it+2]*x2; 283 s2 -= v[it+1]*x1 + v[it+3]*x2; 284 } 285 x[i2] = idiag[0]*s1 + idiag[2]*s2; 286 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 287 idiag += 4; 288 i2 += 2; 289 } 290 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 291 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 292 } 293 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 294 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 295 i2 = 0; 296 mdiag = a->idiag+4*a->mbs; 297 for (i=0; i<m; i++) { 298 x1 = x[i2]; x2 = x[i2+1]; 299 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 300 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 301 mdiag += 4; 302 i2 += 2; 303 } 304 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 305 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 306 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 307 } 308 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 309 idiag = a->idiag+4*a->mbs - 4; 310 i2 = 2*m - 2; 311 x1 = x[i2]; x2 = x[i2+1]; 312 x[i2] = idiag[0]*x1 + idiag[2]*x2; 313 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 314 idiag -= 4; 315 i2 -= 2; 316 for (i=m-2; i>=0; i--) { 317 v = aa + 4*(diag[i]+1); 318 vi = aj + diag[i] + 1; 319 nz = ai[i+1] - diag[i] - 1; 320 s1 = x[i2]; s2 = x[i2+1]; 321 for (j=0; j<nz; j++) { 322 idx = 2*vi[j]; 323 it = 4*j; 324 x1 = x[idx]; x2 = x[1+idx]; 325 s1 -= v[it]*x1 + v[it+2]*x2; 326 s2 -= v[it+1]*x1 + v[it+3]*x2; 327 } 328 x[i2] = idiag[0]*s1 + idiag[2]*s2; 329 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 330 idiag -= 4; 331 i2 -= 2; 332 } 333 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 334 } 335 } else { 336 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 337 } 338 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 339 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 340 PetscFunctionReturn(0); 341 } 342 343 #undef __FUNCT__ 344 #define __FUNCT__ "MatSOR_SeqBAIJ_3" 345 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 346 { 347 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 348 PetscScalar *x,x1,x2,x3,s1,s2,s3; 349 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 350 const PetscScalar *b; 351 PetscErrorCode ierr; 352 PetscInt m = a->mbs,i,i2,nz,idx; 353 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 354 355 PetscFunctionBegin; 356 its = its*lits; 357 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 358 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 359 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 360 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 361 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 362 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 363 364 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 365 366 diag = a->diag; 367 idiag = a->idiag; 368 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 369 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 370 371 if (flag & SOR_ZERO_INITIAL_GUESS) { 372 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 373 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 374 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 375 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 376 i2 = 3; 377 idiag += 9; 378 for (i=1; i<m; i++) { 379 v = aa + 9*ai[i]; 380 vi = aj + ai[i]; 381 nz = diag[i] - ai[i]; 382 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 383 while (nz--) { 384 idx = 3*(*vi++); 385 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 386 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 387 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 388 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 389 v += 9; 390 } 391 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 392 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 393 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 394 idiag += 9; 395 i2 += 3; 396 } 397 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 398 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 399 } 400 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 401 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 402 i2 = 0; 403 mdiag = a->idiag+9*a->mbs; 404 for (i=0; i<m; i++) { 405 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 406 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 407 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 408 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 409 mdiag += 9; 410 i2 += 3; 411 } 412 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 413 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 414 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 415 } 416 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 417 idiag = a->idiag+9*a->mbs - 9; 418 i2 = 3*m - 3; 419 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 420 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 421 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 422 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 423 idiag -= 9; 424 i2 -= 3; 425 for (i=m-2; i>=0; i--) { 426 v = aa + 9*(diag[i]+1); 427 vi = aj + diag[i] + 1; 428 nz = ai[i+1] - diag[i] - 1; 429 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 430 while (nz--) { 431 idx = 3*(*vi++); 432 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 433 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 434 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 435 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 436 v += 9; 437 } 438 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 439 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 440 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 441 idiag -= 9; 442 i2 -= 3; 443 } 444 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 445 } 446 } else { 447 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 448 } 449 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 450 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 451 PetscFunctionReturn(0); 452 } 453 454 #undef __FUNCT__ 455 #define __FUNCT__ "MatSOR_SeqBAIJ_4" 456 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 457 { 458 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 459 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 460 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 461 const PetscScalar *b; 462 PetscErrorCode ierr; 463 PetscInt m = a->mbs,i,i2,nz,idx; 464 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 465 466 PetscFunctionBegin; 467 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 468 its = its*lits; 469 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 470 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 471 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 472 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 473 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 474 475 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 476 477 diag = a->diag; 478 idiag = a->idiag; 479 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 480 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 481 482 if (flag & SOR_ZERO_INITIAL_GUESS) { 483 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 484 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 485 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 486 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 487 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 488 i2 = 4; 489 idiag += 16; 490 for (i=1; i<m; i++) { 491 v = aa + 16*ai[i]; 492 vi = aj + ai[i]; 493 nz = diag[i] - ai[i]; 494 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 495 while (nz--) { 496 idx = 4*(*vi++); 497 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 498 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 499 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 500 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 501 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 502 v += 16; 503 } 504 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 505 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 506 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 507 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 508 idiag += 16; 509 i2 += 4; 510 } 511 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 512 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 513 } 514 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 515 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 516 i2 = 0; 517 mdiag = a->idiag+16*a->mbs; 518 for (i=0; i<m; i++) { 519 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 520 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 521 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 522 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 523 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 524 mdiag += 16; 525 i2 += 4; 526 } 527 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 528 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 529 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 530 } 531 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 532 idiag = a->idiag+16*a->mbs - 16; 533 i2 = 4*m - 4; 534 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 535 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 536 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 537 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 538 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 539 idiag -= 16; 540 i2 -= 4; 541 for (i=m-2; i>=0; i--) { 542 v = aa + 16*(diag[i]+1); 543 vi = aj + diag[i] + 1; 544 nz = ai[i+1] - diag[i] - 1; 545 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 546 while (nz--) { 547 idx = 4*(*vi++); 548 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 549 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 550 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 551 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 552 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 553 v += 16; 554 } 555 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 556 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 557 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 558 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 559 idiag -= 16; 560 i2 -= 4; 561 } 562 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 563 } 564 } else { 565 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 566 } 567 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 568 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 569 PetscFunctionReturn(0); 570 } 571 572 #undef __FUNCT__ 573 #define __FUNCT__ "MatSOR_SeqBAIJ_5" 574 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 575 { 576 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 577 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 578 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 579 const PetscScalar *b; 580 PetscErrorCode ierr; 581 PetscInt m = a->mbs,i,i2,nz,idx; 582 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 583 584 PetscFunctionBegin; 585 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 586 its = its*lits; 587 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 588 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 589 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 590 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 591 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 592 593 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 594 595 diag = a->diag; 596 idiag = a->idiag; 597 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 598 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 599 600 if (flag & SOR_ZERO_INITIAL_GUESS) { 601 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 602 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 603 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 604 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 605 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 606 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 607 i2 = 5; 608 idiag += 25; 609 for (i=1; i<m; i++) { 610 v = aa + 25*ai[i]; 611 vi = aj + ai[i]; 612 nz = diag[i] - ai[i]; 613 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 614 while (nz--) { 615 idx = 5*(*vi++); 616 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 617 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 618 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 619 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 620 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 621 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 622 v += 25; 623 } 624 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 625 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 626 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 627 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 628 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 629 idiag += 25; 630 i2 += 5; 631 } 632 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 633 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 634 } 635 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 636 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 637 i2 = 0; 638 mdiag = a->idiag+25*a->mbs; 639 for (i=0; i<m; i++) { 640 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 641 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 642 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 643 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 644 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 645 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 646 mdiag += 25; 647 i2 += 5; 648 } 649 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 650 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 651 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 652 } 653 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 654 idiag = a->idiag+25*a->mbs - 25; 655 i2 = 5*m - 5; 656 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 657 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 658 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 659 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 660 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 661 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 662 idiag -= 25; 663 i2 -= 5; 664 for (i=m-2; i>=0; i--) { 665 v = aa + 25*(diag[i]+1); 666 vi = aj + diag[i] + 1; 667 nz = ai[i+1] - diag[i] - 1; 668 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 669 while (nz--) { 670 idx = 5*(*vi++); 671 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 672 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 673 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 674 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 675 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 676 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 677 v += 25; 678 } 679 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 680 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 681 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 682 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 683 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 684 idiag -= 25; 685 i2 -= 5; 686 } 687 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 688 } 689 } else { 690 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 691 } 692 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 693 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 694 PetscFunctionReturn(0); 695 } 696 697 #undef __FUNCT__ 698 #define __FUNCT__ "MatSOR_SeqBAIJ_6" 699 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 700 { 701 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 702 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 703 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 704 const PetscScalar *b; 705 PetscErrorCode ierr; 706 PetscInt m = a->mbs,i,i2,nz,idx; 707 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 708 709 PetscFunctionBegin; 710 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 711 its = its*lits; 712 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 713 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 714 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 715 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 716 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 717 718 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 719 720 diag = a->diag; 721 idiag = a->idiag; 722 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 723 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 724 725 if (flag & SOR_ZERO_INITIAL_GUESS) { 726 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 727 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 728 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 729 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 730 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 731 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 732 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 733 i2 = 6; 734 idiag += 36; 735 for (i=1; i<m; i++) { 736 v = aa + 36*ai[i]; 737 vi = aj + ai[i]; 738 nz = diag[i] - ai[i]; 739 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 740 while (nz--) { 741 idx = 6*(*vi++); 742 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 743 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 744 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 745 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 746 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 747 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 748 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 749 v += 36; 750 } 751 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 752 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 753 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 754 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 755 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 756 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 757 idiag += 36; 758 i2 += 6; 759 } 760 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 761 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 762 } 763 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 764 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 765 i2 = 0; 766 mdiag = a->idiag+36*a->mbs; 767 for (i=0; i<m; i++) { 768 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 769 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 770 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 771 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 772 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 773 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 774 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 775 mdiag += 36; 776 i2 += 6; 777 } 778 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 779 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 780 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 781 } 782 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 783 idiag = a->idiag+36*a->mbs - 36; 784 i2 = 6*m - 6; 785 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 786 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 787 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 788 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 789 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 790 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 791 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 792 idiag -= 36; 793 i2 -= 6; 794 for (i=m-2; i>=0; i--) { 795 v = aa + 36*(diag[i]+1); 796 vi = aj + diag[i] + 1; 797 nz = ai[i+1] - diag[i] - 1; 798 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 799 while (nz--) { 800 idx = 6*(*vi++); 801 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 802 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 803 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 804 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 805 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 806 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 807 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 808 v += 36; 809 } 810 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 811 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 812 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 813 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 814 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 815 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 816 idiag -= 36; 817 i2 -= 6; 818 } 819 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 820 } 821 } else { 822 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 823 } 824 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 825 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 826 PetscFunctionReturn(0); 827 } 828 829 #undef __FUNCT__ 830 #define __FUNCT__ "MatSOR_SeqBAIJ_7" 831 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 832 { 833 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 834 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 835 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 836 const PetscScalar *b; 837 PetscErrorCode ierr; 838 PetscInt m = a->mbs,i,i2,nz,idx; 839 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 840 841 PetscFunctionBegin; 842 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 843 its = its*lits; 844 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 845 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 846 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 847 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 848 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 849 850 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 851 852 diag = a->diag; 853 idiag = a->idiag; 854 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 855 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 856 857 if (flag & SOR_ZERO_INITIAL_GUESS) { 858 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 859 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 860 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 861 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 862 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 863 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 864 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 865 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 866 i2 = 7; 867 idiag += 49; 868 for (i=1; i<m; i++) { 869 v = aa + 49*ai[i]; 870 vi = aj + ai[i]; 871 nz = diag[i] - ai[i]; 872 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 873 while (nz--) { 874 idx = 7*(*vi++); 875 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 876 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 877 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 878 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 879 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 880 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 881 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 882 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 883 v += 49; 884 } 885 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 886 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 887 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 888 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 889 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 890 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 891 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 892 idiag += 49; 893 i2 += 7; 894 } 895 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 896 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 897 } 898 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 899 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 900 i2 = 0; 901 mdiag = a->idiag+49*a->mbs; 902 for (i=0; i<m; i++) { 903 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 904 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7; 905 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7; 906 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7; 907 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7; 908 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7; 909 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7; 910 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7; 911 mdiag += 49; 912 i2 += 7; 913 } 914 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 915 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 916 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 917 } 918 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 919 idiag = a->idiag+49*a->mbs - 49; 920 i2 = 7*m - 7; 921 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 922 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 923 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 924 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 925 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 926 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 927 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 928 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 929 idiag -= 49; 930 i2 -= 7; 931 for (i=m-2; i>=0; i--) { 932 v = aa + 49*(diag[i]+1); 933 vi = aj + diag[i] + 1; 934 nz = ai[i+1] - diag[i] - 1; 935 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 936 while (nz--) { 937 idx = 7*(*vi++); 938 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 939 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 940 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 941 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 942 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 943 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 944 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 945 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 946 v += 49; 947 } 948 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 949 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 950 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 951 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 952 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 953 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 954 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 955 idiag -= 49; 956 i2 -= 7; 957 } 958 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 959 } 960 } else { 961 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 962 } 963 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 964 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 965 PetscFunctionReturn(0); 966 } 967 968 #undef __FUNCT__ 969 #define __FUNCT__ "MatSOR_SeqBAIJ_N" 970 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 971 { 972 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 973 PetscScalar *x,*work,*w,*workt; 974 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 975 const PetscScalar *b; 976 PetscErrorCode ierr; 977 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j; 978 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 979 980 PetscFunctionBegin; 981 its = its*lits; 982 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 983 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 984 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 985 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 986 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 987 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 988 989 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 990 991 diag = a->diag; 992 idiag = a->idiag; 993 if (!a->mult_work) { 994 k = PetscMax(A->rmap->n,A->cmap->n); 995 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 996 } 997 work = a->mult_work; 998 if (!a->sor_work) { 999 ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr); 1000 } 1001 w = a->sor_work; 1002 1003 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1004 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1005 1006 if (flag & SOR_ZERO_INITIAL_GUESS) { 1007 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1008 Kernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 1009 /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 1010 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 1011 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/ 1012 i2 = bs; 1013 idiag += bs2; 1014 for (i=1; i<m; i++) { 1015 v = aa + bs2*ai[i]; 1016 vi = aj + ai[i]; 1017 nz = diag[i] - ai[i]; 1018 1019 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1020 /* copy all rows of x that are needed into contiguous space */ 1021 workt = work; 1022 for (j=0; j<nz; j++) { 1023 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1024 workt += bs; 1025 } 1026 Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1027 /*s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 1028 while (nz--) { 1029 idx = N*(*vi++); 1030 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 1031 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1032 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1033 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1034 v += N2; 1035 } */ 1036 1037 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1038 /* x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1039 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1040 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/ 1041 1042 idiag += bs2; 1043 i2 += bs; 1044 } 1045 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 1046 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1047 } 1048 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 1049 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1050 i2 = 0; 1051 mdiag = a->idiag+bs2*a->mbs; 1052 ierr = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr); 1053 for (i=0; i<m; i++) { 1054 Kernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2); 1055 /* x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1056 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 1057 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 1058 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */ 1059 1060 mdiag += bs2; 1061 i2 += bs; 1062 } 1063 ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr); 1064 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1065 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 1066 } 1067 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1068 idiag = a->idiag+bs2*a->mbs - bs2; 1069 i2 = bs*m - bs; 1070 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1071 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1072 /*x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1073 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 1074 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 1075 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/ 1076 idiag -= bs2; 1077 i2 -= bs; 1078 for (i=m-2; i>=0; i--) { 1079 v = aa + bs2*(diag[i]+1); 1080 vi = aj + diag[i] + 1; 1081 nz = ai[i+1] - diag[i] - 1; 1082 1083 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1084 /* copy all rows of x that are needed into contiguous space */ 1085 workt = work; 1086 for (j=0; j<nz; j++) { 1087 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1088 workt += bs; 1089 } 1090 Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1091 /* s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 1092 while (nz--) { 1093 idx = N*(*vi++); 1094 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 1095 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1096 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1097 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1098 v += N2; 1099 } */ 1100 1101 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1102 /*x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1103 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1104 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */ 1105 idiag -= bs2; 1106 i2 -= bs; 1107 } 1108 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1109 } 1110 } else { 1111 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 1112 } 1113 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1114 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 /* 1119 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1120 */ 1121 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1122 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1123 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1124 #define matsetvaluesblocked4_ matsetvaluesblocked4 1125 #endif 1126 1127 EXTERN_C_BEGIN 1128 #undef __FUNCT__ 1129 #define __FUNCT__ "matsetvaluesblocked4_" 1130 void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 1131 { 1132 Mat A = *AA; 1133 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1134 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 1135 PetscInt *ai=a->i,*ailen=a->ilen; 1136 PetscInt *aj=a->j,stepval,lastcol = -1; 1137 const PetscScalar *value = v; 1138 MatScalar *ap,*aa = a->a,*bap; 1139 1140 PetscFunctionBegin; 1141 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 1142 stepval = (n-1)*4; 1143 for (k=0; k<m; k++) { /* loop over added rows */ 1144 row = im[k]; 1145 rp = aj + ai[row]; 1146 ap = aa + 16*ai[row]; 1147 nrow = ailen[row]; 1148 low = 0; 1149 high = nrow; 1150 for (l=0; l<n; l++) { /* loop over added columns */ 1151 col = in[l]; 1152 if (col <= lastcol) low = 0; else high = nrow; 1153 lastcol = col; 1154 value = v + k*(stepval+4 + l)*4; 1155 while (high-low > 7) { 1156 t = (low+high)/2; 1157 if (rp[t] > col) high = t; 1158 else low = t; 1159 } 1160 for (i=low; i<high; i++) { 1161 if (rp[i] > col) break; 1162 if (rp[i] == col) { 1163 bap = ap + 16*i; 1164 for (ii=0; ii<4; ii++,value+=stepval) { 1165 for (jj=ii; jj<16; jj+=4) { 1166 bap[jj] += *value++; 1167 } 1168 } 1169 goto noinsert2; 1170 } 1171 } 1172 N = nrow++ - 1; 1173 high++; /* added new column index thus must search to one higher than before */ 1174 /* shift up all the later entries in this row */ 1175 for (ii=N; ii>=i; ii--) { 1176 rp[ii+1] = rp[ii]; 1177 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1178 } 1179 if (N >= i) { 1180 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1181 } 1182 rp[i] = col; 1183 bap = ap + 16*i; 1184 for (ii=0; ii<4; ii++,value+=stepval) { 1185 for (jj=ii; jj<16; jj+=4) { 1186 bap[jj] = *value++; 1187 } 1188 } 1189 noinsert2:; 1190 low = i; 1191 } 1192 ailen[row] = nrow; 1193 } 1194 PetscFunctionReturnVoid(); 1195 } 1196 EXTERN_C_END 1197 1198 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1199 #define matsetvalues4_ MATSETVALUES4 1200 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1201 #define matsetvalues4_ matsetvalues4 1202 #endif 1203 1204 EXTERN_C_BEGIN 1205 #undef __FUNCT__ 1206 #define __FUNCT__ "MatSetValues4_" 1207 void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1208 { 1209 Mat A = *AA; 1210 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1211 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1212 PetscInt *ai=a->i,*ailen=a->ilen; 1213 PetscInt *aj=a->j,brow,bcol; 1214 PetscInt ridx,cidx,lastcol = -1; 1215 MatScalar *ap,value,*aa=a->a,*bap; 1216 1217 PetscFunctionBegin; 1218 for (k=0; k<m; k++) { /* loop over added rows */ 1219 row = im[k]; brow = row/4; 1220 rp = aj + ai[brow]; 1221 ap = aa + 16*ai[brow]; 1222 nrow = ailen[brow]; 1223 low = 0; 1224 high = nrow; 1225 for (l=0; l<n; l++) { /* loop over added columns */ 1226 col = in[l]; bcol = col/4; 1227 ridx = row % 4; cidx = col % 4; 1228 value = v[l + k*n]; 1229 if (col <= lastcol) low = 0; else high = nrow; 1230 lastcol = col; 1231 while (high-low > 7) { 1232 t = (low+high)/2; 1233 if (rp[t] > bcol) high = t; 1234 else low = t; 1235 } 1236 for (i=low; i<high; i++) { 1237 if (rp[i] > bcol) break; 1238 if (rp[i] == bcol) { 1239 bap = ap + 16*i + 4*cidx + ridx; 1240 *bap += value; 1241 goto noinsert1; 1242 } 1243 } 1244 N = nrow++ - 1; 1245 high++; /* added new column thus must search to one higher than before */ 1246 /* shift up all the later entries in this row */ 1247 for (ii=N; ii>=i; ii--) { 1248 rp[ii+1] = rp[ii]; 1249 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1250 } 1251 if (N>=i) { 1252 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1253 } 1254 rp[i] = bcol; 1255 ap[16*i + 4*cidx + ridx] = value; 1256 noinsert1:; 1257 low = i; 1258 } 1259 ailen[brow] = nrow; 1260 } 1261 PetscFunctionReturnVoid(); 1262 } 1263 EXTERN_C_END 1264 1265 /* 1266 Checks for missing diagonals 1267 */ 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1270 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1271 { 1272 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1273 PetscErrorCode ierr; 1274 PetscInt *diag,*jj = a->j,i; 1275 1276 PetscFunctionBegin; 1277 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1278 *missing = PETSC_FALSE; 1279 if (A->rmap->n > 0 && !jj) { 1280 *missing = PETSC_TRUE; 1281 if (d) *d = 0; 1282 PetscInfo(A,"Matrix has no entries therefor is missing diagonal"); 1283 } else { 1284 diag = a->diag; 1285 for (i=0; i<a->mbs; i++) { 1286 if (jj[diag[i]] != i) { 1287 *missing = PETSC_TRUE; 1288 if (d) *d = i; 1289 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1290 } 1291 } 1292 } 1293 PetscFunctionReturn(0); 1294 } 1295 1296 #undef __FUNCT__ 1297 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1298 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1299 { 1300 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1301 PetscErrorCode ierr; 1302 PetscInt i,j,m = a->mbs; 1303 1304 PetscFunctionBegin; 1305 if (!a->diag) { 1306 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1307 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1308 a->free_diag = PETSC_TRUE; 1309 } 1310 for (i=0; i<m; i++) { 1311 a->diag[i] = a->i[i+1]; 1312 for (j=a->i[i]; j<a->i[i+1]; j++) { 1313 if (a->j[j] == i) { 1314 a->diag[i] = j; 1315 break; 1316 } 1317 } 1318 } 1319 PetscFunctionReturn(0); 1320 } 1321 1322 1323 extern PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1324 1325 #undef __FUNCT__ 1326 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1327 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 1328 { 1329 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1330 PetscErrorCode ierr; 1331 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt; 1332 PetscInt *tia, *tja; 1333 1334 PetscFunctionBegin; 1335 *nn = n; 1336 if (!ia) PetscFunctionReturn(0); 1337 if (symmetric) { 1338 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1339 nz = tia[n]; 1340 } else { 1341 tia = a->i; tja = a->j; 1342 } 1343 1344 if (!blockcompressed && bs > 1) { 1345 (*nn) *= bs; 1346 /* malloc & create the natural set of indices */ 1347 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1348 if (n) { 1349 (*ia)[0] = 0; 1350 for (j=1; j<bs; j++) { 1351 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1352 } 1353 } 1354 1355 for (i=1; i<n; i++) { 1356 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1357 for (j=1; j<bs; j++) { 1358 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1359 } 1360 } 1361 if (n) { 1362 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1363 } 1364 1365 if (ja) { 1366 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1367 cnt = 0; 1368 for (i=0; i<n; i++) { 1369 for (j=0; j<bs; j++) { 1370 for (k=tia[i]; k<tia[i+1]; k++) { 1371 for (l=0; l<bs; l++) { 1372 (*ja)[cnt++] = bs*tja[k] + l; 1373 } 1374 } 1375 } 1376 } 1377 } 1378 1379 n *= bs; 1380 nz *= bs*bs; 1381 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1382 ierr = PetscFree(tia);CHKERRQ(ierr); 1383 ierr = PetscFree(tja);CHKERRQ(ierr); 1384 } 1385 } else if (oshift == 1) { 1386 if (symmetric) { 1387 PetscInt nz = tia[A->rmap->n/bs]; 1388 /* add 1 to i and j indices */ 1389 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1390 *ia = tia; 1391 if (ja) { 1392 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1393 *ja = tja; 1394 } 1395 } else { 1396 PetscInt nz = a->i[A->rmap->n/bs]; 1397 /* malloc space and add 1 to i and j indices */ 1398 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1399 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1400 if (ja) { 1401 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1402 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1403 } 1404 } 1405 } else { 1406 *ia = tia; 1407 if (ja) *ja = tja; 1408 } 1409 1410 PetscFunctionReturn(0); 1411 } 1412 1413 #undef __FUNCT__ 1414 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1415 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 1416 { 1417 PetscErrorCode ierr; 1418 1419 PetscFunctionBegin; 1420 if (!ia) PetscFunctionReturn(0); 1421 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1422 ierr = PetscFree(*ia);CHKERRQ(ierr); 1423 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1424 } 1425 PetscFunctionReturn(0); 1426 } 1427 1428 #undef __FUNCT__ 1429 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1430 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1431 { 1432 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1433 PetscErrorCode ierr; 1434 1435 PetscFunctionBegin; 1436 #if defined(PETSC_USE_LOG) 1437 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1438 #endif 1439 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1440 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1441 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1442 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1443 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1444 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1445 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1446 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1447 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1448 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1449 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1450 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1451 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1452 1453 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1454 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1455 ierr = PetscFree(A->data);CHKERRQ(ierr); 1456 1457 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1458 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1459 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1460 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1461 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1462 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1463 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1464 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1465 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1466 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",PETSC_NULL);CHKERRQ(ierr); 1467 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1468 PetscFunctionReturn(0); 1469 } 1470 1471 #undef __FUNCT__ 1472 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1473 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1474 { 1475 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1476 PetscErrorCode ierr; 1477 1478 PetscFunctionBegin; 1479 switch (op) { 1480 case MAT_ROW_ORIENTED: 1481 a->roworiented = flg; 1482 break; 1483 case MAT_KEEP_NONZERO_PATTERN: 1484 a->keepnonzeropattern = flg; 1485 break; 1486 case MAT_NEW_NONZERO_LOCATIONS: 1487 a->nonew = (flg ? 0 : 1); 1488 break; 1489 case MAT_NEW_NONZERO_LOCATION_ERR: 1490 a->nonew = (flg ? -1 : 0); 1491 break; 1492 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1493 a->nonew = (flg ? -2 : 0); 1494 break; 1495 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1496 a->nounused = (flg ? -1 : 0); 1497 break; 1498 case MAT_CHECK_COMPRESSED_ROW: 1499 a->compressedrow.check = flg; 1500 break; 1501 case MAT_NEW_DIAGONALS: 1502 case MAT_IGNORE_OFF_PROC_ENTRIES: 1503 case MAT_USE_HASH_TABLE: 1504 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1505 break; 1506 case MAT_SYMMETRIC: 1507 case MAT_STRUCTURALLY_SYMMETRIC: 1508 case MAT_HERMITIAN: 1509 case MAT_SYMMETRY_ETERNAL: 1510 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1511 break; 1512 default: 1513 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1514 } 1515 PetscFunctionReturn(0); 1516 } 1517 1518 #undef __FUNCT__ 1519 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1520 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1521 { 1522 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1523 PetscErrorCode ierr; 1524 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1525 MatScalar *aa,*aa_i; 1526 PetscScalar *v_i; 1527 1528 PetscFunctionBegin; 1529 bs = A->rmap->bs; 1530 ai = a->i; 1531 aj = a->j; 1532 aa = a->a; 1533 bs2 = a->bs2; 1534 1535 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1536 1537 bn = row/bs; /* Block number */ 1538 bp = row % bs; /* Block Position */ 1539 M = ai[bn+1] - ai[bn]; 1540 *nz = bs*M; 1541 1542 if (v) { 1543 *v = 0; 1544 if (*nz) { 1545 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1546 for (i=0; i<M; i++) { /* for each block in the block row */ 1547 v_i = *v + i*bs; 1548 aa_i = aa + bs2*(ai[bn] + i); 1549 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1550 } 1551 } 1552 } 1553 1554 if (idx) { 1555 *idx = 0; 1556 if (*nz) { 1557 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1558 for (i=0; i<M; i++) { /* for each block in the block row */ 1559 idx_i = *idx + i*bs; 1560 itmp = bs*aj[ai[bn] + i]; 1561 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1562 } 1563 } 1564 } 1565 PetscFunctionReturn(0); 1566 } 1567 1568 #undef __FUNCT__ 1569 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1570 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1571 { 1572 PetscErrorCode ierr; 1573 1574 PetscFunctionBegin; 1575 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1576 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1577 PetscFunctionReturn(0); 1578 } 1579 1580 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1581 1582 #undef __FUNCT__ 1583 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1584 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1585 { 1586 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1587 Mat C; 1588 PetscErrorCode ierr; 1589 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1590 PetscInt *rows,*cols,bs2=a->bs2; 1591 MatScalar *array; 1592 1593 PetscFunctionBegin; 1594 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1595 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1596 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1597 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1598 1599 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1600 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1601 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1602 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1603 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1604 ierr = PetscFree(col);CHKERRQ(ierr); 1605 } else { 1606 C = *B; 1607 } 1608 1609 array = a->a; 1610 ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 1611 for (i=0; i<mbs; i++) { 1612 cols[0] = i*bs; 1613 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1614 len = ai[i+1] - ai[i]; 1615 for (j=0; j<len; j++) { 1616 rows[0] = (*aj++)*bs; 1617 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1618 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1619 array += bs2; 1620 } 1621 } 1622 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1623 1624 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1625 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1626 1627 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1628 *B = C; 1629 } else { 1630 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1631 } 1632 PetscFunctionReturn(0); 1633 } 1634 1635 EXTERN_C_BEGIN 1636 #undef __FUNCT__ 1637 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1638 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1639 { 1640 PetscErrorCode ierr; 1641 Mat Btrans; 1642 1643 PetscFunctionBegin; 1644 *f = PETSC_FALSE; 1645 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1646 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1647 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1648 PetscFunctionReturn(0); 1649 } 1650 EXTERN_C_END 1651 1652 #undef __FUNCT__ 1653 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1654 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1655 { 1656 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1657 PetscErrorCode ierr; 1658 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1659 int fd; 1660 PetscScalar *aa; 1661 FILE *file; 1662 1663 PetscFunctionBegin; 1664 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1665 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1666 col_lens[0] = MAT_FILE_CLASSID; 1667 1668 col_lens[1] = A->rmap->N; 1669 col_lens[2] = A->cmap->n; 1670 col_lens[3] = a->nz*bs2; 1671 1672 /* store lengths of each row and write (including header) to file */ 1673 count = 0; 1674 for (i=0; i<a->mbs; i++) { 1675 for (j=0; j<bs; j++) { 1676 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1677 } 1678 } 1679 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1680 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1681 1682 /* store column indices (zero start index) */ 1683 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1684 count = 0; 1685 for (i=0; i<a->mbs; i++) { 1686 for (j=0; j<bs; j++) { 1687 for (k=a->i[i]; k<a->i[i+1]; k++) { 1688 for (l=0; l<bs; l++) { 1689 jj[count++] = bs*a->j[k] + l; 1690 } 1691 } 1692 } 1693 } 1694 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1695 ierr = PetscFree(jj);CHKERRQ(ierr); 1696 1697 /* store nonzero values */ 1698 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1699 count = 0; 1700 for (i=0; i<a->mbs; i++) { 1701 for (j=0; j<bs; j++) { 1702 for (k=a->i[i]; k<a->i[i+1]; k++) { 1703 for (l=0; l<bs; l++) { 1704 aa[count++] = a->a[bs2*k + l*bs + j]; 1705 } 1706 } 1707 } 1708 } 1709 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1710 ierr = PetscFree(aa);CHKERRQ(ierr); 1711 1712 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1713 if (file) { 1714 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1715 } 1716 PetscFunctionReturn(0); 1717 } 1718 1719 #undef __FUNCT__ 1720 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1721 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1722 { 1723 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1724 PetscErrorCode ierr; 1725 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1726 PetscViewerFormat format; 1727 1728 PetscFunctionBegin; 1729 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1730 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1731 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1732 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1733 Mat aij; 1734 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1735 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1736 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1737 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1738 PetscFunctionReturn(0); 1739 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1740 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1741 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1742 for (i=0; i<a->mbs; i++) { 1743 for (j=0; j<bs; j++) { 1744 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1745 for (k=a->i[i]; k<a->i[i+1]; k++) { 1746 for (l=0; l<bs; l++) { 1747 #if defined(PETSC_USE_COMPLEX) 1748 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1749 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1750 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1751 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1752 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1753 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1754 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1755 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1756 } 1757 #else 1758 if (a->a[bs2*k + l*bs + j] != 0.0) { 1759 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1760 } 1761 #endif 1762 } 1763 } 1764 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1765 } 1766 } 1767 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1768 } else { 1769 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1770 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1771 for (i=0; i<a->mbs; i++) { 1772 for (j=0; j<bs; j++) { 1773 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1774 for (k=a->i[i]; k<a->i[i+1]; k++) { 1775 for (l=0; l<bs; l++) { 1776 #if defined(PETSC_USE_COMPLEX) 1777 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1778 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1779 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1780 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1781 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1782 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1783 } else { 1784 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1785 } 1786 #else 1787 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1788 #endif 1789 } 1790 } 1791 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1792 } 1793 } 1794 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1795 } 1796 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1797 PetscFunctionReturn(0); 1798 } 1799 1800 #undef __FUNCT__ 1801 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1802 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1803 { 1804 Mat A = (Mat) Aa; 1805 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1806 PetscErrorCode ierr; 1807 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1808 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1809 MatScalar *aa; 1810 PetscViewer viewer; 1811 1812 PetscFunctionBegin; 1813 1814 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1815 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1816 1817 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1818 1819 /* loop over matrix elements drawing boxes */ 1820 color = PETSC_DRAW_BLUE; 1821 for (i=0,row=0; i<mbs; i++,row+=bs) { 1822 for (j=a->i[i]; j<a->i[i+1]; j++) { 1823 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1824 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1825 aa = a->a + j*bs2; 1826 for (k=0; k<bs; k++) { 1827 for (l=0; l<bs; l++) { 1828 if (PetscRealPart(*aa++) >= 0.) continue; 1829 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1830 } 1831 } 1832 } 1833 } 1834 color = PETSC_DRAW_CYAN; 1835 for (i=0,row=0; i<mbs; i++,row+=bs) { 1836 for (j=a->i[i]; j<a->i[i+1]; j++) { 1837 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1838 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1839 aa = a->a + j*bs2; 1840 for (k=0; k<bs; k++) { 1841 for (l=0; l<bs; l++) { 1842 if (PetscRealPart(*aa++) != 0.) continue; 1843 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1844 } 1845 } 1846 } 1847 } 1848 1849 color = PETSC_DRAW_RED; 1850 for (i=0,row=0; i<mbs; i++,row+=bs) { 1851 for (j=a->i[i]; j<a->i[i+1]; j++) { 1852 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1853 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1854 aa = a->a + j*bs2; 1855 for (k=0; k<bs; k++) { 1856 for (l=0; l<bs; l++) { 1857 if (PetscRealPart(*aa++) <= 0.) continue; 1858 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1859 } 1860 } 1861 } 1862 } 1863 PetscFunctionReturn(0); 1864 } 1865 1866 #undef __FUNCT__ 1867 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1868 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1869 { 1870 PetscErrorCode ierr; 1871 PetscReal xl,yl,xr,yr,w,h; 1872 PetscDraw draw; 1873 PetscBool isnull; 1874 1875 PetscFunctionBegin; 1876 1877 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1878 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1879 1880 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1881 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1882 xr += w; yr += h; xl = -w; yl = -h; 1883 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1884 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1885 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1886 PetscFunctionReturn(0); 1887 } 1888 1889 #undef __FUNCT__ 1890 #define __FUNCT__ "MatView_SeqBAIJ" 1891 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1892 { 1893 PetscErrorCode ierr; 1894 PetscBool iascii,isbinary,isdraw; 1895 1896 PetscFunctionBegin; 1897 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1898 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1899 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1900 if (iascii){ 1901 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1902 } else if (isbinary) { 1903 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1904 } else if (isdraw) { 1905 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1906 } else { 1907 Mat B; 1908 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1909 ierr = MatView(B,viewer);CHKERRQ(ierr); 1910 ierr = MatDestroy(&B);CHKERRQ(ierr); 1911 } 1912 PetscFunctionReturn(0); 1913 } 1914 1915 1916 #undef __FUNCT__ 1917 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1918 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1919 { 1920 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1921 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1922 PetscInt *ai = a->i,*ailen = a->ilen; 1923 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1924 MatScalar *ap,*aa = a->a; 1925 1926 PetscFunctionBegin; 1927 for (k=0; k<m; k++) { /* loop over rows */ 1928 row = im[k]; brow = row/bs; 1929 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1930 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1931 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1932 nrow = ailen[brow]; 1933 for (l=0; l<n; l++) { /* loop over columns */ 1934 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1935 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1936 col = in[l] ; 1937 bcol = col/bs; 1938 cidx = col%bs; 1939 ridx = row%bs; 1940 high = nrow; 1941 low = 0; /* assume unsorted */ 1942 while (high-low > 5) { 1943 t = (low+high)/2; 1944 if (rp[t] > bcol) high = t; 1945 else low = t; 1946 } 1947 for (i=low; i<high; i++) { 1948 if (rp[i] > bcol) break; 1949 if (rp[i] == bcol) { 1950 *v++ = ap[bs2*i+bs*cidx+ridx]; 1951 goto finished; 1952 } 1953 } 1954 *v++ = 0.0; 1955 finished:; 1956 } 1957 } 1958 PetscFunctionReturn(0); 1959 } 1960 1961 #undef __FUNCT__ 1962 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1963 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1964 { 1965 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1966 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1967 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1968 PetscErrorCode ierr; 1969 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1970 PetscBool roworiented=a->roworiented; 1971 const PetscScalar *value = v; 1972 MatScalar *ap,*aa = a->a,*bap; 1973 1974 PetscFunctionBegin; 1975 if (roworiented) { 1976 stepval = (n-1)*bs; 1977 } else { 1978 stepval = (m-1)*bs; 1979 } 1980 for (k=0; k<m; k++) { /* loop over added rows */ 1981 row = im[k]; 1982 if (row < 0) continue; 1983 #if defined(PETSC_USE_DEBUG) 1984 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1985 #endif 1986 rp = aj + ai[row]; 1987 ap = aa + bs2*ai[row]; 1988 rmax = imax[row]; 1989 nrow = ailen[row]; 1990 low = 0; 1991 high = nrow; 1992 for (l=0; l<n; l++) { /* loop over added columns */ 1993 if (in[l] < 0) continue; 1994 #if defined(PETSC_USE_DEBUG) 1995 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1996 #endif 1997 col = in[l]; 1998 if (roworiented) { 1999 value = v + (k*(stepval+bs) + l)*bs; 2000 } else { 2001 value = v + (l*(stepval+bs) + k)*bs; 2002 } 2003 if (col <= lastcol) low = 0; else high = nrow; 2004 lastcol = col; 2005 while (high-low > 7) { 2006 t = (low+high)/2; 2007 if (rp[t] > col) high = t; 2008 else low = t; 2009 } 2010 for (i=low; i<high; i++) { 2011 if (rp[i] > col) break; 2012 if (rp[i] == col) { 2013 bap = ap + bs2*i; 2014 if (roworiented) { 2015 if (is == ADD_VALUES) { 2016 for (ii=0; ii<bs; ii++,value+=stepval) { 2017 for (jj=ii; jj<bs2; jj+=bs) { 2018 bap[jj] += *value++; 2019 } 2020 } 2021 } else { 2022 for (ii=0; ii<bs; ii++,value+=stepval) { 2023 for (jj=ii; jj<bs2; jj+=bs) { 2024 bap[jj] = *value++; 2025 } 2026 } 2027 } 2028 } else { 2029 if (is == ADD_VALUES) { 2030 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2031 for (jj=0; jj<bs; jj++) { 2032 bap[jj] += value[jj]; 2033 } 2034 bap += bs; 2035 } 2036 } else { 2037 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2038 for (jj=0; jj<bs; jj++) { 2039 bap[jj] = value[jj]; 2040 } 2041 bap += bs; 2042 } 2043 } 2044 } 2045 goto noinsert2; 2046 } 2047 } 2048 if (nonew == 1) goto noinsert2; 2049 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2050 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2051 N = nrow++ - 1; high++; 2052 /* shift up all the later entries in this row */ 2053 for (ii=N; ii>=i; ii--) { 2054 rp[ii+1] = rp[ii]; 2055 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2056 } 2057 if (N >= i) { 2058 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2059 } 2060 rp[i] = col; 2061 bap = ap + bs2*i; 2062 if (roworiented) { 2063 for (ii=0; ii<bs; ii++,value+=stepval) { 2064 for (jj=ii; jj<bs2; jj+=bs) { 2065 bap[jj] = *value++; 2066 } 2067 } 2068 } else { 2069 for (ii=0; ii<bs; ii++,value+=stepval) { 2070 for (jj=0; jj<bs; jj++) { 2071 *bap++ = *value++; 2072 } 2073 } 2074 } 2075 noinsert2:; 2076 low = i; 2077 } 2078 ailen[row] = nrow; 2079 } 2080 PetscFunctionReturn(0); 2081 } 2082 2083 #undef __FUNCT__ 2084 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 2085 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 2086 { 2087 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2088 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 2089 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 2090 PetscErrorCode ierr; 2091 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 2092 MatScalar *aa = a->a,*ap; 2093 PetscReal ratio=0.6; 2094 2095 PetscFunctionBegin; 2096 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 2097 2098 if (m) rmax = ailen[0]; 2099 for (i=1; i<mbs; i++) { 2100 /* move each row back by the amount of empty slots (fshift) before it*/ 2101 fshift += imax[i-1] - ailen[i-1]; 2102 rmax = PetscMax(rmax,ailen[i]); 2103 if (fshift) { 2104 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 2105 N = ailen[i]; 2106 for (j=0; j<N; j++) { 2107 ip[j-fshift] = ip[j]; 2108 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2109 } 2110 } 2111 ai[i] = ai[i-1] + ailen[i-1]; 2112 } 2113 if (mbs) { 2114 fshift += imax[mbs-1] - ailen[mbs-1]; 2115 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 2116 } 2117 /* reset ilen and imax for each row */ 2118 for (i=0; i<mbs; i++) { 2119 ailen[i] = imax[i] = ai[i+1] - ai[i]; 2120 } 2121 a->nz = ai[mbs]; 2122 2123 /* diagonals may have moved, so kill the diagonal pointers */ 2124 a->idiagvalid = PETSC_FALSE; 2125 if (fshift && a->diag) { 2126 ierr = PetscFree(a->diag);CHKERRQ(ierr); 2127 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2128 a->diag = 0; 2129 } 2130 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 2131 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 2132 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 2133 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 2134 A->info.mallocs += a->reallocs; 2135 a->reallocs = 0; 2136 A->info.nz_unneeded = (PetscReal)fshift*bs2; 2137 2138 ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 2139 A->same_nonzero = PETSC_TRUE; 2140 PetscFunctionReturn(0); 2141 } 2142 2143 /* 2144 This function returns an array of flags which indicate the locations of contiguous 2145 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2146 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2147 Assume: sizes should be long enough to hold all the values. 2148 */ 2149 #undef __FUNCT__ 2150 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 2151 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2152 { 2153 PetscInt i,j,k,row; 2154 PetscBool flg; 2155 2156 PetscFunctionBegin; 2157 for (i=0,j=0; i<n; j++) { 2158 row = idx[i]; 2159 if (row%bs!=0) { /* Not the begining of a block */ 2160 sizes[j] = 1; 2161 i++; 2162 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2163 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 2164 i++; 2165 } else { /* Begining of the block, so check if the complete block exists */ 2166 flg = PETSC_TRUE; 2167 for (k=1; k<bs; k++) { 2168 if (row+k != idx[i+k]) { /* break in the block */ 2169 flg = PETSC_FALSE; 2170 break; 2171 } 2172 } 2173 if (flg) { /* No break in the bs */ 2174 sizes[j] = bs; 2175 i+= bs; 2176 } else { 2177 sizes[j] = 1; 2178 i++; 2179 } 2180 } 2181 } 2182 *bs_max = j; 2183 PetscFunctionReturn(0); 2184 } 2185 2186 #undef __FUNCT__ 2187 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2188 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2189 { 2190 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2191 PetscErrorCode ierr; 2192 PetscInt i,j,k,count,*rows; 2193 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2194 PetscScalar zero = 0.0; 2195 MatScalar *aa; 2196 const PetscScalar *xx; 2197 PetscScalar *bb; 2198 2199 PetscFunctionBegin; 2200 /* fix right hand side if needed */ 2201 if (x && b) { 2202 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2203 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2204 for (i=0; i<is_n; i++) { 2205 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2206 } 2207 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2208 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2209 } 2210 2211 /* Make a copy of the IS and sort it */ 2212 /* allocate memory for rows,sizes */ 2213 ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2214 2215 /* copy IS values to rows, and sort them */ 2216 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2217 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2218 2219 if (baij->keepnonzeropattern) { 2220 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2221 bs_max = is_n; 2222 A->same_nonzero = PETSC_TRUE; 2223 } else { 2224 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2225 A->same_nonzero = PETSC_FALSE; 2226 } 2227 2228 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2229 row = rows[j]; 2230 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2231 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2232 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2233 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2234 if (diag != (PetscScalar)0.0) { 2235 if (baij->ilen[row/bs] > 0) { 2236 baij->ilen[row/bs] = 1; 2237 baij->j[baij->i[row/bs]] = row/bs; 2238 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2239 } 2240 /* Now insert all the diagonal values for this bs */ 2241 for (k=0; k<bs; k++) { 2242 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2243 } 2244 } else { /* (diag == 0.0) */ 2245 baij->ilen[row/bs] = 0; 2246 } /* end (diag == 0.0) */ 2247 } else { /* (sizes[i] != bs) */ 2248 #if defined (PETSC_USE_DEBUG) 2249 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2250 #endif 2251 for (k=0; k<count; k++) { 2252 aa[0] = zero; 2253 aa += bs; 2254 } 2255 if (diag != (PetscScalar)0.0) { 2256 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2257 } 2258 } 2259 } 2260 2261 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2262 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2263 PetscFunctionReturn(0); 2264 } 2265 2266 #undef __FUNCT__ 2267 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2268 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2269 { 2270 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2271 PetscErrorCode ierr; 2272 PetscInt i,j,k,count; 2273 PetscInt bs=A->rmap->bs,bs2=baij->bs2,row,col; 2274 PetscScalar zero = 0.0; 2275 MatScalar *aa; 2276 const PetscScalar *xx; 2277 PetscScalar *bb; 2278 PetscBool *zeroed,vecs = PETSC_FALSE; 2279 2280 PetscFunctionBegin; 2281 /* fix right hand side if needed */ 2282 if (x && b) { 2283 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2284 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2285 vecs = PETSC_TRUE; 2286 } 2287 A->same_nonzero = PETSC_TRUE; 2288 2289 /* zero the columns */ 2290 ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr); 2291 ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 2292 for (i=0; i<is_n; i++) { 2293 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2294 zeroed[is_idx[i]] = PETSC_TRUE; 2295 } 2296 for (i=0; i<A->rmap->N; i++) { 2297 if (!zeroed[i]) { 2298 row = i/bs; 2299 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2300 for (k=0; k<bs; k++) { 2301 col = bs*baij->j[j] + k; 2302 if (zeroed[col]) { 2303 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2304 if (vecs) bb[i] -= aa[0]*xx[col]; 2305 aa[0] = 0.0; 2306 } 2307 } 2308 } 2309 } else if (vecs) bb[i] = diag*xx[i]; 2310 } 2311 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2312 if (vecs) { 2313 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2314 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2315 } 2316 2317 /* zero the rows */ 2318 for (i=0; i<is_n; i++) { 2319 row = is_idx[i]; 2320 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2321 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2322 for (k=0; k<count; k++) { 2323 aa[0] = zero; 2324 aa += bs; 2325 } 2326 if (diag != (PetscScalar)0.0) { 2327 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2328 } 2329 } 2330 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2331 PetscFunctionReturn(0); 2332 } 2333 2334 #undef __FUNCT__ 2335 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2336 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2337 { 2338 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2339 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2340 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2341 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2342 PetscErrorCode ierr; 2343 PetscInt ridx,cidx,bs2=a->bs2; 2344 PetscBool roworiented=a->roworiented; 2345 MatScalar *ap,value,*aa=a->a,*bap; 2346 2347 PetscFunctionBegin; 2348 if (v) PetscValidScalarPointer(v,6); 2349 for (k=0; k<m; k++) { /* loop over added rows */ 2350 row = im[k]; 2351 brow = row/bs; 2352 if (row < 0) continue; 2353 #if defined(PETSC_USE_DEBUG) 2354 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2355 #endif 2356 rp = aj + ai[brow]; 2357 ap = aa + bs2*ai[brow]; 2358 rmax = imax[brow]; 2359 nrow = ailen[brow]; 2360 low = 0; 2361 high = nrow; 2362 for (l=0; l<n; l++) { /* loop over added columns */ 2363 if (in[l] < 0) continue; 2364 #if defined(PETSC_USE_DEBUG) 2365 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2366 #endif 2367 col = in[l]; bcol = col/bs; 2368 ridx = row % bs; cidx = col % bs; 2369 if (roworiented) { 2370 value = v[l + k*n]; 2371 } else { 2372 value = v[k + l*m]; 2373 } 2374 if (col <= lastcol) low = 0; else high = nrow; 2375 lastcol = col; 2376 while (high-low > 7) { 2377 t = (low+high)/2; 2378 if (rp[t] > bcol) high = t; 2379 else low = t; 2380 } 2381 for (i=low; i<high; i++) { 2382 if (rp[i] > bcol) break; 2383 if (rp[i] == bcol) { 2384 bap = ap + bs2*i + bs*cidx + ridx; 2385 if (is == ADD_VALUES) *bap += value; 2386 else *bap = value; 2387 goto noinsert1; 2388 } 2389 } 2390 if (nonew == 1) goto noinsert1; 2391 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2392 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2393 N = nrow++ - 1; high++; 2394 /* shift up all the later entries in this row */ 2395 for (ii=N; ii>=i; ii--) { 2396 rp[ii+1] = rp[ii]; 2397 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2398 } 2399 if (N>=i) { 2400 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2401 } 2402 rp[i] = bcol; 2403 ap[bs2*i + bs*cidx + ridx] = value; 2404 a->nz++; 2405 noinsert1:; 2406 low = i; 2407 } 2408 ailen[brow] = nrow; 2409 } 2410 A->same_nonzero = PETSC_FALSE; 2411 PetscFunctionReturn(0); 2412 } 2413 2414 #undef __FUNCT__ 2415 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2416 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2417 { 2418 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2419 Mat outA; 2420 PetscErrorCode ierr; 2421 PetscBool row_identity,col_identity; 2422 2423 PetscFunctionBegin; 2424 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2425 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2426 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2427 if (!row_identity || !col_identity) { 2428 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2429 } 2430 2431 outA = inA; 2432 inA->factortype = MAT_FACTOR_LU; 2433 2434 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2435 2436 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2437 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2438 a->row = row; 2439 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2440 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2441 a->col = col; 2442 2443 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2444 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2445 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2446 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2447 2448 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2449 if (!a->solve_work) { 2450 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2451 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2452 } 2453 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2454 2455 PetscFunctionReturn(0); 2456 } 2457 2458 EXTERN_C_BEGIN 2459 #undef __FUNCT__ 2460 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2461 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2462 { 2463 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2464 PetscInt i,nz,mbs; 2465 2466 PetscFunctionBegin; 2467 nz = baij->maxnz; 2468 mbs = baij->mbs; 2469 for (i=0; i<nz; i++) { 2470 baij->j[i] = indices[i]; 2471 } 2472 baij->nz = nz; 2473 for (i=0; i<mbs; i++) { 2474 baij->ilen[i] = baij->imax[i]; 2475 } 2476 PetscFunctionReturn(0); 2477 } 2478 EXTERN_C_END 2479 2480 #undef __FUNCT__ 2481 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2482 /*@ 2483 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2484 in the matrix. 2485 2486 Input Parameters: 2487 + mat - the SeqBAIJ matrix 2488 - indices - the column indices 2489 2490 Level: advanced 2491 2492 Notes: 2493 This can be called if you have precomputed the nonzero structure of the 2494 matrix and want to provide it to the matrix object to improve the performance 2495 of the MatSetValues() operation. 2496 2497 You MUST have set the correct numbers of nonzeros per row in the call to 2498 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2499 2500 MUST be called before any calls to MatSetValues(); 2501 2502 @*/ 2503 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2504 { 2505 PetscErrorCode ierr; 2506 2507 PetscFunctionBegin; 2508 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2509 PetscValidPointer(indices,2); 2510 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr); 2511 PetscFunctionReturn(0); 2512 } 2513 2514 #undef __FUNCT__ 2515 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2516 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2517 { 2518 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2519 PetscErrorCode ierr; 2520 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2521 PetscReal atmp; 2522 PetscScalar *x,zero = 0.0; 2523 MatScalar *aa; 2524 PetscInt ncols,brow,krow,kcol; 2525 2526 PetscFunctionBegin; 2527 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2528 bs = A->rmap->bs; 2529 aa = a->a; 2530 ai = a->i; 2531 aj = a->j; 2532 mbs = a->mbs; 2533 2534 ierr = VecSet(v,zero);CHKERRQ(ierr); 2535 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2536 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2537 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2538 for (i=0; i<mbs; i++) { 2539 ncols = ai[1] - ai[0]; ai++; 2540 brow = bs*i; 2541 for (j=0; j<ncols; j++){ 2542 for (kcol=0; kcol<bs; kcol++){ 2543 for (krow=0; krow<bs; krow++){ 2544 atmp = PetscAbsScalar(*aa);aa++; 2545 row = brow + krow; /* row index */ 2546 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2547 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2548 } 2549 } 2550 aj++; 2551 } 2552 } 2553 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2554 PetscFunctionReturn(0); 2555 } 2556 2557 #undef __FUNCT__ 2558 #define __FUNCT__ "MatCopy_SeqBAIJ" 2559 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2560 { 2561 PetscErrorCode ierr; 2562 2563 PetscFunctionBegin; 2564 /* If the two matrices have the same copy implementation, use fast copy. */ 2565 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2566 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2567 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2568 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2569 2570 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2571 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2572 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2573 } else { 2574 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2575 } 2576 PetscFunctionReturn(0); 2577 } 2578 2579 #undef __FUNCT__ 2580 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2581 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2582 { 2583 PetscErrorCode ierr; 2584 2585 PetscFunctionBegin; 2586 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2587 PetscFunctionReturn(0); 2588 } 2589 2590 #undef __FUNCT__ 2591 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2592 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2593 { 2594 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2595 PetscFunctionBegin; 2596 *array = a->a; 2597 PetscFunctionReturn(0); 2598 } 2599 2600 #undef __FUNCT__ 2601 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2602 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2603 { 2604 PetscFunctionBegin; 2605 PetscFunctionReturn(0); 2606 } 2607 2608 #undef __FUNCT__ 2609 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2610 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2611 { 2612 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2613 PetscErrorCode ierr; 2614 PetscInt i,bs=Y->rmap->bs,j,bs2; 2615 PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz); 2616 2617 PetscFunctionBegin; 2618 if (str == SAME_NONZERO_PATTERN) { 2619 PetscScalar alpha = a; 2620 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2621 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2622 if (y->xtoy && y->XtoY != X) { 2623 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2624 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2625 } 2626 if (!y->xtoy) { /* get xtoy */ 2627 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2628 y->XtoY = X; 2629 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2630 } 2631 bs2 = bs*bs; 2632 for (i=0; i<x->nz; i++) { 2633 j = 0; 2634 while (j < bs2){ 2635 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2636 j++; 2637 } 2638 } 2639 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2640 } else { 2641 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2642 } 2643 PetscFunctionReturn(0); 2644 } 2645 2646 #undef __FUNCT__ 2647 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ" 2648 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs) 2649 { 2650 PetscInt rbs,cbs; 2651 PetscErrorCode ierr; 2652 2653 PetscFunctionBegin; 2654 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 2655 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 2656 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 2657 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 2658 PetscFunctionReturn(0); 2659 } 2660 2661 #undef __FUNCT__ 2662 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2663 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2664 { 2665 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2666 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2667 MatScalar *aa = a->a; 2668 2669 PetscFunctionBegin; 2670 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2671 PetscFunctionReturn(0); 2672 } 2673 2674 #undef __FUNCT__ 2675 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2676 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2677 { 2678 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2679 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2680 MatScalar *aa = a->a; 2681 2682 PetscFunctionBegin; 2683 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2684 PetscFunctionReturn(0); 2685 } 2686 2687 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2688 2689 #undef __FUNCT__ 2690 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2691 /* 2692 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2693 */ 2694 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 2695 { 2696 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2697 PetscErrorCode ierr; 2698 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2699 PetscInt nz = a->i[m],row,*jj,mr,col; 2700 2701 PetscFunctionBegin; 2702 *nn = n; 2703 if (!ia) PetscFunctionReturn(0); 2704 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2705 else { 2706 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2707 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2708 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2709 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2710 jj = a->j; 2711 for (i=0; i<nz; i++) { 2712 collengths[jj[i]]++; 2713 } 2714 cia[0] = oshift; 2715 for (i=0; i<n; i++) { 2716 cia[i+1] = cia[i] + collengths[i]; 2717 } 2718 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2719 jj = a->j; 2720 for (row=0; row<m; row++) { 2721 mr = a->i[row+1] - a->i[row]; 2722 for (i=0; i<mr; i++) { 2723 col = *jj++; 2724 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2725 } 2726 } 2727 ierr = PetscFree(collengths);CHKERRQ(ierr); 2728 *ia = cia; *ja = cja; 2729 } 2730 PetscFunctionReturn(0); 2731 } 2732 2733 #undef __FUNCT__ 2734 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2735 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 2736 { 2737 PetscErrorCode ierr; 2738 2739 PetscFunctionBegin; 2740 if (!ia) PetscFunctionReturn(0); 2741 ierr = PetscFree(*ia);CHKERRQ(ierr); 2742 ierr = PetscFree(*ja);CHKERRQ(ierr); 2743 PetscFunctionReturn(0); 2744 } 2745 2746 #undef __FUNCT__ 2747 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2748 PetscErrorCode MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2749 { 2750 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f; 2751 PetscErrorCode ierr; 2752 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2; 2753 PetscScalar dx,*y,*xx,*w3_array; 2754 PetscScalar *vscale_array; 2755 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2756 Vec w1=coloring->w1,w2=coloring->w2,w3; 2757 void *fctx = coloring->fctx; 2758 PetscBool flg = PETSC_FALSE; 2759 PetscInt ctype=coloring->ctype,N,col_start=0,col_end=0; 2760 Vec x1_tmp; 2761 2762 PetscFunctionBegin; 2763 PetscValidHeaderSpecific(J,MAT_CLASSID,1); 2764 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2); 2765 PetscValidHeaderSpecific(x1,VEC_CLASSID,3); 2766 if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2767 2768 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2769 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2770 ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr); 2771 if (flg) { 2772 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2773 } else { 2774 PetscBool assembled; 2775 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2776 if (assembled) { 2777 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2778 } 2779 } 2780 2781 x1_tmp = x1; 2782 if (!coloring->vscale){ 2783 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2784 } 2785 2786 /* 2787 This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets 2788 coloring->F for the coarser grids from the finest 2789 */ 2790 if (coloring->F) { 2791 ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr); 2792 ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr); 2793 if (m1 != m2) { 2794 coloring->F = 0; 2795 } 2796 } 2797 2798 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2799 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2800 } 2801 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2802 2803 /* Set w1 = F(x1) */ 2804 if (coloring->F) { 2805 w1 = coloring->F; /* use already computed value of function */ 2806 coloring->F = 0; 2807 } else { 2808 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2809 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2810 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2811 } 2812 2813 if (!coloring->w3) { 2814 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2815 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2816 } 2817 w3 = coloring->w3; 2818 2819 CHKMEMQ; 2820 /* Compute all the local scale factors, including ghost points */ 2821 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2822 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2823 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2824 if (ctype == IS_COLORING_GHOSTED){ 2825 col_start = 0; col_end = N; 2826 } else if (ctype == IS_COLORING_GLOBAL){ 2827 xx = xx - start; 2828 vscale_array = vscale_array - start; 2829 col_start = start; col_end = N + start; 2830 } CHKMEMQ; 2831 for (col=col_start; col<col_end; col++){ 2832 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2833 if (coloring->htype[0] == 'w') { 2834 dx = 1.0 + unorm; 2835 } else { 2836 dx = xx[col]; 2837 } 2838 if (dx == (PetscScalar)0.0) dx = 1.0; 2839 #if !defined(PETSC_USE_COMPLEX) 2840 if (dx < umin && dx >= 0.0) dx = umin; 2841 else if (dx < 0.0 && dx > -umin) dx = -umin; 2842 #else 2843 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2844 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2845 #endif 2846 dx *= epsilon; 2847 vscale_array[col] = (PetscScalar)1.0/dx; 2848 } CHKMEMQ; 2849 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2850 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2851 if (ctype == IS_COLORING_GLOBAL){ 2852 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2853 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2854 } 2855 CHKMEMQ; 2856 if (coloring->vscaleforrow) { 2857 vscaleforrow = coloring->vscaleforrow; 2858 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2859 2860 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2861 /* 2862 Loop over each color 2863 */ 2864 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2865 for (k=0; k<coloring->ncolors; k++) { 2866 coloring->currentcolor = k; 2867 for (i=0; i<bs; i++) { 2868 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2869 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2870 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2871 /* 2872 Loop over each column associated with color 2873 adding the perturbation to the vector w3. 2874 */ 2875 for (l=0; l<coloring->ncolumns[k]; l++) { 2876 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2877 if (coloring->htype[0] == 'w') { 2878 dx = 1.0 + unorm; 2879 } else { 2880 dx = xx[col]; 2881 } 2882 if (dx == (PetscScalar)0.0) dx = 1.0; 2883 #if !defined(PETSC_USE_COMPLEX) 2884 if (dx < umin && dx >= 0.0) dx = umin; 2885 else if (dx < 0.0 && dx > -umin) dx = -umin; 2886 #else 2887 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2888 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2889 #endif 2890 dx *= epsilon; 2891 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2892 w3_array[col] += dx; 2893 } 2894 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2895 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2896 2897 /* 2898 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2899 w2 = F(x1 + dx) - F(x1) 2900 */ 2901 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2902 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2903 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2904 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2905 2906 /* 2907 Loop over rows of vector, putting results into Jacobian matrix 2908 */ 2909 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2910 for (l=0; l<coloring->nrows[k]; l++) { 2911 row = bs*coloring->rows[k][l]; /* local row index */ 2912 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2913 for (j=0; j<bs; j++) { 2914 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2915 srows[j] = row + start + j; 2916 } 2917 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2918 } 2919 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2920 } 2921 } /* endof for each color */ 2922 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2923 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2924 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2925 ierr = PetscFree(srows);CHKERRQ(ierr); 2926 2927 coloring->currentcolor = -1; 2928 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2929 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2930 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2931 PetscFunctionReturn(0); 2932 } 2933 2934 /* -------------------------------------------------------------------*/ 2935 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2936 MatGetRow_SeqBAIJ, 2937 MatRestoreRow_SeqBAIJ, 2938 MatMult_SeqBAIJ_N, 2939 /* 4*/ MatMultAdd_SeqBAIJ_N, 2940 MatMultTranspose_SeqBAIJ, 2941 MatMultTransposeAdd_SeqBAIJ, 2942 0, 2943 0, 2944 0, 2945 /*10*/ 0, 2946 MatLUFactor_SeqBAIJ, 2947 0, 2948 0, 2949 MatTranspose_SeqBAIJ, 2950 /*15*/ MatGetInfo_SeqBAIJ, 2951 MatEqual_SeqBAIJ, 2952 MatGetDiagonal_SeqBAIJ, 2953 MatDiagonalScale_SeqBAIJ, 2954 MatNorm_SeqBAIJ, 2955 /*20*/ 0, 2956 MatAssemblyEnd_SeqBAIJ, 2957 MatSetOption_SeqBAIJ, 2958 MatZeroEntries_SeqBAIJ, 2959 /*24*/ MatZeroRows_SeqBAIJ, 2960 0, 2961 0, 2962 0, 2963 0, 2964 /*29*/ MatSetUpPreallocation_SeqBAIJ, 2965 0, 2966 0, 2967 MatGetArray_SeqBAIJ, 2968 MatRestoreArray_SeqBAIJ, 2969 /*34*/ MatDuplicate_SeqBAIJ, 2970 0, 2971 0, 2972 MatILUFactor_SeqBAIJ, 2973 0, 2974 /*39*/ MatAXPY_SeqBAIJ, 2975 MatGetSubMatrices_SeqBAIJ, 2976 MatIncreaseOverlap_SeqBAIJ, 2977 MatGetValues_SeqBAIJ, 2978 MatCopy_SeqBAIJ, 2979 /*44*/ 0, 2980 MatScale_SeqBAIJ, 2981 0, 2982 0, 2983 MatZeroRowsColumns_SeqBAIJ, 2984 /*49*/ MatSetBlockSize_SeqBAIJ, 2985 MatGetRowIJ_SeqBAIJ, 2986 MatRestoreRowIJ_SeqBAIJ, 2987 MatGetColumnIJ_SeqBAIJ, 2988 MatRestoreColumnIJ_SeqBAIJ, 2989 /*54*/ MatFDColoringCreate_SeqAIJ, 2990 0, 2991 0, 2992 0, 2993 MatSetValuesBlocked_SeqBAIJ, 2994 /*59*/ MatGetSubMatrix_SeqBAIJ, 2995 MatDestroy_SeqBAIJ, 2996 MatView_SeqBAIJ, 2997 0, 2998 0, 2999 /*64*/ 0, 3000 0, 3001 0, 3002 0, 3003 0, 3004 /*69*/ MatGetRowMaxAbs_SeqBAIJ, 3005 0, 3006 MatConvert_Basic, 3007 0, 3008 0, 3009 /*74*/ 0, 3010 MatFDColoringApply_BAIJ, 3011 0, 3012 0, 3013 0, 3014 /*79*/ 0, 3015 0, 3016 0, 3017 0, 3018 MatLoad_SeqBAIJ, 3019 /*84*/ 0, 3020 0, 3021 0, 3022 0, 3023 0, 3024 /*89*/ 0, 3025 0, 3026 0, 3027 0, 3028 0, 3029 /*94*/ 0, 3030 0, 3031 0, 3032 0, 3033 0, 3034 /*99*/0, 3035 0, 3036 0, 3037 0, 3038 0, 3039 /*104*/0, 3040 MatRealPart_SeqBAIJ, 3041 MatImaginaryPart_SeqBAIJ, 3042 0, 3043 0, 3044 /*109*/0, 3045 0, 3046 0, 3047 0, 3048 MatMissingDiagonal_SeqBAIJ, 3049 /*114*/0, 3050 0, 3051 0, 3052 0, 3053 0, 3054 /*119*/0, 3055 0, 3056 MatMultHermitianTranspose_SeqBAIJ, 3057 MatMultHermitianTransposeAdd_SeqBAIJ, 3058 0, 3059 }; 3060 3061 EXTERN_C_BEGIN 3062 #undef __FUNCT__ 3063 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 3064 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3065 { 3066 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3067 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 3068 PetscErrorCode ierr; 3069 3070 PetscFunctionBegin; 3071 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3072 3073 /* allocate space for values if not already there */ 3074 if (!aij->saved_values) { 3075 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 3076 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 3077 } 3078 3079 /* copy values over */ 3080 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3081 PetscFunctionReturn(0); 3082 } 3083 EXTERN_C_END 3084 3085 EXTERN_C_BEGIN 3086 #undef __FUNCT__ 3087 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 3088 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3089 { 3090 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3091 PetscErrorCode ierr; 3092 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 3093 3094 PetscFunctionBegin; 3095 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3096 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 3097 3098 /* copy values over */ 3099 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3100 PetscFunctionReturn(0); 3101 } 3102 EXTERN_C_END 3103 3104 EXTERN_C_BEGIN 3105 extern PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 3106 extern PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 3107 EXTERN_C_END 3108 3109 EXTERN_C_BEGIN 3110 #undef __FUNCT__ 3111 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 3112 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 3113 { 3114 Mat_SeqBAIJ *b; 3115 PetscErrorCode ierr; 3116 PetscInt i,mbs,nbs,bs2,newbs = PetscAbs(bs); 3117 PetscBool flg,skipallocation = PETSC_FALSE; 3118 3119 PetscFunctionBegin; 3120 3121 if (nz == MAT_SKIP_ALLOCATION) { 3122 skipallocation = PETSC_TRUE; 3123 nz = 0; 3124 } 3125 3126 if (bs < 0) { 3127 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 3128 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 3129 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3130 bs = PetscAbs(bs); 3131 } 3132 if (nnz && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 3133 bs = newbs; 3134 3135 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3136 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3137 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3138 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3139 3140 B->preallocated = PETSC_TRUE; 3141 3142 mbs = B->rmap->n/bs; 3143 nbs = B->cmap->n/bs; 3144 bs2 = bs*bs; 3145 3146 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 3147 3148 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3149 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 3150 if (nnz) { 3151 for (i=0; i<mbs; i++) { 3152 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 3153 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 3154 } 3155 } 3156 3157 b = (Mat_SeqBAIJ*)B->data; 3158 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 3159 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3160 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3161 3162 if (!flg) { 3163 switch (bs) { 3164 case 1: 3165 B->ops->mult = MatMult_SeqBAIJ_1; 3166 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3167 B->ops->sor = MatSOR_SeqBAIJ_1; 3168 break; 3169 case 2: 3170 B->ops->mult = MatMult_SeqBAIJ_2; 3171 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3172 B->ops->sor = MatSOR_SeqBAIJ_2; 3173 break; 3174 case 3: 3175 B->ops->mult = MatMult_SeqBAIJ_3; 3176 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3177 B->ops->sor = MatSOR_SeqBAIJ_3; 3178 break; 3179 case 4: 3180 B->ops->mult = MatMult_SeqBAIJ_4; 3181 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3182 B->ops->sor = MatSOR_SeqBAIJ_4; 3183 break; 3184 case 5: 3185 B->ops->mult = MatMult_SeqBAIJ_5; 3186 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3187 B->ops->sor = MatSOR_SeqBAIJ_5; 3188 break; 3189 case 6: 3190 B->ops->mult = MatMult_SeqBAIJ_6; 3191 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3192 B->ops->sor = MatSOR_SeqBAIJ_6; 3193 break; 3194 case 7: 3195 B->ops->mult = MatMult_SeqBAIJ_7; 3196 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3197 B->ops->sor = MatSOR_SeqBAIJ_7; 3198 break; 3199 case 15: 3200 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3201 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3202 B->ops->sor = MatSOR_SeqBAIJ_N; 3203 break; 3204 default: 3205 B->ops->mult = MatMult_SeqBAIJ_N; 3206 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3207 B->ops->sor = MatSOR_SeqBAIJ_N; 3208 break; 3209 } 3210 } 3211 B->rmap->bs = bs; 3212 b->mbs = mbs; 3213 b->nbs = nbs; 3214 if (!skipallocation) { 3215 if (!b->imax) { 3216 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 3217 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt)); 3218 b->free_imax_ilen = PETSC_TRUE; 3219 } 3220 /* b->ilen will count nonzeros in each block row so far. */ 3221 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 3222 if (!nnz) { 3223 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3224 else if (nz < 0) nz = 1; 3225 for (i=0; i<mbs; i++) b->imax[i] = nz; 3226 nz = nz*mbs; 3227 } else { 3228 nz = 0; 3229 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 3230 } 3231 3232 /* allocate the matrix space */ 3233 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 3234 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 3235 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 3236 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 3237 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3238 b->singlemalloc = PETSC_TRUE; 3239 b->i[0] = 0; 3240 for (i=1; i<mbs+1; i++) { 3241 b->i[i] = b->i[i-1] + b->imax[i-1]; 3242 } 3243 b->free_a = PETSC_TRUE; 3244 b->free_ij = PETSC_TRUE; 3245 } else { 3246 b->free_a = PETSC_FALSE; 3247 b->free_ij = PETSC_FALSE; 3248 } 3249 3250 B->rmap->bs = bs; 3251 b->bs2 = bs2; 3252 b->mbs = mbs; 3253 b->nz = 0; 3254 b->maxnz = nz; 3255 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3256 PetscFunctionReturn(0); 3257 } 3258 EXTERN_C_END 3259 3260 EXTERN_C_BEGIN 3261 #undef __FUNCT__ 3262 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3263 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3264 { 3265 PetscInt i,m,nz,nz_max=0,*nnz; 3266 PetscScalar *values=0; 3267 PetscErrorCode ierr; 3268 3269 PetscFunctionBegin; 3270 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3271 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3272 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3273 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3274 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3275 m = B->rmap->n/bs; 3276 3277 if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 3278 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3279 for(i=0; i<m; i++) { 3280 nz = ii[i+1]- ii[i]; 3281 if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 3282 nz_max = PetscMax(nz_max, nz); 3283 nnz[i] = nz; 3284 } 3285 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3286 ierr = PetscFree(nnz);CHKERRQ(ierr); 3287 3288 values = (PetscScalar*)V; 3289 if (!values) { 3290 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3291 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3292 } 3293 for (i=0; i<m; i++) { 3294 PetscInt ncols = ii[i+1] - ii[i]; 3295 const PetscInt *icols = jj + ii[i]; 3296 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3297 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3298 } 3299 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3300 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3301 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3302 3303 PetscFunctionReturn(0); 3304 } 3305 EXTERN_C_END 3306 3307 3308 EXTERN_C_BEGIN 3309 extern PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3310 extern PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*); 3311 #if defined(PETSC_HAVE_MUMPS) 3312 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3313 #endif 3314 extern PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 3315 EXTERN_C_END 3316 3317 /*MC 3318 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3319 block sparse compressed row format. 3320 3321 Options Database Keys: 3322 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3323 3324 Level: beginner 3325 3326 .seealso: MatCreateSeqBAIJ() 3327 M*/ 3328 3329 EXTERN_C_BEGIN 3330 extern PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3331 EXTERN_C_END 3332 3333 EXTERN_C_BEGIN 3334 #undef __FUNCT__ 3335 #define __FUNCT__ "MatCreate_SeqBAIJ" 3336 PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3337 { 3338 PetscErrorCode ierr; 3339 PetscMPIInt size; 3340 Mat_SeqBAIJ *b; 3341 3342 PetscFunctionBegin; 3343 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 3344 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3345 3346 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3347 B->data = (void*)b; 3348 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3349 b->row = 0; 3350 b->col = 0; 3351 b->icol = 0; 3352 b->reallocs = 0; 3353 b->saved_values = 0; 3354 3355 b->roworiented = PETSC_TRUE; 3356 b->nonew = 0; 3357 b->diag = 0; 3358 b->solve_work = 0; 3359 b->mult_work = 0; 3360 B->spptr = 0; 3361 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3362 b->keepnonzeropattern = PETSC_FALSE; 3363 b->xtoy = 0; 3364 b->XtoY = 0; 3365 B->same_nonzero = PETSC_FALSE; 3366 3367 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C", 3368 "MatGetFactorAvailable_seqbaij_petsc", 3369 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3370 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C", 3371 "MatGetFactor_seqbaij_petsc", 3372 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3373 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_bstrm_C", 3374 "MatGetFactor_seqbaij_bstrm", 3375 MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr); 3376 #if defined(PETSC_HAVE_MUMPS) 3377 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr); 3378 #endif 3379 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 3380 "MatInvertBlockDiagonal_SeqBAIJ", 3381 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3382 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3383 "MatStoreValues_SeqBAIJ", 3384 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3385 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3386 "MatRetrieveValues_SeqBAIJ", 3387 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3388 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 3389 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 3390 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3391 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 3392 "MatConvert_SeqBAIJ_SeqAIJ", 3393 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3394 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 3395 "MatConvert_SeqBAIJ_SeqSBAIJ", 3396 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3397 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 3398 "MatSeqBAIJSetPreallocation_SeqBAIJ", 3399 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3400 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 3401 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 3402 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3403 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C", 3404 "MatConvert_SeqBAIJ_SeqBSTRM", 3405 MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3406 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3407 "MatIsTranspose_SeqBAIJ", 3408 MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3409 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3410 PetscFunctionReturn(0); 3411 } 3412 EXTERN_C_END 3413 3414 #undef __FUNCT__ 3415 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3416 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3417 { 3418 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3419 PetscErrorCode ierr; 3420 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3421 3422 PetscFunctionBegin; 3423 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3424 3425 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3426 c->imax = a->imax; 3427 c->ilen = a->ilen; 3428 c->free_imax_ilen = PETSC_FALSE; 3429 } else { 3430 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3431 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3432 for (i=0; i<mbs; i++) { 3433 c->imax[i] = a->imax[i]; 3434 c->ilen[i] = a->ilen[i]; 3435 } 3436 c->free_imax_ilen = PETSC_TRUE; 3437 } 3438 3439 /* allocate the matrix space */ 3440 if (mallocmatspace){ 3441 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3442 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3443 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3444 c->singlemalloc = PETSC_FALSE; 3445 c->free_ij = PETSC_FALSE; 3446 c->i = a->i; 3447 c->j = a->j; 3448 c->parent = A; 3449 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3450 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3451 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3452 } else { 3453 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3454 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3455 c->singlemalloc = PETSC_TRUE; 3456 c->free_ij = PETSC_TRUE; 3457 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3458 if (mbs > 0) { 3459 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3460 if (cpvalues == MAT_COPY_VALUES) { 3461 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3462 } else { 3463 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3464 } 3465 } 3466 } 3467 } 3468 3469 c->roworiented = a->roworiented; 3470 c->nonew = a->nonew; 3471 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3472 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3473 c->bs2 = a->bs2; 3474 c->mbs = a->mbs; 3475 c->nbs = a->nbs; 3476 3477 if (a->diag) { 3478 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3479 c->diag = a->diag; 3480 c->free_diag = PETSC_FALSE; 3481 } else { 3482 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3483 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3484 for (i=0; i<mbs; i++) { 3485 c->diag[i] = a->diag[i]; 3486 } 3487 c->free_diag = PETSC_TRUE; 3488 } 3489 } else c->diag = 0; 3490 c->nz = a->nz; 3491 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3492 c->solve_work = 0; 3493 c->mult_work = 0; 3494 c->free_a = PETSC_TRUE; 3495 c->free_ij = PETSC_TRUE; 3496 C->preallocated = PETSC_TRUE; 3497 C->assembled = PETSC_TRUE; 3498 3499 c->compressedrow.use = a->compressedrow.use; 3500 c->compressedrow.nrows = a->compressedrow.nrows; 3501 c->compressedrow.check = a->compressedrow.check; 3502 if (a->compressedrow.use){ 3503 i = a->compressedrow.nrows; 3504 ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 3505 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3506 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3507 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3508 } else { 3509 c->compressedrow.use = PETSC_FALSE; 3510 c->compressedrow.i = PETSC_NULL; 3511 c->compressedrow.rindex = PETSC_NULL; 3512 } 3513 C->same_nonzero = A->same_nonzero; 3514 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3515 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3516 PetscFunctionReturn(0); 3517 } 3518 3519 #undef __FUNCT__ 3520 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3521 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3522 { 3523 PetscErrorCode ierr; 3524 3525 PetscFunctionBegin; 3526 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 3527 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3528 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3529 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE); 3530 PetscFunctionReturn(0); 3531 } 3532 3533 #undef __FUNCT__ 3534 #define __FUNCT__ "MatLoad_SeqBAIJ" 3535 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3536 { 3537 Mat_SeqBAIJ *a; 3538 PetscErrorCode ierr; 3539 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3540 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3541 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3542 PetscInt *masked,nmask,tmp,bs2,ishift; 3543 PetscMPIInt size; 3544 int fd; 3545 PetscScalar *aa; 3546 MPI_Comm comm = ((PetscObject)viewer)->comm; 3547 3548 PetscFunctionBegin; 3549 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3550 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3551 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3552 bs2 = bs*bs; 3553 3554 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3555 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3556 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3557 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3558 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3559 M = header[1]; N = header[2]; nz = header[3]; 3560 3561 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3562 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3563 3564 /* 3565 This code adds extra rows to make sure the number of rows is 3566 divisible by the blocksize 3567 */ 3568 mbs = M/bs; 3569 extra_rows = bs - M + bs*(mbs); 3570 if (extra_rows == bs) extra_rows = 0; 3571 else mbs++; 3572 if (extra_rows) { 3573 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3574 } 3575 3576 /* Set global sizes if not already set */ 3577 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3578 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3579 } else { /* Check if the matrix global sizes are correct */ 3580 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3581 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3582 } 3583 3584 /* read in row lengths */ 3585 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3586 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3587 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3588 3589 /* read in column indices */ 3590 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3591 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3592 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3593 3594 /* loop over row lengths determining block row lengths */ 3595 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3596 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3597 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3598 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3599 rowcount = 0; 3600 nzcount = 0; 3601 for (i=0; i<mbs; i++) { 3602 nmask = 0; 3603 for (j=0; j<bs; j++) { 3604 kmax = rowlengths[rowcount]; 3605 for (k=0; k<kmax; k++) { 3606 tmp = jj[nzcount++]/bs; 3607 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3608 } 3609 rowcount++; 3610 } 3611 browlengths[i] += nmask; 3612 /* zero out the mask elements we set */ 3613 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3614 } 3615 3616 /* Do preallocation */ 3617 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3618 a = (Mat_SeqBAIJ*)newmat->data; 3619 3620 /* set matrix "i" values */ 3621 a->i[0] = 0; 3622 for (i=1; i<= mbs; i++) { 3623 a->i[i] = a->i[i-1] + browlengths[i-1]; 3624 a->ilen[i-1] = browlengths[i-1]; 3625 } 3626 a->nz = 0; 3627 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3628 3629 /* read in nonzero values */ 3630 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3631 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3632 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3633 3634 /* set "a" and "j" values into matrix */ 3635 nzcount = 0; jcount = 0; 3636 for (i=0; i<mbs; i++) { 3637 nzcountb = nzcount; 3638 nmask = 0; 3639 for (j=0; j<bs; j++) { 3640 kmax = rowlengths[i*bs+j]; 3641 for (k=0; k<kmax; k++) { 3642 tmp = jj[nzcount++]/bs; 3643 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3644 } 3645 } 3646 /* sort the masked values */ 3647 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3648 3649 /* set "j" values into matrix */ 3650 maskcount = 1; 3651 for (j=0; j<nmask; j++) { 3652 a->j[jcount++] = masked[j]; 3653 mask[masked[j]] = maskcount++; 3654 } 3655 /* set "a" values into matrix */ 3656 ishift = bs2*a->i[i]; 3657 for (j=0; j<bs; j++) { 3658 kmax = rowlengths[i*bs+j]; 3659 for (k=0; k<kmax; k++) { 3660 tmp = jj[nzcountb]/bs ; 3661 block = mask[tmp] - 1; 3662 point = jj[nzcountb] - bs*tmp; 3663 idx = ishift + bs2*block + j + bs*point; 3664 a->a[idx] = (MatScalar)aa[nzcountb++]; 3665 } 3666 } 3667 /* zero out the mask elements we set */ 3668 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3669 } 3670 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3671 3672 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3673 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3674 ierr = PetscFree(aa);CHKERRQ(ierr); 3675 ierr = PetscFree(jj);CHKERRQ(ierr); 3676 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3677 3678 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3679 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3680 ierr = MatView_Private(newmat);CHKERRQ(ierr); 3681 PetscFunctionReturn(0); 3682 } 3683 3684 #undef __FUNCT__ 3685 #define __FUNCT__ "MatCreateSeqBAIJ" 3686 /*@C 3687 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3688 compressed row) format. For good matrix assembly performance the 3689 user should preallocate the matrix storage by setting the parameter nz 3690 (or the array nnz). By setting these parameters accurately, performance 3691 during matrix assembly can be increased by more than a factor of 50. 3692 3693 Collective on MPI_Comm 3694 3695 Input Parameters: 3696 + comm - MPI communicator, set to PETSC_COMM_SELF 3697 . bs - size of block 3698 . m - number of rows 3699 . n - number of columns 3700 . nz - number of nonzero blocks per block row (same for all rows) 3701 - nnz - array containing the number of nonzero blocks in the various block rows 3702 (possibly different for each block row) or PETSC_NULL 3703 3704 Output Parameter: 3705 . A - the matrix 3706 3707 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3708 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3709 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3710 3711 Options Database Keys: 3712 . -mat_no_unroll - uses code that does not unroll the loops in the 3713 block calculations (much slower) 3714 . -mat_block_size - size of the blocks to use 3715 3716 Level: intermediate 3717 3718 Notes: 3719 The number of rows and columns must be divisible by blocksize. 3720 3721 If the nnz parameter is given then the nz parameter is ignored 3722 3723 A nonzero block is any block that as 1 or more nonzeros in it 3724 3725 The block AIJ format is fully compatible with standard Fortran 77 3726 storage. That is, the stored row and column indices can begin at 3727 either one (as in Fortran) or zero. See the users' manual for details. 3728 3729 Specify the preallocated storage with either nz or nnz (not both). 3730 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3731 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3732 matrices. 3733 3734 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3735 @*/ 3736 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3737 { 3738 PetscErrorCode ierr; 3739 3740 PetscFunctionBegin; 3741 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3742 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3743 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3744 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3745 PetscFunctionReturn(0); 3746 } 3747 3748 #undef __FUNCT__ 3749 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3750 /*@C 3751 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3752 per row in the matrix. For good matrix assembly performance the 3753 user should preallocate the matrix storage by setting the parameter nz 3754 (or the array nnz). By setting these parameters accurately, performance 3755 during matrix assembly can be increased by more than a factor of 50. 3756 3757 Collective on MPI_Comm 3758 3759 Input Parameters: 3760 + A - the matrix 3761 . bs - size of block 3762 . nz - number of block nonzeros per block row (same for all rows) 3763 - nnz - array containing the number of block nonzeros in the various block rows 3764 (possibly different for each block row) or PETSC_NULL 3765 3766 Options Database Keys: 3767 . -mat_no_unroll - uses code that does not unroll the loops in the 3768 block calculations (much slower) 3769 . -mat_block_size - size of the blocks to use 3770 3771 Level: intermediate 3772 3773 Notes: 3774 If the nnz parameter is given then the nz parameter is ignored 3775 3776 You can call MatGetInfo() to get information on how effective the preallocation was; 3777 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3778 You can also run with the option -info and look for messages with the string 3779 malloc in them to see if additional memory allocation was needed. 3780 3781 The block AIJ format is fully compatible with standard Fortran 77 3782 storage. That is, the stored row and column indices can begin at 3783 either one (as in Fortran) or zero. See the users' manual for details. 3784 3785 Specify the preallocated storage with either nz or nnz (not both). 3786 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3787 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3788 3789 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3790 @*/ 3791 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3792 { 3793 PetscErrorCode ierr; 3794 3795 PetscFunctionBegin; 3796 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3797 PetscFunctionReturn(0); 3798 } 3799 3800 #undef __FUNCT__ 3801 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3802 /*@C 3803 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3804 (the default sequential PETSc format). 3805 3806 Collective on MPI_Comm 3807 3808 Input Parameters: 3809 + A - the matrix 3810 . i - the indices into j for the start of each local row (starts with zero) 3811 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3812 - v - optional values in the matrix 3813 3814 Level: developer 3815 3816 .keywords: matrix, aij, compressed row, sparse 3817 3818 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3819 @*/ 3820 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3821 { 3822 PetscErrorCode ierr; 3823 3824 PetscFunctionBegin; 3825 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3826 PetscFunctionReturn(0); 3827 } 3828 3829 3830 #undef __FUNCT__ 3831 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3832 /*@ 3833 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3834 3835 Collective on MPI_Comm 3836 3837 Input Parameters: 3838 + comm - must be an MPI communicator of size 1 3839 . bs - size of block 3840 . m - number of rows 3841 . n - number of columns 3842 . i - row indices 3843 . j - column indices 3844 - a - matrix values 3845 3846 Output Parameter: 3847 . mat - the matrix 3848 3849 Level: advanced 3850 3851 Notes: 3852 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3853 once the matrix is destroyed 3854 3855 You cannot set new nonzero locations into this matrix, that will generate an error. 3856 3857 The i and j indices are 0 based 3858 3859 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3860 3861 3862 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3863 3864 @*/ 3865 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3866 { 3867 PetscErrorCode ierr; 3868 PetscInt ii; 3869 Mat_SeqBAIJ *baij; 3870 3871 PetscFunctionBegin; 3872 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3873 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3874 3875 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3876 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3877 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3878 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3879 baij = (Mat_SeqBAIJ*)(*mat)->data; 3880 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3881 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3882 3883 baij->i = i; 3884 baij->j = j; 3885 baij->a = a; 3886 baij->singlemalloc = PETSC_FALSE; 3887 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3888 baij->free_a = PETSC_FALSE; 3889 baij->free_ij = PETSC_FALSE; 3890 3891 for (ii=0; ii<m; ii++) { 3892 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3893 #if defined(PETSC_USE_DEBUG) 3894 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3895 #endif 3896 } 3897 #if defined(PETSC_USE_DEBUG) 3898 for (ii=0; ii<baij->i[m]; ii++) { 3899 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3900 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3901 } 3902 #endif 3903 3904 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3905 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3906 PetscFunctionReturn(0); 3907 } 3908