1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 15 /*@ 16 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 17 18 Collective on Mat 19 20 Input Parameters: 21 . mat - the matrix 22 23 Level: advanced 24 @*/ 25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 26 { 27 PetscErrorCode ierr,(*f)(Mat); 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 31 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 32 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 33 34 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 35 if (f) { 36 ierr = (*f)(mat);CHKERRQ(ierr); 37 } else { 38 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 39 } 40 PetscFunctionReturn(0); 41 } 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 47 { 48 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 49 PetscErrorCode ierr; 50 PetscInt *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs; 51 MatScalar *v = a->a,*odiag,*diag,*mdiag; 52 PetscReal shift = 0.0; 53 54 PetscFunctionBegin; 55 if (a->idiagvalid) PetscFunctionReturn(0); 56 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 57 diag_offset = a->diag; 58 if (!a->idiag) { 59 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 60 } 61 diag = a->idiag; 62 mdiag = a->idiag+bs*bs*mbs; 63 /* factor and invert each block */ 64 switch (bs){ 65 case 2: 66 for (i=0; i<mbs; i++) { 67 odiag = v + 4*diag_offset[i]; 68 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 69 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 70 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 71 diag += 4; 72 mdiag += 4; 73 } 74 break; 75 case 3: 76 for (i=0; i<mbs; i++) { 77 odiag = v + 9*diag_offset[i]; 78 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 79 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 80 diag[8] = odiag[8]; 81 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 82 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 83 mdiag[8] = odiag[8]; 84 ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 85 diag += 9; 86 mdiag += 9; 87 } 88 break; 89 case 4: 90 for (i=0; i<mbs; i++) { 91 odiag = v + 16*diag_offset[i]; 92 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 94 ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 95 diag += 16; 96 mdiag += 16; 97 } 98 break; 99 case 5: 100 for (i=0; i<mbs; i++) { 101 odiag = v + 25*diag_offset[i]; 102 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 104 ierr = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr); 105 diag += 25; 106 mdiag += 25; 107 } 108 break; 109 default: 110 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 111 } 112 a->idiagvalid = PETSC_TRUE; 113 PetscFunctionReturn(0); 114 } 115 EXTERN_C_END 116 117 #undef __FUNCT__ 118 #define __FUNCT__ "MatPBRelax_SeqBAIJ_1" 119 PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 120 { 121 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 122 PetscScalar *x,x1,s1; 123 const PetscScalar *b; 124 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 125 PetscErrorCode ierr; 126 PetscInt m = a->mbs,i,i2,nz,idx; 127 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 128 129 PetscFunctionBegin; 130 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 131 its = its*lits; 132 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 133 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 134 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 135 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 136 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 137 138 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 139 140 diag = a->diag; 141 idiag = a->idiag; 142 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 143 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 144 145 if (flag & SOR_ZERO_INITIAL_GUESS) { 146 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 147 x[0] = b[0]*idiag[0]; 148 i2 = 1; 149 idiag += 1; 150 for (i=1; i<m; i++) { 151 v = aa + ai[i]; 152 vi = aj + ai[i]; 153 nz = diag[i] - ai[i]; 154 s1 = b[i2]; 155 while (nz--) { 156 idx = (*vi++); 157 x1 = x[idx]; 158 s1 -= v[0]*x1; 159 v += 1; 160 } 161 x[i2] = idiag[0]*s1; 162 idiag += 1; 163 i2 += 1; 164 } 165 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 166 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 167 } 168 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 169 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 170 i2 = 0; 171 mdiag = a->idiag+a->mbs; 172 for (i=0; i<m; i++) { 173 x1 = x[i2]; 174 x[i2] = mdiag[0]*x1; 175 mdiag += 1; 176 i2 += 1; 177 } 178 ierr = PetscLogFlops(m);CHKERRQ(ierr); 179 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 180 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 181 } 182 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 183 idiag = a->idiag+a->mbs - 1; 184 i2 = m - 1; 185 x1 = x[i2]; 186 x[i2] = idiag[0]*x1; 187 idiag -= 1; 188 i2 -= 1; 189 for (i=m-2; i>=0; i--) { 190 v = aa + (diag[i]+1); 191 vi = aj + diag[i] + 1; 192 nz = ai[i+1] - diag[i] - 1; 193 s1 = x[i2]; 194 while (nz--) { 195 idx = (*vi++); 196 x1 = x[idx]; 197 s1 -= v[0]*x1; 198 v += 1; 199 } 200 x[i2] = idiag[0]*s1; 201 idiag -= 2; 202 i2 -= 1; 203 } 204 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 205 } 206 } else { 207 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 208 } 209 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 210 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 211 PetscFunctionReturn(0); 212 } 213 214 #undef __FUNCT__ 215 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 216 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 217 { 218 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 219 PetscScalar *x,x1,x2,s1,s2; 220 const PetscScalar *b; 221 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 222 PetscErrorCode ierr; 223 PetscInt m = a->mbs,i,i2,nz,idx; 224 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 225 226 PetscFunctionBegin; 227 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 228 its = its*lits; 229 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 230 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 231 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 232 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 233 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 234 235 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 236 237 diag = a->diag; 238 idiag = a->idiag; 239 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 240 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 241 242 if (flag & SOR_ZERO_INITIAL_GUESS) { 243 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 244 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 245 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 246 i2 = 2; 247 idiag += 4; 248 for (i=1; i<m; i++) { 249 v = aa + 4*ai[i]; 250 vi = aj + ai[i]; 251 nz = diag[i] - ai[i]; 252 s1 = b[i2]; s2 = b[i2+1]; 253 while (nz--) { 254 idx = 2*(*vi++); 255 x1 = x[idx]; x2 = x[1+idx]; 256 s1 -= v[0]*x1 + v[2]*x2; 257 s2 -= v[1]*x1 + v[3]*x2; 258 v += 4; 259 } 260 x[i2] = idiag[0]*s1 + idiag[2]*s2; 261 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 262 idiag += 4; 263 i2 += 2; 264 } 265 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 266 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 267 } 268 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 269 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 270 i2 = 0; 271 mdiag = a->idiag+4*a->mbs; 272 for (i=0; i<m; i++) { 273 x1 = x[i2]; x2 = x[i2+1]; 274 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 275 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 276 mdiag += 4; 277 i2 += 2; 278 } 279 ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 280 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 281 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 282 } 283 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 284 idiag = a->idiag+4*a->mbs - 4; 285 i2 = 2*m - 2; 286 x1 = x[i2]; x2 = x[i2+1]; 287 x[i2] = idiag[0]*x1 + idiag[2]*x2; 288 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 289 idiag -= 4; 290 i2 -= 2; 291 for (i=m-2; i>=0; i--) { 292 v = aa + 4*(diag[i]+1); 293 vi = aj + diag[i] + 1; 294 nz = ai[i+1] - diag[i] - 1; 295 s1 = x[i2]; s2 = x[i2+1]; 296 while (nz--) { 297 idx = 2*(*vi++); 298 x1 = x[idx]; x2 = x[1+idx]; 299 s1 -= v[0]*x1 + v[2]*x2; 300 s2 -= v[1]*x1 + v[3]*x2; 301 v += 4; 302 } 303 x[i2] = idiag[0]*s1 + idiag[2]*s2; 304 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 305 idiag -= 4; 306 i2 -= 2; 307 } 308 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 309 } 310 } else { 311 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 312 } 313 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 314 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 315 PetscFunctionReturn(0); 316 } 317 318 #undef __FUNCT__ 319 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 320 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 321 { 322 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 323 PetscScalar *x,x1,x2,x3,s1,s2,s3; 324 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 325 const PetscScalar *b; 326 PetscErrorCode ierr; 327 PetscInt m = a->mbs,i,i2,nz,idx; 328 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 329 330 PetscFunctionBegin; 331 its = its*lits; 332 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 333 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 334 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 335 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 336 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 337 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 338 339 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 340 341 diag = a->diag; 342 idiag = a->idiag; 343 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 344 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 345 346 if (flag & SOR_ZERO_INITIAL_GUESS) { 347 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 348 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 349 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 350 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 351 i2 = 3; 352 idiag += 9; 353 for (i=1; i<m; i++) { 354 v = aa + 9*ai[i]; 355 vi = aj + ai[i]; 356 nz = diag[i] - ai[i]; 357 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 358 while (nz--) { 359 idx = 3*(*vi++); 360 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 361 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 362 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 363 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 364 v += 9; 365 } 366 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 367 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 368 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 369 idiag += 9; 370 i2 += 3; 371 } 372 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 373 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 374 } 375 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 376 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 377 i2 = 0; 378 mdiag = a->idiag+9*a->mbs; 379 for (i=0; i<m; i++) { 380 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 381 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 382 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 383 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 384 mdiag += 9; 385 i2 += 3; 386 } 387 ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 388 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 389 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 390 } 391 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 392 idiag = a->idiag+9*a->mbs - 9; 393 i2 = 3*m - 3; 394 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 395 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 396 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 397 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 398 idiag -= 9; 399 i2 -= 3; 400 for (i=m-2; i>=0; i--) { 401 v = aa + 9*(diag[i]+1); 402 vi = aj + diag[i] + 1; 403 nz = ai[i+1] - diag[i] - 1; 404 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 405 while (nz--) { 406 idx = 3*(*vi++); 407 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 408 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 409 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 410 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 411 v += 9; 412 } 413 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 414 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 415 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 416 idiag -= 9; 417 i2 -= 3; 418 } 419 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 420 } 421 } else { 422 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 423 } 424 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 425 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 431 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 432 { 433 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 434 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 435 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 436 const PetscScalar *b; 437 PetscErrorCode ierr; 438 PetscInt m = a->mbs,i,i2,nz,idx; 439 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 440 441 PetscFunctionBegin; 442 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 443 its = its*lits; 444 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 445 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 446 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 447 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 448 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 449 450 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 451 452 diag = a->diag; 453 idiag = a->idiag; 454 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 455 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 456 457 if (flag & SOR_ZERO_INITIAL_GUESS) { 458 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 459 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 460 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 461 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 462 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 463 i2 = 4; 464 idiag += 16; 465 for (i=1; i<m; i++) { 466 v = aa + 16*ai[i]; 467 vi = aj + ai[i]; 468 nz = diag[i] - ai[i]; 469 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 470 while (nz--) { 471 idx = 4*(*vi++); 472 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 473 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 474 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 475 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 476 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 477 v += 16; 478 } 479 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 480 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 481 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 482 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 483 idiag += 16; 484 i2 += 4; 485 } 486 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 487 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 488 } 489 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 490 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 491 i2 = 0; 492 mdiag = a->idiag+16*a->mbs; 493 for (i=0; i<m; i++) { 494 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 495 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 496 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 497 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 498 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 499 mdiag += 16; 500 i2 += 4; 501 } 502 ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 503 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 504 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 505 } 506 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 507 idiag = a->idiag+16*a->mbs - 16; 508 i2 = 4*m - 4; 509 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 510 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 511 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 512 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 513 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 514 idiag -= 16; 515 i2 -= 4; 516 for (i=m-2; i>=0; i--) { 517 v = aa + 16*(diag[i]+1); 518 vi = aj + diag[i] + 1; 519 nz = ai[i+1] - diag[i] - 1; 520 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 521 while (nz--) { 522 idx = 4*(*vi++); 523 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 524 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 525 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 526 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 527 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 528 v += 16; 529 } 530 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 531 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 532 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 533 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 534 idiag -= 16; 535 i2 -= 4; 536 } 537 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 538 } 539 } else { 540 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 541 } 542 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 543 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 544 PetscFunctionReturn(0); 545 } 546 547 #undef __FUNCT__ 548 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 549 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 550 { 551 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 552 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 553 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 554 const PetscScalar *b; 555 PetscErrorCode ierr; 556 PetscInt m = a->mbs,i,i2,nz,idx; 557 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 558 559 PetscFunctionBegin; 560 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 561 its = its*lits; 562 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 563 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 564 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 565 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 566 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 567 568 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 569 570 diag = a->diag; 571 idiag = a->idiag; 572 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 573 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 574 575 if (flag & SOR_ZERO_INITIAL_GUESS) { 576 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 577 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 578 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 579 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 580 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 581 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 582 i2 = 5; 583 idiag += 25; 584 for (i=1; i<m; i++) { 585 v = aa + 25*ai[i]; 586 vi = aj + ai[i]; 587 nz = diag[i] - ai[i]; 588 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 589 while (nz--) { 590 idx = 5*(*vi++); 591 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 592 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 593 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 594 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 595 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 596 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 597 v += 25; 598 } 599 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 600 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 601 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 602 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 603 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 604 idiag += 25; 605 i2 += 5; 606 } 607 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 608 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 609 } 610 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 611 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 612 i2 = 0; 613 mdiag = a->idiag+25*a->mbs; 614 for (i=0; i<m; i++) { 615 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 616 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 617 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 618 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 619 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 620 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 621 mdiag += 25; 622 i2 += 5; 623 } 624 ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 625 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 626 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 627 } 628 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 629 idiag = a->idiag+25*a->mbs - 25; 630 i2 = 5*m - 5; 631 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 632 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 633 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 634 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 635 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 636 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 637 idiag -= 25; 638 i2 -= 5; 639 for (i=m-2; i>=0; i--) { 640 v = aa + 25*(diag[i]+1); 641 vi = aj + diag[i] + 1; 642 nz = ai[i+1] - diag[i] - 1; 643 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 644 while (nz--) { 645 idx = 5*(*vi++); 646 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 647 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 648 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 649 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 650 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 651 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 652 v += 25; 653 } 654 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 655 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 656 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 657 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 658 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 659 idiag -= 25; 660 i2 -= 5; 661 } 662 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 663 } 664 } else { 665 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 666 } 667 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 668 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 669 PetscFunctionReturn(0); 670 } 671 672 #undef __FUNCT__ 673 #define __FUNCT__ "MatPBRelax_SeqBAIJ_6" 674 PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 675 { 676 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 677 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 678 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 679 const PetscScalar *b; 680 PetscErrorCode ierr; 681 PetscInt m = a->mbs,i,i2,nz,idx; 682 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 683 684 PetscFunctionBegin; 685 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 686 its = its*lits; 687 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 688 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 689 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 690 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 691 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 692 693 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 694 695 diag = a->diag; 696 idiag = a->idiag; 697 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 698 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 699 700 if (flag & SOR_ZERO_INITIAL_GUESS) { 701 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 702 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 703 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 704 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 705 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 706 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 707 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 708 i2 = 6; 709 idiag += 36; 710 for (i=1; i<m; i++) { 711 v = aa + 36*ai[i]; 712 vi = aj + ai[i]; 713 nz = diag[i] - ai[i]; 714 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 715 while (nz--) { 716 idx = 6*(*vi++); 717 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 718 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 719 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 720 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 721 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 722 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 723 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 724 v += 36; 725 } 726 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 727 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 728 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 729 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 730 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 731 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 732 idiag += 36; 733 i2 += 6; 734 } 735 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 736 ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr); 737 } 738 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 739 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 740 i2 = 0; 741 mdiag = a->idiag+36*a->mbs; 742 for (i=0; i<m; i++) { 743 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 744 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 745 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 746 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 747 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 748 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 749 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 750 mdiag += 36; 751 i2 += 6; 752 } 753 ierr = PetscLogFlops(60*m);CHKERRQ(ierr); 754 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 755 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 756 } 757 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 758 idiag = a->idiag+36*a->mbs - 36; 759 i2 = 6*m - 6; 760 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 761 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 762 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 763 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 764 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 765 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 766 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 767 idiag -= 36; 768 i2 -= 6; 769 for (i=m-2; i>=0; i--) { 770 v = aa + 36*(diag[i]+1); 771 vi = aj + diag[i] + 1; 772 nz = ai[i+1] - diag[i] - 1; 773 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 774 while (nz--) { 775 idx = 6*(*vi++); 776 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 777 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 778 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 779 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 780 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 781 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 782 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 783 v += 36; 784 } 785 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 786 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 787 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 788 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 789 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 790 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 791 idiag -= 36; 792 i2 -= 6; 793 } 794 ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr); 795 } 796 } else { 797 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 798 } 799 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 800 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 801 PetscFunctionReturn(0); 802 } 803 804 #undef __FUNCT__ 805 #define __FUNCT__ "MatPBRelax_SeqBAIJ_7" 806 PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 807 { 808 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 809 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 810 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 811 const PetscScalar *b; 812 PetscErrorCode ierr; 813 PetscInt m = a->mbs,i,i2,nz,idx; 814 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 815 816 PetscFunctionBegin; 817 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 818 its = its*lits; 819 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 820 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 821 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 822 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 823 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 824 825 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 826 827 diag = a->diag; 828 idiag = a->idiag; 829 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 830 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 831 832 if (flag & SOR_ZERO_INITIAL_GUESS) { 833 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 834 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 835 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 836 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 837 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 838 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 839 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 840 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 841 i2 = 7; 842 idiag += 49; 843 for (i=1; i<m; i++) { 844 v = aa + 49*ai[i]; 845 vi = aj + ai[i]; 846 nz = diag[i] - ai[i]; 847 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 848 while (nz--) { 849 idx = 7*(*vi++); 850 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 851 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 852 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 853 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 854 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 855 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 856 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 857 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 858 v += 49; 859 } 860 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 861 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 862 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 863 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 864 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 865 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 866 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 867 idiag += 49; 868 i2 += 7; 869 } 870 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 871 ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr); 872 } 873 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 874 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 875 i2 = 0; 876 mdiag = a->idiag+49*a->mbs; 877 for (i=0; i<m; i++) { 878 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 879 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7; 880 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7; 881 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7; 882 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7; 883 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7; 884 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7; 885 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7; 886 mdiag += 36; 887 i2 += 6; 888 } 889 ierr = PetscLogFlops(93*m);CHKERRQ(ierr); 890 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 891 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 892 } 893 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 894 idiag = a->idiag+49*a->mbs - 49; 895 i2 = 7*m - 7; 896 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 897 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 898 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 899 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 900 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 901 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 902 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 903 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 904 idiag -= 49; 905 i2 -= 7; 906 for (i=m-2; i>=0; i--) { 907 v = aa + 49*(diag[i]+1); 908 vi = aj + diag[i] + 1; 909 nz = ai[i+1] - diag[i] - 1; 910 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 911 while (nz--) { 912 idx = 7*(*vi++); 913 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 914 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 915 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 916 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 917 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 918 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 919 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 920 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 921 v += 49; 922 } 923 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 924 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 925 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 926 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 927 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 928 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 929 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 930 idiag -= 49; 931 i2 -= 7; 932 } 933 ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr); 934 } 935 } else { 936 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 937 } 938 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 939 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 940 PetscFunctionReturn(0); 941 } 942 943 /* 944 Special version for direct calls from Fortran (Used in PETSc-fun3d) 945 */ 946 #if defined(PETSC_HAVE_FORTRAN_CAPS) 947 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 948 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 949 #define matsetvaluesblocked4_ matsetvaluesblocked4 950 #endif 951 952 EXTERN_C_BEGIN 953 #undef __FUNCT__ 954 #define __FUNCT__ "matsetvaluesblocked4_" 955 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 956 { 957 Mat A = *AA; 958 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 959 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 960 PetscInt *ai=a->i,*ailen=a->ilen; 961 PetscInt *aj=a->j,stepval,lastcol = -1; 962 const PetscScalar *value = v; 963 MatScalar *ap,*aa = a->a,*bap; 964 965 PetscFunctionBegin; 966 if (A->rmap.bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 967 stepval = (n-1)*4; 968 for (k=0; k<m; k++) { /* loop over added rows */ 969 row = im[k]; 970 rp = aj + ai[row]; 971 ap = aa + 16*ai[row]; 972 nrow = ailen[row]; 973 low = 0; 974 high = nrow; 975 for (l=0; l<n; l++) { /* loop over added columns */ 976 col = in[l]; 977 if (col <= lastcol) low = 0; else high = nrow; 978 lastcol = col; 979 value = v + k*(stepval+4 + l)*4; 980 while (high-low > 7) { 981 t = (low+high)/2; 982 if (rp[t] > col) high = t; 983 else low = t; 984 } 985 for (i=low; i<high; i++) { 986 if (rp[i] > col) break; 987 if (rp[i] == col) { 988 bap = ap + 16*i; 989 for (ii=0; ii<4; ii++,value+=stepval) { 990 for (jj=ii; jj<16; jj+=4) { 991 bap[jj] += *value++; 992 } 993 } 994 goto noinsert2; 995 } 996 } 997 N = nrow++ - 1; 998 high++; /* added new column index thus must search to one higher than before */ 999 /* shift up all the later entries in this row */ 1000 for (ii=N; ii>=i; ii--) { 1001 rp[ii+1] = rp[ii]; 1002 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1003 } 1004 if (N >= i) { 1005 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1006 } 1007 rp[i] = col; 1008 bap = ap + 16*i; 1009 for (ii=0; ii<4; ii++,value+=stepval) { 1010 for (jj=ii; jj<16; jj+=4) { 1011 bap[jj] = *value++; 1012 } 1013 } 1014 noinsert2:; 1015 low = i; 1016 } 1017 ailen[row] = nrow; 1018 } 1019 PetscFunctionReturnVoid(); 1020 } 1021 EXTERN_C_END 1022 1023 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1024 #define matsetvalues4_ MATSETVALUES4 1025 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1026 #define matsetvalues4_ matsetvalues4 1027 #endif 1028 1029 EXTERN_C_BEGIN 1030 #undef __FUNCT__ 1031 #define __FUNCT__ "MatSetValues4_" 1032 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1033 { 1034 Mat A = *AA; 1035 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1036 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1037 PetscInt *ai=a->i,*ailen=a->ilen; 1038 PetscInt *aj=a->j,brow,bcol; 1039 PetscInt ridx,cidx,lastcol = -1; 1040 MatScalar *ap,value,*aa=a->a,*bap; 1041 1042 PetscFunctionBegin; 1043 for (k=0; k<m; k++) { /* loop over added rows */ 1044 row = im[k]; brow = row/4; 1045 rp = aj + ai[brow]; 1046 ap = aa + 16*ai[brow]; 1047 nrow = ailen[brow]; 1048 low = 0; 1049 high = nrow; 1050 for (l=0; l<n; l++) { /* loop over added columns */ 1051 col = in[l]; bcol = col/4; 1052 ridx = row % 4; cidx = col % 4; 1053 value = v[l + k*n]; 1054 if (col <= lastcol) low = 0; else high = nrow; 1055 lastcol = col; 1056 while (high-low > 7) { 1057 t = (low+high)/2; 1058 if (rp[t] > bcol) high = t; 1059 else low = t; 1060 } 1061 for (i=low; i<high; i++) { 1062 if (rp[i] > bcol) break; 1063 if (rp[i] == bcol) { 1064 bap = ap + 16*i + 4*cidx + ridx; 1065 *bap += value; 1066 goto noinsert1; 1067 } 1068 } 1069 N = nrow++ - 1; 1070 high++; /* added new column thus must search to one higher than before */ 1071 /* shift up all the later entries in this row */ 1072 for (ii=N; ii>=i; ii--) { 1073 rp[ii+1] = rp[ii]; 1074 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1075 } 1076 if (N>=i) { 1077 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1078 } 1079 rp[i] = bcol; 1080 ap[16*i + 4*cidx + ridx] = value; 1081 noinsert1:; 1082 low = i; 1083 } 1084 ailen[brow] = nrow; 1085 } 1086 PetscFunctionReturnVoid(); 1087 } 1088 EXTERN_C_END 1089 1090 /* 1091 Checks for missing diagonals 1092 */ 1093 #undef __FUNCT__ 1094 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1095 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d) 1096 { 1097 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1098 PetscErrorCode ierr; 1099 PetscInt *diag,*jj = a->j,i; 1100 1101 PetscFunctionBegin; 1102 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1103 *missing = PETSC_FALSE; 1104 diag = a->diag; 1105 for (i=0; i<a->mbs; i++) { 1106 if (jj[diag[i]] != i) { 1107 *missing = PETSC_TRUE; 1108 if (d) *d = i; 1109 } 1110 } 1111 PetscFunctionReturn(0); 1112 } 1113 1114 #undef __FUNCT__ 1115 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1116 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1117 { 1118 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1119 PetscErrorCode ierr; 1120 PetscInt i,j,m = a->mbs; 1121 1122 PetscFunctionBegin; 1123 if (!a->diag) { 1124 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1125 } 1126 for (i=0; i<m; i++) { 1127 a->diag[i] = a->i[i+1]; 1128 for (j=a->i[i]; j<a->i[i+1]; j++) { 1129 if (a->j[j] == i) { 1130 a->diag[i] = j; 1131 break; 1132 } 1133 } 1134 } 1135 PetscFunctionReturn(0); 1136 } 1137 1138 1139 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1140 1141 #undef __FUNCT__ 1142 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1143 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1144 { 1145 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1146 PetscErrorCode ierr; 1147 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs,nbs = 1,k,l,cnt; 1148 PetscInt *tia, *tja; 1149 1150 PetscFunctionBegin; 1151 *nn = n; 1152 if (!ia) PetscFunctionReturn(0); 1153 if (symmetric) { 1154 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1155 } else { 1156 tia = a->i; tja = a->j; 1157 } 1158 1159 if (!blockcompressed && bs > 1) { 1160 (*nn) *= bs; 1161 nbs = bs; 1162 /* malloc & create the natural set of indices */ 1163 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1164 if (n) { 1165 (*ia)[0] = 0; 1166 for (j=1; j<bs; j++) { 1167 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1168 } 1169 } 1170 1171 for (i=1; i<n; i++) { 1172 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1173 for (j=1; j<bs; j++) { 1174 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1175 } 1176 } 1177 if (n) { 1178 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1179 } 1180 1181 if (ja) { 1182 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1183 cnt = 0; 1184 for (i=0; i<n; i++) { 1185 for (j=0; j<bs; j++) { 1186 for (k=tia[i]; k<tia[i+1]; k++) { 1187 for (l=0; l<bs; l++) { 1188 (*ja)[cnt++] = bs*tja[k] + l; 1189 } 1190 } 1191 } 1192 } 1193 } 1194 1195 n *= bs; 1196 nz *= bs*bs; 1197 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1198 ierr = PetscFree(tia);CHKERRQ(ierr); 1199 ierr = PetscFree(tja);CHKERRQ(ierr); 1200 } 1201 } else { 1202 *ia = tia; 1203 if (ja) *ja = tja; 1204 } 1205 if (oshift == 1) { 1206 for (i=0; i<n+nbs; i++) (*ia)[i]++; 1207 if (ja) for (i=0; i<nz; i++) (*ja)[i]++; 1208 } 1209 PetscFunctionReturn(0); 1210 } 1211 1212 #undef __FUNCT__ 1213 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1214 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1215 { 1216 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1217 PetscErrorCode ierr; 1218 PetscInt i,n = a->mbs,nz = a->i[n]; 1219 1220 PetscFunctionBegin; 1221 if (!ia) PetscFunctionReturn(0); 1222 if (!blockcompressed && A->rmap.bs > 1) { 1223 ierr = PetscFree(*ia);CHKERRQ(ierr); 1224 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1225 } else if (symmetric) { 1226 ierr = PetscFree(*ia);CHKERRQ(ierr); 1227 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1228 } else if (oshift == 1) { /* blockcompressed */ 1229 for (i=0; i<n+1; i++) a->i[i]--; 1230 if (ja) {for (i=0; i<nz; i++) a->j[i]--;} 1231 } 1232 PetscFunctionReturn(0); 1233 } 1234 1235 #undef __FUNCT__ 1236 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1237 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1238 { 1239 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1240 PetscErrorCode ierr; 1241 1242 PetscFunctionBegin; 1243 #if defined(PETSC_USE_LOG) 1244 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz); 1245 #endif 1246 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1247 if (a->row) { 1248 ierr = ISDestroy(a->row);CHKERRQ(ierr); 1249 } 1250 if (a->col) { 1251 ierr = ISDestroy(a->col);CHKERRQ(ierr); 1252 } 1253 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1254 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1255 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1256 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1257 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1258 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 1259 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1260 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1261 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 1262 1263 ierr = PetscFree(a);CHKERRQ(ierr); 1264 1265 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1266 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1267 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1268 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1269 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1270 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1271 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1272 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1273 PetscFunctionReturn(0); 1274 } 1275 1276 #undef __FUNCT__ 1277 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1278 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg) 1279 { 1280 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1281 PetscErrorCode ierr; 1282 1283 PetscFunctionBegin; 1284 switch (op) { 1285 case MAT_ROW_ORIENTED: 1286 a->roworiented = flg; 1287 break; 1288 case MAT_KEEP_ZEROED_ROWS: 1289 a->keepzeroedrows = flg; 1290 break; 1291 case MAT_NEW_NONZERO_LOCATIONS: 1292 a->nonew = (flg ? 0 : 1); 1293 break; 1294 case MAT_NEW_NONZERO_LOCATION_ERR: 1295 a->nonew = (flg ? -1 : 0); 1296 break; 1297 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1298 a->nonew = (flg ? -2 : 0); 1299 break; 1300 case MAT_NEW_DIAGONALS: 1301 case MAT_IGNORE_OFF_PROC_ENTRIES: 1302 case MAT_USE_HASH_TABLE: 1303 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1304 break; 1305 case MAT_SYMMETRIC: 1306 case MAT_STRUCTURALLY_SYMMETRIC: 1307 case MAT_HERMITIAN: 1308 case MAT_SYMMETRY_ETERNAL: 1309 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1310 break; 1311 default: 1312 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1313 } 1314 PetscFunctionReturn(0); 1315 } 1316 1317 #undef __FUNCT__ 1318 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1319 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1320 { 1321 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1322 PetscErrorCode ierr; 1323 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1324 MatScalar *aa,*aa_i; 1325 PetscScalar *v_i; 1326 1327 PetscFunctionBegin; 1328 bs = A->rmap.bs; 1329 ai = a->i; 1330 aj = a->j; 1331 aa = a->a; 1332 bs2 = a->bs2; 1333 1334 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1335 1336 bn = row/bs; /* Block number */ 1337 bp = row % bs; /* Block Position */ 1338 M = ai[bn+1] - ai[bn]; 1339 *nz = bs*M; 1340 1341 if (v) { 1342 *v = 0; 1343 if (*nz) { 1344 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1345 for (i=0; i<M; i++) { /* for each block in the block row */ 1346 v_i = *v + i*bs; 1347 aa_i = aa + bs2*(ai[bn] + i); 1348 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1349 } 1350 } 1351 } 1352 1353 if (idx) { 1354 *idx = 0; 1355 if (*nz) { 1356 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1357 for (i=0; i<M; i++) { /* for each block in the block row */ 1358 idx_i = *idx + i*bs; 1359 itmp = bs*aj[ai[bn] + i]; 1360 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1361 } 1362 } 1363 } 1364 PetscFunctionReturn(0); 1365 } 1366 1367 #undef __FUNCT__ 1368 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1369 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1370 { 1371 PetscErrorCode ierr; 1372 1373 PetscFunctionBegin; 1374 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1375 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1376 PetscFunctionReturn(0); 1377 } 1378 1379 #undef __FUNCT__ 1380 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1381 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 1382 { 1383 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1384 Mat C; 1385 PetscErrorCode ierr; 1386 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1387 PetscInt *rows,*cols,bs2=a->bs2; 1388 MatScalar *array; 1389 1390 PetscFunctionBegin; 1391 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 1392 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1393 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1394 1395 array = a->a; 1396 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1397 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1398 ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr); 1399 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1400 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1401 ierr = PetscFree(col);CHKERRQ(ierr); 1402 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1403 cols = rows + bs; 1404 for (i=0; i<mbs; i++) { 1405 cols[0] = i*bs; 1406 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1407 len = ai[i+1] - ai[i]; 1408 for (j=0; j<len; j++) { 1409 rows[0] = (*aj++)*bs; 1410 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1411 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1412 array += bs2; 1413 } 1414 } 1415 ierr = PetscFree(rows);CHKERRQ(ierr); 1416 1417 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1418 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1419 1420 if (B) { 1421 *B = C; 1422 } else { 1423 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1424 } 1425 PetscFunctionReturn(0); 1426 } 1427 1428 #undef __FUNCT__ 1429 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1430 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1431 { 1432 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1433 PetscErrorCode ierr; 1434 PetscInt i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2; 1435 int fd; 1436 PetscScalar *aa; 1437 FILE *file; 1438 1439 PetscFunctionBegin; 1440 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1441 ierr = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1442 col_lens[0] = MAT_FILE_COOKIE; 1443 1444 col_lens[1] = A->rmap.N; 1445 col_lens[2] = A->cmap.n; 1446 col_lens[3] = a->nz*bs2; 1447 1448 /* store lengths of each row and write (including header) to file */ 1449 count = 0; 1450 for (i=0; i<a->mbs; i++) { 1451 for (j=0; j<bs; j++) { 1452 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1453 } 1454 } 1455 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1456 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1457 1458 /* store column indices (zero start index) */ 1459 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1460 count = 0; 1461 for (i=0; i<a->mbs; i++) { 1462 for (j=0; j<bs; j++) { 1463 for (k=a->i[i]; k<a->i[i+1]; k++) { 1464 for (l=0; l<bs; l++) { 1465 jj[count++] = bs*a->j[k] + l; 1466 } 1467 } 1468 } 1469 } 1470 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1471 ierr = PetscFree(jj);CHKERRQ(ierr); 1472 1473 /* store nonzero values */ 1474 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1475 count = 0; 1476 for (i=0; i<a->mbs; i++) { 1477 for (j=0; j<bs; j++) { 1478 for (k=a->i[i]; k<a->i[i+1]; k++) { 1479 for (l=0; l<bs; l++) { 1480 aa[count++] = a->a[bs2*k + l*bs + j]; 1481 } 1482 } 1483 } 1484 } 1485 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1486 ierr = PetscFree(aa);CHKERRQ(ierr); 1487 1488 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1489 if (file) { 1490 fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs); 1491 } 1492 PetscFunctionReturn(0); 1493 } 1494 1495 #undef __FUNCT__ 1496 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1497 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1498 { 1499 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1500 PetscErrorCode ierr; 1501 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 1502 PetscViewerFormat format; 1503 1504 PetscFunctionBegin; 1505 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1506 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1507 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1508 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1509 Mat aij; 1510 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1511 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1512 ierr = MatDestroy(aij);CHKERRQ(ierr); 1513 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1514 PetscFunctionReturn(0); 1515 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1516 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1517 for (i=0; i<a->mbs; i++) { 1518 for (j=0; j<bs; j++) { 1519 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1520 for (k=a->i[i]; k<a->i[i+1]; k++) { 1521 for (l=0; l<bs; l++) { 1522 #if defined(PETSC_USE_COMPLEX) 1523 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1524 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1525 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1526 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1527 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1528 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1529 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1530 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1531 } 1532 #else 1533 if (a->a[bs2*k + l*bs + j] != 0.0) { 1534 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1535 } 1536 #endif 1537 } 1538 } 1539 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1540 } 1541 } 1542 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1543 } else { 1544 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1545 for (i=0; i<a->mbs; i++) { 1546 for (j=0; j<bs; j++) { 1547 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1548 for (k=a->i[i]; k<a->i[i+1]; k++) { 1549 for (l=0; l<bs; l++) { 1550 #if defined(PETSC_USE_COMPLEX) 1551 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1552 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1553 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1554 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1555 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1556 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1557 } else { 1558 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1559 } 1560 #else 1561 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1562 #endif 1563 } 1564 } 1565 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1566 } 1567 } 1568 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1569 } 1570 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1571 PetscFunctionReturn(0); 1572 } 1573 1574 #undef __FUNCT__ 1575 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1576 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1577 { 1578 Mat A = (Mat) Aa; 1579 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1580 PetscErrorCode ierr; 1581 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 1582 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1583 MatScalar *aa; 1584 PetscViewer viewer; 1585 1586 PetscFunctionBegin; 1587 1588 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1589 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1590 1591 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1592 1593 /* loop over matrix elements drawing boxes */ 1594 color = PETSC_DRAW_BLUE; 1595 for (i=0,row=0; i<mbs; i++,row+=bs) { 1596 for (j=a->i[i]; j<a->i[i+1]; j++) { 1597 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1598 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1599 aa = a->a + j*bs2; 1600 for (k=0; k<bs; k++) { 1601 for (l=0; l<bs; l++) { 1602 if (PetscRealPart(*aa++) >= 0.) continue; 1603 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1604 } 1605 } 1606 } 1607 } 1608 color = PETSC_DRAW_CYAN; 1609 for (i=0,row=0; i<mbs; i++,row+=bs) { 1610 for (j=a->i[i]; j<a->i[i+1]; j++) { 1611 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1612 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1613 aa = a->a + j*bs2; 1614 for (k=0; k<bs; k++) { 1615 for (l=0; l<bs; l++) { 1616 if (PetscRealPart(*aa++) != 0.) continue; 1617 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1618 } 1619 } 1620 } 1621 } 1622 1623 color = PETSC_DRAW_RED; 1624 for (i=0,row=0; i<mbs; i++,row+=bs) { 1625 for (j=a->i[i]; j<a->i[i+1]; j++) { 1626 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1627 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1628 aa = a->a + j*bs2; 1629 for (k=0; k<bs; k++) { 1630 for (l=0; l<bs; l++) { 1631 if (PetscRealPart(*aa++) <= 0.) continue; 1632 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1633 } 1634 } 1635 } 1636 } 1637 PetscFunctionReturn(0); 1638 } 1639 1640 #undef __FUNCT__ 1641 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1642 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1643 { 1644 PetscErrorCode ierr; 1645 PetscReal xl,yl,xr,yr,w,h; 1646 PetscDraw draw; 1647 PetscTruth isnull; 1648 1649 PetscFunctionBegin; 1650 1651 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1652 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1653 1654 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1655 xr = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 1656 xr += w; yr += h; xl = -w; yl = -h; 1657 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1658 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1659 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1660 PetscFunctionReturn(0); 1661 } 1662 1663 #undef __FUNCT__ 1664 #define __FUNCT__ "MatView_SeqBAIJ" 1665 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1666 { 1667 PetscErrorCode ierr; 1668 PetscTruth iascii,isbinary,isdraw; 1669 1670 PetscFunctionBegin; 1671 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1672 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1673 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1674 if (iascii){ 1675 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1676 } else if (isbinary) { 1677 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1678 } else if (isdraw) { 1679 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1680 } else { 1681 Mat B; 1682 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1683 ierr = MatView(B,viewer);CHKERRQ(ierr); 1684 ierr = MatDestroy(B);CHKERRQ(ierr); 1685 } 1686 PetscFunctionReturn(0); 1687 } 1688 1689 1690 #undef __FUNCT__ 1691 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1692 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1693 { 1694 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1695 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1696 PetscInt *ai = a->i,*ailen = a->ilen; 1697 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 1698 MatScalar *ap,*aa = a->a; 1699 1700 PetscFunctionBegin; 1701 for (k=0; k<m; k++) { /* loop over rows */ 1702 row = im[k]; brow = row/bs; 1703 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1704 if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1705 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1706 nrow = ailen[brow]; 1707 for (l=0; l<n; l++) { /* loop over columns */ 1708 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1709 if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1710 col = in[l] ; 1711 bcol = col/bs; 1712 cidx = col%bs; 1713 ridx = row%bs; 1714 high = nrow; 1715 low = 0; /* assume unsorted */ 1716 while (high-low > 5) { 1717 t = (low+high)/2; 1718 if (rp[t] > bcol) high = t; 1719 else low = t; 1720 } 1721 for (i=low; i<high; i++) { 1722 if (rp[i] > bcol) break; 1723 if (rp[i] == bcol) { 1724 *v++ = ap[bs2*i+bs*cidx+ridx]; 1725 goto finished; 1726 } 1727 } 1728 *v++ = 0.0; 1729 finished:; 1730 } 1731 } 1732 PetscFunctionReturn(0); 1733 } 1734 1735 #define CHUNKSIZE 10 1736 #undef __FUNCT__ 1737 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1738 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1739 { 1740 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1741 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1742 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1743 PetscErrorCode ierr; 1744 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 1745 PetscTruth roworiented=a->roworiented; 1746 const PetscScalar *value = v; 1747 MatScalar *ap,*aa = a->a,*bap; 1748 1749 PetscFunctionBegin; 1750 if (roworiented) { 1751 stepval = (n-1)*bs; 1752 } else { 1753 stepval = (m-1)*bs; 1754 } 1755 for (k=0; k<m; k++) { /* loop over added rows */ 1756 row = im[k]; 1757 if (row < 0) continue; 1758 #if defined(PETSC_USE_DEBUG) 1759 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1760 #endif 1761 rp = aj + ai[row]; 1762 ap = aa + bs2*ai[row]; 1763 rmax = imax[row]; 1764 nrow = ailen[row]; 1765 low = 0; 1766 high = nrow; 1767 for (l=0; l<n; l++) { /* loop over added columns */ 1768 if (in[l] < 0) continue; 1769 #if defined(PETSC_USE_DEBUG) 1770 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1771 #endif 1772 col = in[l]; 1773 if (roworiented) { 1774 value = v + k*(stepval+bs)*bs + l*bs; 1775 } else { 1776 value = v + l*(stepval+bs)*bs + k*bs; 1777 } 1778 if (col <= lastcol) low = 0; else high = nrow; 1779 lastcol = col; 1780 while (high-low > 7) { 1781 t = (low+high)/2; 1782 if (rp[t] > col) high = t; 1783 else low = t; 1784 } 1785 for (i=low; i<high; i++) { 1786 if (rp[i] > col) break; 1787 if (rp[i] == col) { 1788 bap = ap + bs2*i; 1789 if (roworiented) { 1790 if (is == ADD_VALUES) { 1791 for (ii=0; ii<bs; ii++,value+=stepval) { 1792 for (jj=ii; jj<bs2; jj+=bs) { 1793 bap[jj] += *value++; 1794 } 1795 } 1796 } else { 1797 for (ii=0; ii<bs; ii++,value+=stepval) { 1798 for (jj=ii; jj<bs2; jj+=bs) { 1799 bap[jj] = *value++; 1800 } 1801 } 1802 } 1803 } else { 1804 if (is == ADD_VALUES) { 1805 for (ii=0; ii<bs; ii++,value+=stepval) { 1806 for (jj=0; jj<bs; jj++) { 1807 *bap++ += *value++; 1808 } 1809 } 1810 } else { 1811 for (ii=0; ii<bs; ii++,value+=stepval) { 1812 for (jj=0; jj<bs; jj++) { 1813 *bap++ = *value++; 1814 } 1815 } 1816 } 1817 } 1818 goto noinsert2; 1819 } 1820 } 1821 if (nonew == 1) goto noinsert2; 1822 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1823 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1824 N = nrow++ - 1; high++; 1825 /* shift up all the later entries in this row */ 1826 for (ii=N; ii>=i; ii--) { 1827 rp[ii+1] = rp[ii]; 1828 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1829 } 1830 if (N >= i) { 1831 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1832 } 1833 rp[i] = col; 1834 bap = ap + bs2*i; 1835 if (roworiented) { 1836 for (ii=0; ii<bs; ii++,value+=stepval) { 1837 for (jj=ii; jj<bs2; jj+=bs) { 1838 bap[jj] = *value++; 1839 } 1840 } 1841 } else { 1842 for (ii=0; ii<bs; ii++,value+=stepval) { 1843 for (jj=0; jj<bs; jj++) { 1844 *bap++ = *value++; 1845 } 1846 } 1847 } 1848 noinsert2:; 1849 low = i; 1850 } 1851 ailen[row] = nrow; 1852 } 1853 PetscFunctionReturn(0); 1854 } 1855 1856 #undef __FUNCT__ 1857 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1858 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1859 { 1860 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1861 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1862 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 1863 PetscErrorCode ierr; 1864 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1865 MatScalar *aa = a->a,*ap; 1866 PetscReal ratio=0.6; 1867 1868 PetscFunctionBegin; 1869 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1870 1871 if (m) rmax = ailen[0]; 1872 for (i=1; i<mbs; i++) { 1873 /* move each row back by the amount of empty slots (fshift) before it*/ 1874 fshift += imax[i-1] - ailen[i-1]; 1875 rmax = PetscMax(rmax,ailen[i]); 1876 if (fshift) { 1877 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1878 N = ailen[i]; 1879 for (j=0; j<N; j++) { 1880 ip[j-fshift] = ip[j]; 1881 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1882 } 1883 } 1884 ai[i] = ai[i-1] + ailen[i-1]; 1885 } 1886 if (mbs) { 1887 fshift += imax[mbs-1] - ailen[mbs-1]; 1888 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1889 } 1890 /* reset ilen and imax for each row */ 1891 for (i=0; i<mbs; i++) { 1892 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1893 } 1894 a->nz = ai[mbs]; 1895 1896 /* diagonals may have moved, so kill the diagonal pointers */ 1897 a->idiagvalid = PETSC_FALSE; 1898 if (fshift && a->diag) { 1899 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1900 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1901 a->diag = 0; 1902 } 1903 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1904 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1905 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1906 a->reallocs = 0; 1907 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1908 1909 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1910 if (a->compressedrow.use){ 1911 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1912 } 1913 1914 A->same_nonzero = PETSC_TRUE; 1915 PetscFunctionReturn(0); 1916 } 1917 1918 /* 1919 This function returns an array of flags which indicate the locations of contiguous 1920 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1921 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1922 Assume: sizes should be long enough to hold all the values. 1923 */ 1924 #undef __FUNCT__ 1925 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1926 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1927 { 1928 PetscInt i,j,k,row; 1929 PetscTruth flg; 1930 1931 PetscFunctionBegin; 1932 for (i=0,j=0; i<n; j++) { 1933 row = idx[i]; 1934 if (row%bs!=0) { /* Not the begining of a block */ 1935 sizes[j] = 1; 1936 i++; 1937 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1938 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1939 i++; 1940 } else { /* Begining of the block, so check if the complete block exists */ 1941 flg = PETSC_TRUE; 1942 for (k=1; k<bs; k++) { 1943 if (row+k != idx[i+k]) { /* break in the block */ 1944 flg = PETSC_FALSE; 1945 break; 1946 } 1947 } 1948 if (flg) { /* No break in the bs */ 1949 sizes[j] = bs; 1950 i+= bs; 1951 } else { 1952 sizes[j] = 1; 1953 i++; 1954 } 1955 } 1956 } 1957 *bs_max = j; 1958 PetscFunctionReturn(0); 1959 } 1960 1961 #undef __FUNCT__ 1962 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1963 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 1964 { 1965 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1966 PetscErrorCode ierr; 1967 PetscInt i,j,k,count,*rows; 1968 PetscInt bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max; 1969 PetscScalar zero = 0.0; 1970 MatScalar *aa; 1971 1972 PetscFunctionBegin; 1973 /* Make a copy of the IS and sort it */ 1974 /* allocate memory for rows,sizes */ 1975 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1976 sizes = rows + is_n; 1977 1978 /* copy IS values to rows, and sort them */ 1979 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1980 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1981 if (baij->keepzeroedrows) { 1982 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1983 bs_max = is_n; 1984 A->same_nonzero = PETSC_TRUE; 1985 } else { 1986 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1987 A->same_nonzero = PETSC_FALSE; 1988 } 1989 1990 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1991 row = rows[j]; 1992 if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 1993 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1994 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1995 if (sizes[i] == bs && !baij->keepzeroedrows) { 1996 if (diag != 0.0) { 1997 if (baij->ilen[row/bs] > 0) { 1998 baij->ilen[row/bs] = 1; 1999 baij->j[baij->i[row/bs]] = row/bs; 2000 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2001 } 2002 /* Now insert all the diagonal values for this bs */ 2003 for (k=0; k<bs; k++) { 2004 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2005 } 2006 } else { /* (diag == 0.0) */ 2007 baij->ilen[row/bs] = 0; 2008 } /* end (diag == 0.0) */ 2009 } else { /* (sizes[i] != bs) */ 2010 #if defined (PETSC_USE_DEBUG) 2011 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2012 #endif 2013 for (k=0; k<count; k++) { 2014 aa[0] = zero; 2015 aa += bs; 2016 } 2017 if (diag != 0.0) { 2018 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2019 } 2020 } 2021 } 2022 2023 ierr = PetscFree(rows);CHKERRQ(ierr); 2024 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2025 PetscFunctionReturn(0); 2026 } 2027 2028 #undef __FUNCT__ 2029 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2030 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2031 { 2032 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2033 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2034 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2035 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 2036 PetscErrorCode ierr; 2037 PetscInt ridx,cidx,bs2=a->bs2; 2038 PetscTruth roworiented=a->roworiented; 2039 MatScalar *ap,value,*aa=a->a,*bap; 2040 2041 PetscFunctionBegin; 2042 for (k=0; k<m; k++) { /* loop over added rows */ 2043 row = im[k]; 2044 brow = row/bs; 2045 if (row < 0) continue; 2046 #if defined(PETSC_USE_DEBUG) 2047 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 2048 #endif 2049 rp = aj + ai[brow]; 2050 ap = aa + bs2*ai[brow]; 2051 rmax = imax[brow]; 2052 nrow = ailen[brow]; 2053 low = 0; 2054 high = nrow; 2055 for (l=0; l<n; l++) { /* loop over added columns */ 2056 if (in[l] < 0) continue; 2057 #if defined(PETSC_USE_DEBUG) 2058 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 2059 #endif 2060 col = in[l]; bcol = col/bs; 2061 ridx = row % bs; cidx = col % bs; 2062 if (roworiented) { 2063 value = v[l + k*n]; 2064 } else { 2065 value = v[k + l*m]; 2066 } 2067 if (col <= lastcol) low = 0; else high = nrow; 2068 lastcol = col; 2069 while (high-low > 7) { 2070 t = (low+high)/2; 2071 if (rp[t] > bcol) high = t; 2072 else low = t; 2073 } 2074 for (i=low; i<high; i++) { 2075 if (rp[i] > bcol) break; 2076 if (rp[i] == bcol) { 2077 bap = ap + bs2*i + bs*cidx + ridx; 2078 if (is == ADD_VALUES) *bap += value; 2079 else *bap = value; 2080 goto noinsert1; 2081 } 2082 } 2083 if (nonew == 1) goto noinsert1; 2084 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2085 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2086 N = nrow++ - 1; high++; 2087 /* shift up all the later entries in this row */ 2088 for (ii=N; ii>=i; ii--) { 2089 rp[ii+1] = rp[ii]; 2090 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2091 } 2092 if (N>=i) { 2093 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2094 } 2095 rp[i] = bcol; 2096 ap[bs2*i + bs*cidx + ridx] = value; 2097 a->nz++; 2098 noinsert1:; 2099 low = i; 2100 } 2101 ailen[brow] = nrow; 2102 } 2103 A->same_nonzero = PETSC_FALSE; 2104 PetscFunctionReturn(0); 2105 } 2106 2107 2108 #undef __FUNCT__ 2109 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2110 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 2111 { 2112 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2113 Mat outA; 2114 PetscErrorCode ierr; 2115 PetscTruth row_identity,col_identity; 2116 2117 PetscFunctionBegin; 2118 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2119 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2120 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2121 if (!row_identity || !col_identity) { 2122 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2123 } 2124 2125 outA = inA; 2126 inA->factor = FACTOR_LU; 2127 2128 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2129 2130 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2131 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 2132 a->row = row; 2133 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2134 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 2135 a->col = col; 2136 2137 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2138 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2139 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2140 2141 /* 2142 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 2143 for ILU(0) factorization with natural ordering 2144 */ 2145 if (inA->rmap.bs < 8) { 2146 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 2147 } else { 2148 if (!a->solve_work) { 2149 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2150 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2151 } 2152 } 2153 2154 ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 2155 2156 PetscFunctionReturn(0); 2157 } 2158 2159 EXTERN_C_BEGIN 2160 #undef __FUNCT__ 2161 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2162 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2163 { 2164 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2165 PetscInt i,nz,nbs; 2166 2167 PetscFunctionBegin; 2168 nz = baij->maxnz/baij->bs2; 2169 nbs = baij->nbs; 2170 for (i=0; i<nz; i++) { 2171 baij->j[i] = indices[i]; 2172 } 2173 baij->nz = nz; 2174 for (i=0; i<nbs; i++) { 2175 baij->ilen[i] = baij->imax[i]; 2176 } 2177 2178 PetscFunctionReturn(0); 2179 } 2180 EXTERN_C_END 2181 2182 #undef __FUNCT__ 2183 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2184 /*@ 2185 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2186 in the matrix. 2187 2188 Input Parameters: 2189 + mat - the SeqBAIJ matrix 2190 - indices - the column indices 2191 2192 Level: advanced 2193 2194 Notes: 2195 This can be called if you have precomputed the nonzero structure of the 2196 matrix and want to provide it to the matrix object to improve the performance 2197 of the MatSetValues() operation. 2198 2199 You MUST have set the correct numbers of nonzeros per row in the call to 2200 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2201 2202 MUST be called before any calls to MatSetValues(); 2203 2204 @*/ 2205 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2206 { 2207 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 2208 2209 PetscFunctionBegin; 2210 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 2211 PetscValidPointer(indices,2); 2212 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 2213 if (f) { 2214 ierr = (*f)(mat,indices);CHKERRQ(ierr); 2215 } else { 2216 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 2217 } 2218 PetscFunctionReturn(0); 2219 } 2220 2221 #undef __FUNCT__ 2222 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2223 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2224 { 2225 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2226 PetscErrorCode ierr; 2227 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2228 PetscReal atmp; 2229 PetscScalar *x,zero = 0.0; 2230 MatScalar *aa; 2231 PetscInt ncols,brow,krow,kcol; 2232 2233 PetscFunctionBegin; 2234 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2235 bs = A->rmap.bs; 2236 aa = a->a; 2237 ai = a->i; 2238 aj = a->j; 2239 mbs = a->mbs; 2240 2241 ierr = VecSet(v,zero);CHKERRQ(ierr); 2242 if (idx) { 2243 for (i=0; i<A->rmap.n;i++) idx[i] = 0; 2244 } 2245 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2246 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2247 if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2248 for (i=0; i<mbs; i++) { 2249 ncols = ai[1] - ai[0]; ai++; 2250 brow = bs*i; 2251 for (j=0; j<ncols; j++){ 2252 for (kcol=0; kcol<bs; kcol++){ 2253 for (krow=0; krow<bs; krow++){ 2254 atmp = PetscAbsScalar(*aa);aa++; 2255 row = brow + krow; /* row index */ 2256 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2257 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2258 } 2259 } 2260 aj++; 2261 } 2262 } 2263 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2264 PetscFunctionReturn(0); 2265 } 2266 2267 #undef __FUNCT__ 2268 #define __FUNCT__ "MatCopy_SeqBAIJ" 2269 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2270 { 2271 PetscErrorCode ierr; 2272 2273 PetscFunctionBegin; 2274 /* If the two matrices have the same copy implementation, use fast copy. */ 2275 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2276 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2277 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2278 2279 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 2280 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 2281 } 2282 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 2283 } else { 2284 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2285 } 2286 PetscFunctionReturn(0); 2287 } 2288 2289 #undef __FUNCT__ 2290 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2291 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2292 { 2293 PetscErrorCode ierr; 2294 2295 PetscFunctionBegin; 2296 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2297 PetscFunctionReturn(0); 2298 } 2299 2300 #undef __FUNCT__ 2301 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2302 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2303 { 2304 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2305 PetscFunctionBegin; 2306 *array = a->a; 2307 PetscFunctionReturn(0); 2308 } 2309 2310 #undef __FUNCT__ 2311 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2312 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2313 { 2314 PetscFunctionBegin; 2315 PetscFunctionReturn(0); 2316 } 2317 2318 #include "petscblaslapack.h" 2319 #undef __FUNCT__ 2320 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2321 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2322 { 2323 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2324 PetscErrorCode ierr; 2325 PetscInt i,bs=Y->rmap.bs,j,bs2; 2326 PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz); 2327 2328 PetscFunctionBegin; 2329 if (str == SAME_NONZERO_PATTERN) { 2330 PetscScalar alpha = a; 2331 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2332 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2333 if (y->xtoy && y->XtoY != X) { 2334 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2335 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2336 } 2337 if (!y->xtoy) { /* get xtoy */ 2338 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2339 y->XtoY = X; 2340 } 2341 bs2 = bs*bs; 2342 for (i=0; i<x->nz; i++) { 2343 j = 0; 2344 while (j < bs2){ 2345 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2346 j++; 2347 } 2348 } 2349 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2350 } else { 2351 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2352 } 2353 PetscFunctionReturn(0); 2354 } 2355 2356 #undef __FUNCT__ 2357 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2358 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2359 { 2360 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2361 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2362 MatScalar *aa = a->a; 2363 2364 PetscFunctionBegin; 2365 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2366 PetscFunctionReturn(0); 2367 } 2368 2369 #undef __FUNCT__ 2370 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2371 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2372 { 2373 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2374 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2375 MatScalar *aa = a->a; 2376 2377 PetscFunctionBegin; 2378 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2379 PetscFunctionReturn(0); 2380 } 2381 2382 2383 /* -------------------------------------------------------------------*/ 2384 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2385 MatGetRow_SeqBAIJ, 2386 MatRestoreRow_SeqBAIJ, 2387 MatMult_SeqBAIJ_N, 2388 /* 4*/ MatMultAdd_SeqBAIJ_N, 2389 MatMultTranspose_SeqBAIJ, 2390 MatMultTransposeAdd_SeqBAIJ, 2391 MatSolve_SeqBAIJ_N, 2392 0, 2393 0, 2394 /*10*/ 0, 2395 MatLUFactor_SeqBAIJ, 2396 0, 2397 0, 2398 MatTranspose_SeqBAIJ, 2399 /*15*/ MatGetInfo_SeqBAIJ, 2400 MatEqual_SeqBAIJ, 2401 MatGetDiagonal_SeqBAIJ, 2402 MatDiagonalScale_SeqBAIJ, 2403 MatNorm_SeqBAIJ, 2404 /*20*/ 0, 2405 MatAssemblyEnd_SeqBAIJ, 2406 0, 2407 MatSetOption_SeqBAIJ, 2408 MatZeroEntries_SeqBAIJ, 2409 /*25*/ MatZeroRows_SeqBAIJ, 2410 MatLUFactorSymbolic_SeqBAIJ, 2411 MatLUFactorNumeric_SeqBAIJ_N, 2412 MatCholeskyFactorSymbolic_SeqBAIJ, 2413 MatCholeskyFactorNumeric_SeqBAIJ_N, 2414 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2415 MatILUFactorSymbolic_SeqBAIJ, 2416 MatICCFactorSymbolic_SeqBAIJ, 2417 MatGetArray_SeqBAIJ, 2418 MatRestoreArray_SeqBAIJ, 2419 /*35*/ MatDuplicate_SeqBAIJ, 2420 0, 2421 0, 2422 MatILUFactor_SeqBAIJ, 2423 0, 2424 /*40*/ MatAXPY_SeqBAIJ, 2425 MatGetSubMatrices_SeqBAIJ, 2426 MatIncreaseOverlap_SeqBAIJ, 2427 MatGetValues_SeqBAIJ, 2428 MatCopy_SeqBAIJ, 2429 /*45*/ 0, 2430 MatScale_SeqBAIJ, 2431 0, 2432 0, 2433 0, 2434 /*50*/ 0, 2435 MatGetRowIJ_SeqBAIJ, 2436 MatRestoreRowIJ_SeqBAIJ, 2437 0, 2438 0, 2439 /*55*/ 0, 2440 0, 2441 0, 2442 0, 2443 MatSetValuesBlocked_SeqBAIJ, 2444 /*60*/ MatGetSubMatrix_SeqBAIJ, 2445 MatDestroy_SeqBAIJ, 2446 MatView_SeqBAIJ, 2447 0, 2448 0, 2449 /*65*/ 0, 2450 0, 2451 0, 2452 0, 2453 0, 2454 /*70*/ MatGetRowMaxAbs_SeqBAIJ, 2455 MatConvert_Basic, 2456 0, 2457 0, 2458 0, 2459 /*75*/ 0, 2460 0, 2461 0, 2462 0, 2463 0, 2464 /*80*/ 0, 2465 0, 2466 0, 2467 0, 2468 MatLoad_SeqBAIJ, 2469 /*85*/ 0, 2470 0, 2471 0, 2472 0, 2473 0, 2474 /*90*/ 0, 2475 0, 2476 0, 2477 0, 2478 0, 2479 /*95*/ 0, 2480 0, 2481 0, 2482 0, 2483 0, 2484 /*100*/0, 2485 0, 2486 0, 2487 0, 2488 0, 2489 /*105*/0, 2490 MatRealPart_SeqBAIJ, 2491 MatImaginaryPart_SeqBAIJ, 2492 0, 2493 0, 2494 /*110*/0, 2495 0, 2496 0, 2497 0, 2498 MatMissingDiagonal_SeqBAIJ 2499 /*115*/ 2500 }; 2501 2502 EXTERN_C_BEGIN 2503 #undef __FUNCT__ 2504 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2505 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2506 { 2507 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2508 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2509 PetscErrorCode ierr; 2510 2511 PetscFunctionBegin; 2512 if (aij->nonew != 1) { 2513 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2514 } 2515 2516 /* allocate space for values if not already there */ 2517 if (!aij->saved_values) { 2518 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2519 } 2520 2521 /* copy values over */ 2522 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2523 PetscFunctionReturn(0); 2524 } 2525 EXTERN_C_END 2526 2527 EXTERN_C_BEGIN 2528 #undef __FUNCT__ 2529 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2530 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2531 { 2532 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2533 PetscErrorCode ierr; 2534 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2535 2536 PetscFunctionBegin; 2537 if (aij->nonew != 1) { 2538 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2539 } 2540 if (!aij->saved_values) { 2541 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2542 } 2543 2544 /* copy values over */ 2545 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2546 PetscFunctionReturn(0); 2547 } 2548 EXTERN_C_END 2549 2550 EXTERN_C_BEGIN 2551 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2552 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2553 EXTERN_C_END 2554 2555 EXTERN_C_BEGIN 2556 #undef __FUNCT__ 2557 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2558 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2559 { 2560 Mat_SeqBAIJ *b; 2561 PetscErrorCode ierr; 2562 PetscInt i,mbs,nbs,bs2,newbs = bs; 2563 PetscTruth flg,skipallocation = PETSC_FALSE; 2564 2565 PetscFunctionBegin; 2566 2567 if (nz == MAT_SKIP_ALLOCATION) { 2568 skipallocation = PETSC_TRUE; 2569 nz = 0; 2570 } 2571 2572 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2573 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2574 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2575 2576 if (nnz && newbs != bs) { 2577 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2578 } 2579 bs = newbs; 2580 2581 B->rmap.bs = B->cmap.bs = bs; 2582 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2583 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2584 2585 B->preallocated = PETSC_TRUE; 2586 2587 mbs = B->rmap.n/bs; 2588 nbs = B->cmap.n/bs; 2589 bs2 = bs*bs; 2590 2591 if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) { 2592 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs); 2593 } 2594 2595 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2596 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2597 if (nnz) { 2598 for (i=0; i<mbs; i++) { 2599 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2600 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2601 } 2602 } 2603 2604 b = (Mat_SeqBAIJ*)B->data; 2605 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2606 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2607 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2608 2609 B->ops->solve = MatSolve_SeqBAIJ_Update; 2610 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2611 if (!flg) { 2612 switch (bs) { 2613 case 1: 2614 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2615 B->ops->mult = MatMult_SeqBAIJ_1; 2616 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2617 B->ops->pbrelax = MatPBRelax_SeqBAIJ_1; 2618 break; 2619 case 2: 2620 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2621 B->ops->mult = MatMult_SeqBAIJ_2; 2622 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2623 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2624 break; 2625 case 3: 2626 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2627 B->ops->mult = MatMult_SeqBAIJ_3; 2628 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2629 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2630 break; 2631 case 4: 2632 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2633 B->ops->mult = MatMult_SeqBAIJ_4; 2634 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2635 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2636 break; 2637 case 5: 2638 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2639 B->ops->mult = MatMult_SeqBAIJ_5; 2640 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2641 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2642 break; 2643 case 6: 2644 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2645 B->ops->mult = MatMult_SeqBAIJ_6; 2646 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2647 B->ops->pbrelax = MatPBRelax_SeqBAIJ_6; 2648 break; 2649 case 7: 2650 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2651 B->ops->mult = MatMult_SeqBAIJ_7; 2652 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2653 B->ops->pbrelax = MatPBRelax_SeqBAIJ_7; 2654 break; 2655 default: 2656 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2657 B->ops->mult = MatMult_SeqBAIJ_N; 2658 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2659 break; 2660 } 2661 } 2662 B->rmap.bs = bs; 2663 b->mbs = mbs; 2664 b->nbs = nbs; 2665 if (!skipallocation) { 2666 if (!b->imax) { 2667 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2668 } 2669 /* b->ilen will count nonzeros in each block row so far. */ 2670 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2671 if (!nnz) { 2672 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2673 else if (nz <= 0) nz = 1; 2674 for (i=0; i<mbs; i++) b->imax[i] = nz; 2675 nz = nz*mbs; 2676 } else { 2677 nz = 0; 2678 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2679 } 2680 2681 /* allocate the matrix space */ 2682 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2683 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 2684 ierr = PetscLogObjectMemory(B,(B->rmap.N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2685 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2686 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2687 b->singlemalloc = PETSC_TRUE; 2688 b->i[0] = 0; 2689 for (i=1; i<mbs+1; i++) { 2690 b->i[i] = b->i[i-1] + b->imax[i-1]; 2691 } 2692 b->free_a = PETSC_TRUE; 2693 b->free_ij = PETSC_TRUE; 2694 } else { 2695 b->free_a = PETSC_FALSE; 2696 b->free_ij = PETSC_FALSE; 2697 } 2698 2699 B->rmap.bs = bs; 2700 b->bs2 = bs2; 2701 b->mbs = mbs; 2702 b->nz = 0; 2703 b->maxnz = nz*bs2; 2704 B->info.nz_unneeded = (PetscReal)b->maxnz; 2705 PetscFunctionReturn(0); 2706 } 2707 EXTERN_C_END 2708 2709 /*MC 2710 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2711 block sparse compressed row format. 2712 2713 Options Database Keys: 2714 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2715 2716 Level: beginner 2717 2718 .seealso: MatCreateSeqBAIJ() 2719 M*/ 2720 2721 EXTERN_C_BEGIN 2722 #undef __FUNCT__ 2723 #define __FUNCT__ "MatCreate_SeqBAIJ" 2724 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2725 { 2726 PetscErrorCode ierr; 2727 PetscMPIInt size; 2728 Mat_SeqBAIJ *b; 2729 2730 PetscFunctionBegin; 2731 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 2732 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2733 2734 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2735 B->data = (void*)b; 2736 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2737 B->factor = 0; 2738 B->mapping = 0; 2739 b->row = 0; 2740 b->col = 0; 2741 b->icol = 0; 2742 b->reallocs = 0; 2743 b->saved_values = 0; 2744 2745 b->roworiented = PETSC_TRUE; 2746 b->nonew = 0; 2747 b->diag = 0; 2748 b->solve_work = 0; 2749 b->mult_work = 0; 2750 B->spptr = 0; 2751 B->info.nz_unneeded = (PetscReal)b->maxnz; 2752 b->keepzeroedrows = PETSC_FALSE; 2753 b->xtoy = 0; 2754 b->XtoY = 0; 2755 b->compressedrow.use = PETSC_FALSE; 2756 b->compressedrow.nrows = 0; 2757 b->compressedrow.i = PETSC_NULL; 2758 b->compressedrow.rindex = PETSC_NULL; 2759 b->compressedrow.checked = PETSC_FALSE; 2760 B->same_nonzero = PETSC_FALSE; 2761 2762 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2763 "MatInvertBlockDiagonal_SeqBAIJ", 2764 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2765 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2766 "MatStoreValues_SeqBAIJ", 2767 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2768 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2769 "MatRetrieveValues_SeqBAIJ", 2770 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2771 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2772 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2773 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2774 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2775 "MatConvert_SeqBAIJ_SeqAIJ", 2776 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2777 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2778 "MatConvert_SeqBAIJ_SeqSBAIJ", 2779 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2780 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2781 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2782 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2783 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 2784 PetscFunctionReturn(0); 2785 } 2786 EXTERN_C_END 2787 2788 #undef __FUNCT__ 2789 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2790 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2791 { 2792 Mat C; 2793 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2794 PetscErrorCode ierr; 2795 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2796 2797 PetscFunctionBegin; 2798 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2799 2800 *B = 0; 2801 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 2802 ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 2803 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 2804 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2805 c = (Mat_SeqBAIJ*)C->data; 2806 2807 C->rmap.N = A->rmap.N; 2808 C->cmap.N = A->cmap.N; 2809 C->rmap.bs = A->rmap.bs; 2810 c->bs2 = a->bs2; 2811 c->mbs = a->mbs; 2812 c->nbs = a->nbs; 2813 2814 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 2815 for (i=0; i<mbs; i++) { 2816 c->imax[i] = a->imax[i]; 2817 c->ilen[i] = a->ilen[i]; 2818 } 2819 2820 /* allocate the matrix space */ 2821 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2822 c->singlemalloc = PETSC_TRUE; 2823 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2824 if (mbs > 0) { 2825 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2826 if (cpvalues == MAT_COPY_VALUES) { 2827 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2828 } else { 2829 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2830 } 2831 } 2832 c->roworiented = a->roworiented; 2833 c->nonew = a->nonew; 2834 2835 if (a->diag) { 2836 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2837 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2838 for (i=0; i<mbs; i++) { 2839 c->diag[i] = a->diag[i]; 2840 } 2841 } else c->diag = 0; 2842 c->nz = a->nz; 2843 c->maxnz = a->maxnz; 2844 c->solve_work = 0; 2845 c->mult_work = 0; 2846 c->free_a = PETSC_TRUE; 2847 c->free_ij = PETSC_TRUE; 2848 C->preallocated = PETSC_TRUE; 2849 C->assembled = PETSC_TRUE; 2850 2851 c->compressedrow.use = a->compressedrow.use; 2852 c->compressedrow.nrows = a->compressedrow.nrows; 2853 c->compressedrow.checked = a->compressedrow.checked; 2854 if ( a->compressedrow.checked && a->compressedrow.use){ 2855 i = a->compressedrow.nrows; 2856 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2857 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2858 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2859 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2860 } else { 2861 c->compressedrow.use = PETSC_FALSE; 2862 c->compressedrow.i = PETSC_NULL; 2863 c->compressedrow.rindex = PETSC_NULL; 2864 } 2865 C->same_nonzero = A->same_nonzero; 2866 *B = C; 2867 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 2868 PetscFunctionReturn(0); 2869 } 2870 2871 #undef __FUNCT__ 2872 #define __FUNCT__ "MatLoad_SeqBAIJ" 2873 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A) 2874 { 2875 Mat_SeqBAIJ *a; 2876 Mat B; 2877 PetscErrorCode ierr; 2878 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2879 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2880 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2881 PetscInt *masked,nmask,tmp,bs2,ishift; 2882 PetscMPIInt size; 2883 int fd; 2884 PetscScalar *aa; 2885 MPI_Comm comm = ((PetscObject)viewer)->comm; 2886 2887 PetscFunctionBegin; 2888 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 2889 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2890 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2891 bs2 = bs*bs; 2892 2893 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2894 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2895 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2896 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2897 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2898 M = header[1]; N = header[2]; nz = header[3]; 2899 2900 if (header[3] < 0) { 2901 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2902 } 2903 2904 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2905 2906 /* 2907 This code adds extra rows to make sure the number of rows is 2908 divisible by the blocksize 2909 */ 2910 mbs = M/bs; 2911 extra_rows = bs - M + bs*(mbs); 2912 if (extra_rows == bs) extra_rows = 0; 2913 else mbs++; 2914 if (extra_rows) { 2915 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2916 } 2917 2918 /* read in row lengths */ 2919 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2920 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2921 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2922 2923 /* read in column indices */ 2924 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2925 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2926 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2927 2928 /* loop over row lengths determining block row lengths */ 2929 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2930 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2931 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2932 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2933 masked = mask + mbs; 2934 rowcount = 0; nzcount = 0; 2935 for (i=0; i<mbs; i++) { 2936 nmask = 0; 2937 for (j=0; j<bs; j++) { 2938 kmax = rowlengths[rowcount]; 2939 for (k=0; k<kmax; k++) { 2940 tmp = jj[nzcount++]/bs; 2941 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2942 } 2943 rowcount++; 2944 } 2945 browlengths[i] += nmask; 2946 /* zero out the mask elements we set */ 2947 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2948 } 2949 2950 /* create our matrix */ 2951 ierr = MatCreate(comm,&B); 2952 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 2953 ierr = MatSetType(B,type);CHKERRQ(ierr); 2954 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 2955 a = (Mat_SeqBAIJ*)B->data; 2956 2957 /* set matrix "i" values */ 2958 a->i[0] = 0; 2959 for (i=1; i<= mbs; i++) { 2960 a->i[i] = a->i[i-1] + browlengths[i-1]; 2961 a->ilen[i-1] = browlengths[i-1]; 2962 } 2963 a->nz = 0; 2964 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2965 2966 /* read in nonzero values */ 2967 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2968 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2969 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2970 2971 /* set "a" and "j" values into matrix */ 2972 nzcount = 0; jcount = 0; 2973 for (i=0; i<mbs; i++) { 2974 nzcountb = nzcount; 2975 nmask = 0; 2976 for (j=0; j<bs; j++) { 2977 kmax = rowlengths[i*bs+j]; 2978 for (k=0; k<kmax; k++) { 2979 tmp = jj[nzcount++]/bs; 2980 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2981 } 2982 } 2983 /* sort the masked values */ 2984 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2985 2986 /* set "j" values into matrix */ 2987 maskcount = 1; 2988 for (j=0; j<nmask; j++) { 2989 a->j[jcount++] = masked[j]; 2990 mask[masked[j]] = maskcount++; 2991 } 2992 /* set "a" values into matrix */ 2993 ishift = bs2*a->i[i]; 2994 for (j=0; j<bs; j++) { 2995 kmax = rowlengths[i*bs+j]; 2996 for (k=0; k<kmax; k++) { 2997 tmp = jj[nzcountb]/bs ; 2998 block = mask[tmp] - 1; 2999 point = jj[nzcountb] - bs*tmp; 3000 idx = ishift + bs2*block + j + bs*point; 3001 a->a[idx] = (MatScalar)aa[nzcountb++]; 3002 } 3003 } 3004 /* zero out the mask elements we set */ 3005 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3006 } 3007 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3008 3009 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3010 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3011 ierr = PetscFree(aa);CHKERRQ(ierr); 3012 ierr = PetscFree(jj);CHKERRQ(ierr); 3013 ierr = PetscFree(mask);CHKERRQ(ierr); 3014 3015 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3016 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3017 ierr = MatView_Private(B);CHKERRQ(ierr); 3018 3019 *A = B; 3020 PetscFunctionReturn(0); 3021 } 3022 3023 #undef __FUNCT__ 3024 #define __FUNCT__ "MatCreateSeqBAIJ" 3025 /*@C 3026 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3027 compressed row) format. For good matrix assembly performance the 3028 user should preallocate the matrix storage by setting the parameter nz 3029 (or the array nnz). By setting these parameters accurately, performance 3030 during matrix assembly can be increased by more than a factor of 50. 3031 3032 Collective on MPI_Comm 3033 3034 Input Parameters: 3035 + comm - MPI communicator, set to PETSC_COMM_SELF 3036 . bs - size of block 3037 . m - number of rows 3038 . n - number of columns 3039 . nz - number of nonzero blocks per block row (same for all rows) 3040 - nnz - array containing the number of nonzero blocks in the various block rows 3041 (possibly different for each block row) or PETSC_NULL 3042 3043 Output Parameter: 3044 . A - the matrix 3045 3046 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3047 MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely 3048 true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles. 3049 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3050 3051 Options Database Keys: 3052 . -mat_no_unroll - uses code that does not unroll the loops in the 3053 block calculations (much slower) 3054 . -mat_block_size - size of the blocks to use 3055 3056 Level: intermediate 3057 3058 Notes: 3059 The number of rows and columns must be divisible by blocksize. 3060 3061 If the nnz parameter is given then the nz parameter is ignored 3062 3063 A nonzero block is any block that as 1 or more nonzeros in it 3064 3065 The block AIJ format is fully compatible with standard Fortran 77 3066 storage. That is, the stored row and column indices can begin at 3067 either one (as in Fortran) or zero. See the users' manual for details. 3068 3069 Specify the preallocated storage with either nz or nnz (not both). 3070 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3071 allocation. For additional details, see the users manual chapter on 3072 matrices. 3073 3074 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3075 @*/ 3076 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3077 { 3078 PetscErrorCode ierr; 3079 3080 PetscFunctionBegin; 3081 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3082 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3083 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3084 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3085 PetscFunctionReturn(0); 3086 } 3087 3088 #undef __FUNCT__ 3089 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3090 /*@C 3091 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3092 per row in the matrix. For good matrix assembly performance the 3093 user should preallocate the matrix storage by setting the parameter nz 3094 (or the array nnz). By setting these parameters accurately, performance 3095 during matrix assembly can be increased by more than a factor of 50. 3096 3097 Collective on MPI_Comm 3098 3099 Input Parameters: 3100 + A - the matrix 3101 . bs - size of block 3102 . nz - number of block nonzeros per block row (same for all rows) 3103 - nnz - array containing the number of block nonzeros in the various block rows 3104 (possibly different for each block row) or PETSC_NULL 3105 3106 Options Database Keys: 3107 . -mat_no_unroll - uses code that does not unroll the loops in the 3108 block calculations (much slower) 3109 . -mat_block_size - size of the blocks to use 3110 3111 Level: intermediate 3112 3113 Notes: 3114 If the nnz parameter is given then the nz parameter is ignored 3115 3116 You can call MatGetInfo() to get information on how effective the preallocation was; 3117 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3118 You can also run with the option -info and look for messages with the string 3119 malloc in them to see if additional memory allocation was needed. 3120 3121 The block AIJ format is fully compatible with standard Fortran 77 3122 storage. That is, the stored row and column indices can begin at 3123 either one (as in Fortran) or zero. See the users' manual for details. 3124 3125 Specify the preallocated storage with either nz or nnz (not both). 3126 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3127 allocation. For additional details, see the users manual chapter on 3128 matrices. 3129 3130 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3131 @*/ 3132 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3133 { 3134 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 3135 3136 PetscFunctionBegin; 3137 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3138 if (f) { 3139 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 3140 } 3141 PetscFunctionReturn(0); 3142 } 3143 3144 #undef __FUNCT__ 3145 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3146 /*@ 3147 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 3148 (upper triangular entries in CSR format) provided by the user. 3149 3150 Collective on MPI_Comm 3151 3152 Input Parameters: 3153 + comm - must be an MPI communicator of size 1 3154 . bs - size of block 3155 . m - number of rows 3156 . n - number of columns 3157 . i - row indices 3158 . j - column indices 3159 - a - matrix values 3160 3161 Output Parameter: 3162 . mat - the matrix 3163 3164 Level: intermediate 3165 3166 Notes: 3167 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3168 once the matrix is destroyed 3169 3170 You cannot set new nonzero locations into this matrix, that will generate an error. 3171 3172 The i and j indices are 0 based 3173 3174 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3175 3176 @*/ 3177 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3178 { 3179 PetscErrorCode ierr; 3180 PetscInt ii; 3181 Mat_SeqBAIJ *baij; 3182 3183 PetscFunctionBegin; 3184 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3185 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3186 3187 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3188 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3189 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3190 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3191 baij = (Mat_SeqBAIJ*)(*mat)->data; 3192 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3193 3194 baij->i = i; 3195 baij->j = j; 3196 baij->a = a; 3197 baij->singlemalloc = PETSC_FALSE; 3198 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3199 baij->free_a = PETSC_FALSE; 3200 baij->free_ij = PETSC_FALSE; 3201 3202 for (ii=0; ii<m; ii++) { 3203 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3204 #if defined(PETSC_USE_DEBUG) 3205 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3206 #endif 3207 } 3208 #if defined(PETSC_USE_DEBUG) 3209 for (ii=0; ii<baij->i[m]; ii++) { 3210 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3211 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3212 } 3213 #endif 3214 3215 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3216 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3217 PetscFunctionReturn(0); 3218 } 3219