1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "../src/mat/impls/baij/seq/baij.h" 8 #include "petscsys.h" /*I "petscmat.h" I*/ 9 10 #include "../src/mat/blockinvert.h" 11 12 #undef __FUNCT__ 13 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 14 /*@ 15 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 16 17 Collective on Mat 18 19 Input Parameters: 20 . mat - the matrix 21 22 Level: advanced 23 @*/ 24 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 25 { 26 PetscErrorCode ierr,(*f)(Mat); 27 28 PetscFunctionBegin; 29 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 30 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 31 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 32 33 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 34 if (f) { 35 ierr = (*f)(mat);CHKERRQ(ierr); 36 } else { 37 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 38 } 39 PetscFunctionReturn(0); 40 } 41 42 EXTERN_C_BEGIN 43 #undef __FUNCT__ 44 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 45 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 46 { 47 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 48 PetscErrorCode ierr; 49 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs; 50 MatScalar *v = a->a,*odiag,*diag,*mdiag; 51 PetscReal shift = 0.0; 52 53 PetscFunctionBegin; 54 if (a->idiagvalid) PetscFunctionReturn(0); 55 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56 diag_offset = a->diag; 57 if (!a->idiag) { 58 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59 } 60 diag = a->idiag; 61 mdiag = a->idiag+bs*bs*mbs; 62 /* factor and invert each block */ 63 switch (bs){ 64 case 2: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 4*diag_offset[i]; 67 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 68 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 69 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 70 diag += 4; 71 mdiag += 4; 72 } 73 break; 74 case 3: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 9*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79 diag[8] = odiag[8]; 80 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 81 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 82 mdiag[8] = odiag[8]; 83 ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 84 diag += 9; 85 mdiag += 9; 86 } 87 break; 88 case 4: 89 for (i=0; i<mbs; i++) { 90 odiag = v + 16*diag_offset[i]; 91 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 92 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 94 diag += 16; 95 mdiag += 16; 96 } 97 break; 98 case 5: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 25*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr); 104 diag += 25; 105 mdiag += 25; 106 } 107 break; 108 case 6: 109 for (i=0; i<mbs; i++) { 110 odiag = v + 36*diag_offset[i]; 111 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 112 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 113 ierr = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 114 diag += 36; 115 mdiag += 36; 116 } 117 break; 118 default: 119 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 120 } 121 a->idiagvalid = PETSC_TRUE; 122 PetscFunctionReturn(0); 123 } 124 EXTERN_C_END 125 126 #undef __FUNCT__ 127 #define __FUNCT__ "MatPBRelax_SeqBAIJ_1" 128 PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 129 { 130 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 131 PetscScalar *x,x1,s1; 132 const PetscScalar *b; 133 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 134 PetscErrorCode ierr; 135 PetscInt m = a->mbs,i,i2,nz,idx; 136 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 137 138 PetscFunctionBegin; 139 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 140 its = its*lits; 141 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 142 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 143 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 144 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 145 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 146 147 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 148 149 diag = a->diag; 150 idiag = a->idiag; 151 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 152 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 153 154 if (flag & SOR_ZERO_INITIAL_GUESS) { 155 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 156 x[0] = b[0]*idiag[0]; 157 i2 = 1; 158 idiag += 1; 159 for (i=1; i<m; i++) { 160 v = aa + ai[i]; 161 vi = aj + ai[i]; 162 nz = diag[i] - ai[i]; 163 s1 = b[i2]; 164 while (nz--) { 165 idx = (*vi++); 166 x1 = x[idx]; 167 s1 -= v[0]*x1; 168 v += 1; 169 } 170 x[i2] = idiag[0]*s1; 171 idiag += 1; 172 i2 += 1; 173 } 174 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 175 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 176 } 177 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 178 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 179 i2 = 0; 180 mdiag = a->idiag+a->mbs; 181 for (i=0; i<m; i++) { 182 x1 = x[i2]; 183 x[i2] = mdiag[0]*x1; 184 mdiag += 1; 185 i2 += 1; 186 } 187 ierr = PetscLogFlops(m);CHKERRQ(ierr); 188 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 189 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 190 } 191 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 192 idiag = a->idiag+a->mbs - 1; 193 i2 = m - 1; 194 x1 = x[i2]; 195 x[i2] = idiag[0]*x1; 196 idiag -= 1; 197 i2 -= 1; 198 for (i=m-2; i>=0; i--) { 199 v = aa + (diag[i]+1); 200 vi = aj + diag[i] + 1; 201 nz = ai[i+1] - diag[i] - 1; 202 s1 = x[i2]; 203 while (nz--) { 204 idx = (*vi++); 205 x1 = x[idx]; 206 s1 -= v[0]*x1; 207 v += 1; 208 } 209 x[i2] = idiag[0]*s1; 210 idiag -= 2; 211 i2 -= 1; 212 } 213 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 214 } 215 } else { 216 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 217 } 218 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 219 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 220 PetscFunctionReturn(0); 221 } 222 223 #undef __FUNCT__ 224 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 225 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 226 { 227 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 228 PetscScalar *x,x1,x2,s1,s2; 229 const PetscScalar *b; 230 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 231 PetscErrorCode ierr; 232 PetscInt m = a->mbs,i,i2,nz,idx; 233 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 234 235 PetscFunctionBegin; 236 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 237 its = its*lits; 238 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 239 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 240 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 241 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 242 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 243 244 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 245 246 diag = a->diag; 247 idiag = a->idiag; 248 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 249 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 250 251 if (flag & SOR_ZERO_INITIAL_GUESS) { 252 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 253 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 254 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 255 i2 = 2; 256 idiag += 4; 257 for (i=1; i<m; i++) { 258 v = aa + 4*ai[i]; 259 vi = aj + ai[i]; 260 nz = diag[i] - ai[i]; 261 s1 = b[i2]; s2 = b[i2+1]; 262 while (nz--) { 263 idx = 2*(*vi++); 264 x1 = x[idx]; x2 = x[1+idx]; 265 s1 -= v[0]*x1 + v[2]*x2; 266 s2 -= v[1]*x1 + v[3]*x2; 267 v += 4; 268 } 269 x[i2] = idiag[0]*s1 + idiag[2]*s2; 270 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 271 idiag += 4; 272 i2 += 2; 273 } 274 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 275 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 276 } 277 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 278 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 279 i2 = 0; 280 mdiag = a->idiag+4*a->mbs; 281 for (i=0; i<m; i++) { 282 x1 = x[i2]; x2 = x[i2+1]; 283 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 284 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 285 mdiag += 4; 286 i2 += 2; 287 } 288 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 289 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 290 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 291 } 292 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 293 idiag = a->idiag+4*a->mbs - 4; 294 i2 = 2*m - 2; 295 x1 = x[i2]; x2 = x[i2+1]; 296 x[i2] = idiag[0]*x1 + idiag[2]*x2; 297 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 298 idiag -= 4; 299 i2 -= 2; 300 for (i=m-2; i>=0; i--) { 301 v = aa + 4*(diag[i]+1); 302 vi = aj + diag[i] + 1; 303 nz = ai[i+1] - diag[i] - 1; 304 s1 = x[i2]; s2 = x[i2+1]; 305 while (nz--) { 306 idx = 2*(*vi++); 307 x1 = x[idx]; x2 = x[1+idx]; 308 s1 -= v[0]*x1 + v[2]*x2; 309 s2 -= v[1]*x1 + v[3]*x2; 310 v += 4; 311 } 312 x[i2] = idiag[0]*s1 + idiag[2]*s2; 313 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 314 idiag -= 4; 315 i2 -= 2; 316 } 317 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 318 } 319 } else { 320 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 321 } 322 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 323 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 324 PetscFunctionReturn(0); 325 } 326 327 #undef __FUNCT__ 328 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 329 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 330 { 331 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 332 PetscScalar *x,x1,x2,x3,s1,s2,s3; 333 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 334 const PetscScalar *b; 335 PetscErrorCode ierr; 336 PetscInt m = a->mbs,i,i2,nz,idx; 337 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 338 339 PetscFunctionBegin; 340 its = its*lits; 341 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 342 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 343 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 344 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 345 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 346 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 347 348 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 349 350 diag = a->diag; 351 idiag = a->idiag; 352 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 353 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 354 355 if (flag & SOR_ZERO_INITIAL_GUESS) { 356 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 357 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 358 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 359 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 360 i2 = 3; 361 idiag += 9; 362 for (i=1; i<m; i++) { 363 v = aa + 9*ai[i]; 364 vi = aj + ai[i]; 365 nz = diag[i] - ai[i]; 366 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 367 while (nz--) { 368 idx = 3*(*vi++); 369 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 370 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 371 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 372 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 373 v += 9; 374 } 375 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 376 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 377 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 378 idiag += 9; 379 i2 += 3; 380 } 381 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 382 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 383 } 384 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 385 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 386 i2 = 0; 387 mdiag = a->idiag+9*a->mbs; 388 for (i=0; i<m; i++) { 389 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 390 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 391 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 392 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 393 mdiag += 9; 394 i2 += 3; 395 } 396 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 397 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 398 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 399 } 400 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 401 idiag = a->idiag+9*a->mbs - 9; 402 i2 = 3*m - 3; 403 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 404 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 405 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 406 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 407 idiag -= 9; 408 i2 -= 3; 409 for (i=m-2; i>=0; i--) { 410 v = aa + 9*(diag[i]+1); 411 vi = aj + diag[i] + 1; 412 nz = ai[i+1] - diag[i] - 1; 413 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 414 while (nz--) { 415 idx = 3*(*vi++); 416 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 417 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 418 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 419 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 420 v += 9; 421 } 422 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 423 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 424 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 425 idiag -= 9; 426 i2 -= 3; 427 } 428 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 429 } 430 } else { 431 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 432 } 433 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 434 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 435 PetscFunctionReturn(0); 436 } 437 438 #undef __FUNCT__ 439 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 440 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 441 { 442 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 443 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 444 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 445 const PetscScalar *b; 446 PetscErrorCode ierr; 447 PetscInt m = a->mbs,i,i2,nz,idx; 448 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 449 450 PetscFunctionBegin; 451 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 452 its = its*lits; 453 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 454 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 455 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 456 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 457 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 458 459 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 460 461 diag = a->diag; 462 idiag = a->idiag; 463 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 464 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 465 466 if (flag & SOR_ZERO_INITIAL_GUESS) { 467 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 468 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 469 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 470 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 471 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 472 i2 = 4; 473 idiag += 16; 474 for (i=1; i<m; i++) { 475 v = aa + 16*ai[i]; 476 vi = aj + ai[i]; 477 nz = diag[i] - ai[i]; 478 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 479 while (nz--) { 480 idx = 4*(*vi++); 481 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 482 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 483 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 484 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 485 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 486 v += 16; 487 } 488 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 489 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 490 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 491 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 492 idiag += 16; 493 i2 += 4; 494 } 495 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 496 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 497 } 498 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 499 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 500 i2 = 0; 501 mdiag = a->idiag+16*a->mbs; 502 for (i=0; i<m; i++) { 503 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 504 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 505 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 506 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 507 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 508 mdiag += 16; 509 i2 += 4; 510 } 511 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 512 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 513 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 514 } 515 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 516 idiag = a->idiag+16*a->mbs - 16; 517 i2 = 4*m - 4; 518 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 519 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 520 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 521 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 522 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 523 idiag -= 16; 524 i2 -= 4; 525 for (i=m-2; i>=0; i--) { 526 v = aa + 16*(diag[i]+1); 527 vi = aj + diag[i] + 1; 528 nz = ai[i+1] - diag[i] - 1; 529 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 530 while (nz--) { 531 idx = 4*(*vi++); 532 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 533 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 534 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 535 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 536 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 537 v += 16; 538 } 539 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 540 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 541 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 542 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 543 idiag -= 16; 544 i2 -= 4; 545 } 546 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 547 } 548 } else { 549 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 550 } 551 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 552 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 553 PetscFunctionReturn(0); 554 } 555 556 #undef __FUNCT__ 557 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 558 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 559 { 560 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 561 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 562 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 563 const PetscScalar *b; 564 PetscErrorCode ierr; 565 PetscInt m = a->mbs,i,i2,nz,idx; 566 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 567 568 PetscFunctionBegin; 569 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 570 its = its*lits; 571 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 572 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 573 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 574 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 575 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 576 577 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 578 579 diag = a->diag; 580 idiag = a->idiag; 581 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 582 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 583 584 if (flag & SOR_ZERO_INITIAL_GUESS) { 585 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 586 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 587 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 588 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 589 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 590 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 591 i2 = 5; 592 idiag += 25; 593 for (i=1; i<m; i++) { 594 v = aa + 25*ai[i]; 595 vi = aj + ai[i]; 596 nz = diag[i] - ai[i]; 597 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 598 while (nz--) { 599 idx = 5*(*vi++); 600 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 601 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 602 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 603 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 604 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 605 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 606 v += 25; 607 } 608 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 609 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 610 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 611 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 612 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 613 idiag += 25; 614 i2 += 5; 615 } 616 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 617 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 618 } 619 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 620 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 621 i2 = 0; 622 mdiag = a->idiag+25*a->mbs; 623 for (i=0; i<m; i++) { 624 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 625 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 626 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 627 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 628 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 629 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 630 mdiag += 25; 631 i2 += 5; 632 } 633 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 634 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 635 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 636 } 637 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 638 idiag = a->idiag+25*a->mbs - 25; 639 i2 = 5*m - 5; 640 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 641 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 642 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 643 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 644 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 645 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 646 idiag -= 25; 647 i2 -= 5; 648 for (i=m-2; i>=0; i--) { 649 v = aa + 25*(diag[i]+1); 650 vi = aj + diag[i] + 1; 651 nz = ai[i+1] - diag[i] - 1; 652 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 653 while (nz--) { 654 idx = 5*(*vi++); 655 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 656 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 657 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 658 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 659 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 660 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 661 v += 25; 662 } 663 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 664 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 665 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 666 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 667 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 668 idiag -= 25; 669 i2 -= 5; 670 } 671 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 672 } 673 } else { 674 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 675 } 676 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 677 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 678 PetscFunctionReturn(0); 679 } 680 681 #undef __FUNCT__ 682 #define __FUNCT__ "MatPBRelax_SeqBAIJ_6" 683 PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 684 { 685 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 686 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 687 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 688 const PetscScalar *b; 689 PetscErrorCode ierr; 690 PetscInt m = a->mbs,i,i2,nz,idx; 691 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 692 693 PetscFunctionBegin; 694 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 695 its = its*lits; 696 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 697 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 698 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 699 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 700 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 701 702 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 703 704 diag = a->diag; 705 idiag = a->idiag; 706 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 707 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 708 709 if (flag & SOR_ZERO_INITIAL_GUESS) { 710 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 711 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 712 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 713 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 714 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 715 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 716 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 717 i2 = 6; 718 idiag += 36; 719 for (i=1; i<m; i++) { 720 v = aa + 36*ai[i]; 721 vi = aj + ai[i]; 722 nz = diag[i] - ai[i]; 723 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 724 while (nz--) { 725 idx = 6*(*vi++); 726 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 727 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 728 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 729 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 730 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 731 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 732 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 733 v += 36; 734 } 735 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 736 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 737 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 738 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 739 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 740 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 741 idiag += 36; 742 i2 += 6; 743 } 744 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 745 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 746 } 747 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 748 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 749 i2 = 0; 750 mdiag = a->idiag+36*a->mbs; 751 for (i=0; i<m; i++) { 752 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 753 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 754 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 755 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 756 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 757 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 758 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 759 mdiag += 36; 760 i2 += 6; 761 } 762 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 763 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 764 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 765 } 766 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 767 idiag = a->idiag+36*a->mbs - 36; 768 i2 = 6*m - 6; 769 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 770 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 771 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 772 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 773 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 774 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 775 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 776 idiag -= 36; 777 i2 -= 6; 778 for (i=m-2; i>=0; i--) { 779 v = aa + 36*(diag[i]+1); 780 vi = aj + diag[i] + 1; 781 nz = ai[i+1] - diag[i] - 1; 782 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 783 while (nz--) { 784 idx = 6*(*vi++); 785 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 786 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 787 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 788 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 789 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 790 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 791 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 792 v += 36; 793 } 794 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 795 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 796 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 797 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 798 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 799 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 800 idiag -= 36; 801 i2 -= 6; 802 } 803 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 804 } 805 } else { 806 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 807 } 808 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 809 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 810 PetscFunctionReturn(0); 811 } 812 813 #undef __FUNCT__ 814 #define __FUNCT__ "MatPBRelax_SeqBAIJ_7" 815 PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 816 { 817 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 818 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 819 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 820 const PetscScalar *b; 821 PetscErrorCode ierr; 822 PetscInt m = a->mbs,i,i2,nz,idx; 823 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 824 825 PetscFunctionBegin; 826 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 827 its = its*lits; 828 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 829 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 830 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 831 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 832 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 833 834 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 835 836 diag = a->diag; 837 idiag = a->idiag; 838 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 839 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 840 841 if (flag & SOR_ZERO_INITIAL_GUESS) { 842 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 843 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 844 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 845 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 846 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 847 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 848 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 849 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 850 i2 = 7; 851 idiag += 49; 852 for (i=1; i<m; i++) { 853 v = aa + 49*ai[i]; 854 vi = aj + ai[i]; 855 nz = diag[i] - ai[i]; 856 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 857 while (nz--) { 858 idx = 7*(*vi++); 859 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 860 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 861 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 862 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 863 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 864 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 865 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 866 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 867 v += 49; 868 } 869 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 870 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 871 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 872 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 873 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 874 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 875 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 876 idiag += 49; 877 i2 += 7; 878 } 879 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 880 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 881 } 882 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 883 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 884 i2 = 0; 885 mdiag = a->idiag+49*a->mbs; 886 for (i=0; i<m; i++) { 887 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 888 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7; 889 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7; 890 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7; 891 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7; 892 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7; 893 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7; 894 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7; 895 mdiag += 36; 896 i2 += 6; 897 } 898 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 899 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 900 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 901 } 902 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 903 idiag = a->idiag+49*a->mbs - 49; 904 i2 = 7*m - 7; 905 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 906 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 907 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 908 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 909 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 910 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 911 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 912 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 913 idiag -= 49; 914 i2 -= 7; 915 for (i=m-2; i>=0; i--) { 916 v = aa + 49*(diag[i]+1); 917 vi = aj + diag[i] + 1; 918 nz = ai[i+1] - diag[i] - 1; 919 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 920 while (nz--) { 921 idx = 7*(*vi++); 922 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 923 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 924 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 925 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 926 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 927 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 928 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 929 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 930 v += 49; 931 } 932 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 933 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 934 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 935 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 936 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 937 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 938 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 939 idiag -= 49; 940 i2 -= 7; 941 } 942 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 943 } 944 } else { 945 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 946 } 947 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 948 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 949 PetscFunctionReturn(0); 950 } 951 952 /* 953 Special version for direct calls from Fortran (Used in PETSc-fun3d) 954 */ 955 #if defined(PETSC_HAVE_FORTRAN_CAPS) 956 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 957 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 958 #define matsetvaluesblocked4_ matsetvaluesblocked4 959 #endif 960 961 EXTERN_C_BEGIN 962 #undef __FUNCT__ 963 #define __FUNCT__ "matsetvaluesblocked4_" 964 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 965 { 966 Mat A = *AA; 967 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 968 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 969 PetscInt *ai=a->i,*ailen=a->ilen; 970 PetscInt *aj=a->j,stepval,lastcol = -1; 971 const PetscScalar *value = v; 972 MatScalar *ap,*aa = a->a,*bap; 973 974 PetscFunctionBegin; 975 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 976 stepval = (n-1)*4; 977 for (k=0; k<m; k++) { /* loop over added rows */ 978 row = im[k]; 979 rp = aj + ai[row]; 980 ap = aa + 16*ai[row]; 981 nrow = ailen[row]; 982 low = 0; 983 high = nrow; 984 for (l=0; l<n; l++) { /* loop over added columns */ 985 col = in[l]; 986 if (col <= lastcol) low = 0; else high = nrow; 987 lastcol = col; 988 value = v + k*(stepval+4 + l)*4; 989 while (high-low > 7) { 990 t = (low+high)/2; 991 if (rp[t] > col) high = t; 992 else low = t; 993 } 994 for (i=low; i<high; i++) { 995 if (rp[i] > col) break; 996 if (rp[i] == col) { 997 bap = ap + 16*i; 998 for (ii=0; ii<4; ii++,value+=stepval) { 999 for (jj=ii; jj<16; jj+=4) { 1000 bap[jj] += *value++; 1001 } 1002 } 1003 goto noinsert2; 1004 } 1005 } 1006 N = nrow++ - 1; 1007 high++; /* added new column index thus must search to one higher than before */ 1008 /* shift up all the later entries in this row */ 1009 for (ii=N; ii>=i; ii--) { 1010 rp[ii+1] = rp[ii]; 1011 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1012 } 1013 if (N >= i) { 1014 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1015 } 1016 rp[i] = col; 1017 bap = ap + 16*i; 1018 for (ii=0; ii<4; ii++,value+=stepval) { 1019 for (jj=ii; jj<16; jj+=4) { 1020 bap[jj] = *value++; 1021 } 1022 } 1023 noinsert2:; 1024 low = i; 1025 } 1026 ailen[row] = nrow; 1027 } 1028 PetscFunctionReturnVoid(); 1029 } 1030 EXTERN_C_END 1031 1032 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1033 #define matsetvalues4_ MATSETVALUES4 1034 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1035 #define matsetvalues4_ matsetvalues4 1036 #endif 1037 1038 EXTERN_C_BEGIN 1039 #undef __FUNCT__ 1040 #define __FUNCT__ "MatSetValues4_" 1041 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1042 { 1043 Mat A = *AA; 1044 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1045 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1046 PetscInt *ai=a->i,*ailen=a->ilen; 1047 PetscInt *aj=a->j,brow,bcol; 1048 PetscInt ridx,cidx,lastcol = -1; 1049 MatScalar *ap,value,*aa=a->a,*bap; 1050 1051 PetscFunctionBegin; 1052 for (k=0; k<m; k++) { /* loop over added rows */ 1053 row = im[k]; brow = row/4; 1054 rp = aj + ai[brow]; 1055 ap = aa + 16*ai[brow]; 1056 nrow = ailen[brow]; 1057 low = 0; 1058 high = nrow; 1059 for (l=0; l<n; l++) { /* loop over added columns */ 1060 col = in[l]; bcol = col/4; 1061 ridx = row % 4; cidx = col % 4; 1062 value = v[l + k*n]; 1063 if (col <= lastcol) low = 0; else high = nrow; 1064 lastcol = col; 1065 while (high-low > 7) { 1066 t = (low+high)/2; 1067 if (rp[t] > bcol) high = t; 1068 else low = t; 1069 } 1070 for (i=low; i<high; i++) { 1071 if (rp[i] > bcol) break; 1072 if (rp[i] == bcol) { 1073 bap = ap + 16*i + 4*cidx + ridx; 1074 *bap += value; 1075 goto noinsert1; 1076 } 1077 } 1078 N = nrow++ - 1; 1079 high++; /* added new column thus must search to one higher than before */ 1080 /* shift up all the later entries in this row */ 1081 for (ii=N; ii>=i; ii--) { 1082 rp[ii+1] = rp[ii]; 1083 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1084 } 1085 if (N>=i) { 1086 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1087 } 1088 rp[i] = bcol; 1089 ap[16*i + 4*cidx + ridx] = value; 1090 noinsert1:; 1091 low = i; 1092 } 1093 ailen[brow] = nrow; 1094 } 1095 PetscFunctionReturnVoid(); 1096 } 1097 EXTERN_C_END 1098 1099 /* 1100 Checks for missing diagonals 1101 */ 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1104 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d) 1105 { 1106 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1107 PetscErrorCode ierr; 1108 PetscInt *diag,*jj = a->j,i; 1109 1110 PetscFunctionBegin; 1111 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1112 *missing = PETSC_FALSE; 1113 if (A->rmap->n > 0 && !jj) { 1114 *missing = PETSC_TRUE; 1115 if (d) *d = 0; 1116 PetscInfo(A,"Matrix has no entries therefor is missing diagonal"); 1117 } else { 1118 diag = a->diag; 1119 for (i=0; i<a->mbs; i++) { 1120 if (jj[diag[i]] != i) { 1121 *missing = PETSC_TRUE; 1122 if (d) *d = i; 1123 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1124 } 1125 } 1126 } 1127 PetscFunctionReturn(0); 1128 } 1129 1130 #undef __FUNCT__ 1131 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1132 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1133 { 1134 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1135 PetscErrorCode ierr; 1136 PetscInt i,j,m = a->mbs; 1137 1138 PetscFunctionBegin; 1139 if (!a->diag) { 1140 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1141 } 1142 for (i=0; i<m; i++) { 1143 a->diag[i] = a->i[i+1]; 1144 for (j=a->i[i]; j<a->i[i+1]; j++) { 1145 if (a->j[j] == i) { 1146 a->diag[i] = j; 1147 break; 1148 } 1149 } 1150 } 1151 PetscFunctionReturn(0); 1152 } 1153 1154 1155 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1156 1157 #undef __FUNCT__ 1158 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1159 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1160 { 1161 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1162 PetscErrorCode ierr; 1163 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt; 1164 PetscInt *tia, *tja; 1165 1166 PetscFunctionBegin; 1167 *nn = n; 1168 if (!ia) PetscFunctionReturn(0); 1169 if (symmetric) { 1170 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1171 } else { 1172 tia = a->i; tja = a->j; 1173 } 1174 1175 if (!blockcompressed && bs > 1) { 1176 (*nn) *= bs; 1177 nbs = bs; 1178 /* malloc & create the natural set of indices */ 1179 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1180 if (n) { 1181 (*ia)[0] = 0; 1182 for (j=1; j<bs; j++) { 1183 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1184 } 1185 } 1186 1187 for (i=1; i<n; i++) { 1188 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1189 for (j=1; j<bs; j++) { 1190 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1191 } 1192 } 1193 if (n) { 1194 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1195 } 1196 1197 if (ja) { 1198 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1199 cnt = 0; 1200 for (i=0; i<n; i++) { 1201 for (j=0; j<bs; j++) { 1202 for (k=tia[i]; k<tia[i+1]; k++) { 1203 for (l=0; l<bs; l++) { 1204 (*ja)[cnt++] = bs*tja[k] + l; 1205 } 1206 } 1207 } 1208 } 1209 } 1210 1211 n *= bs; 1212 nz *= bs*bs; 1213 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1214 ierr = PetscFree(tia);CHKERRQ(ierr); 1215 ierr = PetscFree(tja);CHKERRQ(ierr); 1216 } 1217 } else { 1218 *ia = tia; 1219 if (ja) *ja = tja; 1220 } 1221 if (oshift == 1) { 1222 for (i=0; i<n+nbs; i++) (*ia)[i]++; 1223 if (ja) for (i=0; i<nz; i++) (*ja)[i]++; 1224 } 1225 PetscFunctionReturn(0); 1226 } 1227 1228 #undef __FUNCT__ 1229 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1230 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1231 { 1232 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1233 PetscErrorCode ierr; 1234 PetscInt i,n = a->mbs,nz = a->i[n]; 1235 1236 PetscFunctionBegin; 1237 if (!ia) PetscFunctionReturn(0); 1238 if (!blockcompressed && A->rmap->bs > 1) { 1239 ierr = PetscFree(*ia);CHKERRQ(ierr); 1240 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1241 } else if (symmetric) { 1242 ierr = PetscFree(*ia);CHKERRQ(ierr); 1243 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1244 } else if (oshift == 1) { /* blockcompressed */ 1245 for (i=0; i<n+1; i++) a->i[i]--; 1246 if (ja) {for (i=0; i<nz; i++) a->j[i]--;} 1247 } 1248 PetscFunctionReturn(0); 1249 } 1250 1251 #undef __FUNCT__ 1252 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1253 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1254 { 1255 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1256 PetscErrorCode ierr; 1257 1258 PetscFunctionBegin; 1259 #if defined(PETSC_USE_LOG) 1260 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1261 #endif 1262 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1263 if (a->row) { 1264 ierr = ISDestroy(a->row);CHKERRQ(ierr); 1265 } 1266 if (a->col) { 1267 ierr = ISDestroy(a->col);CHKERRQ(ierr); 1268 } 1269 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1270 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1271 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1272 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1273 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1274 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 1275 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1276 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1277 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 1278 1279 if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);} 1280 ierr = PetscFree(a);CHKERRQ(ierr); 1281 1282 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1283 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1284 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1285 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1286 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1287 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1288 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1289 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1290 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1291 PetscFunctionReturn(0); 1292 } 1293 1294 #undef __FUNCT__ 1295 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1296 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg) 1297 { 1298 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1299 PetscErrorCode ierr; 1300 1301 PetscFunctionBegin; 1302 switch (op) { 1303 case MAT_ROW_ORIENTED: 1304 a->roworiented = flg; 1305 break; 1306 case MAT_KEEP_NONZERO_PATTERN: 1307 a->keepnonzeropattern = flg; 1308 break; 1309 case MAT_NEW_NONZERO_LOCATIONS: 1310 a->nonew = (flg ? 0 : 1); 1311 break; 1312 case MAT_NEW_NONZERO_LOCATION_ERR: 1313 a->nonew = (flg ? -1 : 0); 1314 break; 1315 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1316 a->nonew = (flg ? -2 : 0); 1317 break; 1318 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1319 a->nounused = (flg ? -1 : 0); 1320 break; 1321 case MAT_NEW_DIAGONALS: 1322 case MAT_IGNORE_OFF_PROC_ENTRIES: 1323 case MAT_USE_HASH_TABLE: 1324 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1325 break; 1326 case MAT_SYMMETRIC: 1327 case MAT_STRUCTURALLY_SYMMETRIC: 1328 case MAT_HERMITIAN: 1329 case MAT_SYMMETRY_ETERNAL: 1330 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1331 break; 1332 default: 1333 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1334 } 1335 PetscFunctionReturn(0); 1336 } 1337 1338 #undef __FUNCT__ 1339 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1340 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1341 { 1342 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1343 PetscErrorCode ierr; 1344 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1345 MatScalar *aa,*aa_i; 1346 PetscScalar *v_i; 1347 1348 PetscFunctionBegin; 1349 bs = A->rmap->bs; 1350 ai = a->i; 1351 aj = a->j; 1352 aa = a->a; 1353 bs2 = a->bs2; 1354 1355 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1356 1357 bn = row/bs; /* Block number */ 1358 bp = row % bs; /* Block Position */ 1359 M = ai[bn+1] - ai[bn]; 1360 *nz = bs*M; 1361 1362 if (v) { 1363 *v = 0; 1364 if (*nz) { 1365 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1366 for (i=0; i<M; i++) { /* for each block in the block row */ 1367 v_i = *v + i*bs; 1368 aa_i = aa + bs2*(ai[bn] + i); 1369 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1370 } 1371 } 1372 } 1373 1374 if (idx) { 1375 *idx = 0; 1376 if (*nz) { 1377 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1378 for (i=0; i<M; i++) { /* for each block in the block row */ 1379 idx_i = *idx + i*bs; 1380 itmp = bs*aj[ai[bn] + i]; 1381 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1382 } 1383 } 1384 } 1385 PetscFunctionReturn(0); 1386 } 1387 1388 #undef __FUNCT__ 1389 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1390 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1391 { 1392 PetscErrorCode ierr; 1393 1394 PetscFunctionBegin; 1395 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1396 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1397 PetscFunctionReturn(0); 1398 } 1399 1400 #undef __FUNCT__ 1401 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1402 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1403 { 1404 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1405 Mat C; 1406 PetscErrorCode ierr; 1407 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1408 PetscInt *rows,*cols,bs2=a->bs2; 1409 MatScalar *array; 1410 1411 PetscFunctionBegin; 1412 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1413 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1414 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1415 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1416 1417 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1418 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1419 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1420 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1421 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1422 ierr = PetscFree(col);CHKERRQ(ierr); 1423 } else { 1424 C = *B; 1425 } 1426 1427 array = a->a; 1428 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1429 cols = rows + bs; 1430 for (i=0; i<mbs; i++) { 1431 cols[0] = i*bs; 1432 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1433 len = ai[i+1] - ai[i]; 1434 for (j=0; j<len; j++) { 1435 rows[0] = (*aj++)*bs; 1436 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1437 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1438 array += bs2; 1439 } 1440 } 1441 ierr = PetscFree(rows);CHKERRQ(ierr); 1442 1443 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1444 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1445 1446 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1447 *B = C; 1448 } else { 1449 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1450 } 1451 PetscFunctionReturn(0); 1452 } 1453 1454 #undef __FUNCT__ 1455 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1456 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1457 { 1458 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1459 PetscErrorCode ierr; 1460 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1461 int fd; 1462 PetscScalar *aa; 1463 FILE *file; 1464 1465 PetscFunctionBegin; 1466 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1467 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1468 col_lens[0] = MAT_FILE_COOKIE; 1469 1470 col_lens[1] = A->rmap->N; 1471 col_lens[2] = A->cmap->n; 1472 col_lens[3] = a->nz*bs2; 1473 1474 /* store lengths of each row and write (including header) to file */ 1475 count = 0; 1476 for (i=0; i<a->mbs; i++) { 1477 for (j=0; j<bs; j++) { 1478 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1479 } 1480 } 1481 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1482 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1483 1484 /* store column indices (zero start index) */ 1485 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1486 count = 0; 1487 for (i=0; i<a->mbs; i++) { 1488 for (j=0; j<bs; j++) { 1489 for (k=a->i[i]; k<a->i[i+1]; k++) { 1490 for (l=0; l<bs; l++) { 1491 jj[count++] = bs*a->j[k] + l; 1492 } 1493 } 1494 } 1495 } 1496 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1497 ierr = PetscFree(jj);CHKERRQ(ierr); 1498 1499 /* store nonzero values */ 1500 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1501 count = 0; 1502 for (i=0; i<a->mbs; i++) { 1503 for (j=0; j<bs; j++) { 1504 for (k=a->i[i]; k<a->i[i+1]; k++) { 1505 for (l=0; l<bs; l++) { 1506 aa[count++] = a->a[bs2*k + l*bs + j]; 1507 } 1508 } 1509 } 1510 } 1511 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1512 ierr = PetscFree(aa);CHKERRQ(ierr); 1513 1514 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1515 if (file) { 1516 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1517 } 1518 PetscFunctionReturn(0); 1519 } 1520 1521 #undef __FUNCT__ 1522 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1523 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1524 { 1525 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1526 PetscErrorCode ierr; 1527 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1528 PetscViewerFormat format; 1529 1530 PetscFunctionBegin; 1531 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1532 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1533 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1534 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1535 Mat aij; 1536 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1537 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1538 ierr = MatDestroy(aij);CHKERRQ(ierr); 1539 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1540 PetscFunctionReturn(0); 1541 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1542 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1543 for (i=0; i<a->mbs; i++) { 1544 for (j=0; j<bs; j++) { 1545 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1546 for (k=a->i[i]; k<a->i[i+1]; k++) { 1547 for (l=0; l<bs; l++) { 1548 #if defined(PETSC_USE_COMPLEX) 1549 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1550 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1551 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1552 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1553 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1554 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1555 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1556 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1557 } 1558 #else 1559 if (a->a[bs2*k + l*bs + j] != 0.0) { 1560 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1561 } 1562 #endif 1563 } 1564 } 1565 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1566 } 1567 } 1568 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1569 } else { 1570 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1571 for (i=0; i<a->mbs; i++) { 1572 for (j=0; j<bs; j++) { 1573 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1574 for (k=a->i[i]; k<a->i[i+1]; k++) { 1575 for (l=0; l<bs; l++) { 1576 #if defined(PETSC_USE_COMPLEX) 1577 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1578 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1579 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1580 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1581 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1582 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1583 } else { 1584 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1585 } 1586 #else 1587 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1588 #endif 1589 } 1590 } 1591 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1592 } 1593 } 1594 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1595 } 1596 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1597 PetscFunctionReturn(0); 1598 } 1599 1600 #undef __FUNCT__ 1601 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1602 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1603 { 1604 Mat A = (Mat) Aa; 1605 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1606 PetscErrorCode ierr; 1607 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1608 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1609 MatScalar *aa; 1610 PetscViewer viewer; 1611 1612 PetscFunctionBegin; 1613 1614 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1615 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1616 1617 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1618 1619 /* loop over matrix elements drawing boxes */ 1620 color = PETSC_DRAW_BLUE; 1621 for (i=0,row=0; i<mbs; i++,row+=bs) { 1622 for (j=a->i[i]; j<a->i[i+1]; j++) { 1623 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1624 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1625 aa = a->a + j*bs2; 1626 for (k=0; k<bs; k++) { 1627 for (l=0; l<bs; l++) { 1628 if (PetscRealPart(*aa++) >= 0.) continue; 1629 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1630 } 1631 } 1632 } 1633 } 1634 color = PETSC_DRAW_CYAN; 1635 for (i=0,row=0; i<mbs; i++,row+=bs) { 1636 for (j=a->i[i]; j<a->i[i+1]; j++) { 1637 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1638 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1639 aa = a->a + j*bs2; 1640 for (k=0; k<bs; k++) { 1641 for (l=0; l<bs; l++) { 1642 if (PetscRealPart(*aa++) != 0.) continue; 1643 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1644 } 1645 } 1646 } 1647 } 1648 1649 color = PETSC_DRAW_RED; 1650 for (i=0,row=0; i<mbs; i++,row+=bs) { 1651 for (j=a->i[i]; j<a->i[i+1]; j++) { 1652 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1653 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1654 aa = a->a + j*bs2; 1655 for (k=0; k<bs; k++) { 1656 for (l=0; l<bs; l++) { 1657 if (PetscRealPart(*aa++) <= 0.) continue; 1658 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1659 } 1660 } 1661 } 1662 } 1663 PetscFunctionReturn(0); 1664 } 1665 1666 #undef __FUNCT__ 1667 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1668 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1669 { 1670 PetscErrorCode ierr; 1671 PetscReal xl,yl,xr,yr,w,h; 1672 PetscDraw draw; 1673 PetscTruth isnull; 1674 1675 PetscFunctionBegin; 1676 1677 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1678 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1679 1680 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1681 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1682 xr += w; yr += h; xl = -w; yl = -h; 1683 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1684 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1685 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1686 PetscFunctionReturn(0); 1687 } 1688 1689 #undef __FUNCT__ 1690 #define __FUNCT__ "MatView_SeqBAIJ" 1691 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1692 { 1693 PetscErrorCode ierr; 1694 PetscTruth iascii,isbinary,isdraw; 1695 1696 PetscFunctionBegin; 1697 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1698 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1699 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1700 if (iascii){ 1701 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1702 } else if (isbinary) { 1703 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1704 } else if (isdraw) { 1705 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1706 } else { 1707 Mat B; 1708 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1709 ierr = MatView(B,viewer);CHKERRQ(ierr); 1710 ierr = MatDestroy(B);CHKERRQ(ierr); 1711 } 1712 PetscFunctionReturn(0); 1713 } 1714 1715 1716 #undef __FUNCT__ 1717 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1718 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1719 { 1720 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1721 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1722 PetscInt *ai = a->i,*ailen = a->ilen; 1723 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1724 MatScalar *ap,*aa = a->a; 1725 1726 PetscFunctionBegin; 1727 for (k=0; k<m; k++) { /* loop over rows */ 1728 row = im[k]; brow = row/bs; 1729 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1730 if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1731 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1732 nrow = ailen[brow]; 1733 for (l=0; l<n; l++) { /* loop over columns */ 1734 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1735 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1736 col = in[l] ; 1737 bcol = col/bs; 1738 cidx = col%bs; 1739 ridx = row%bs; 1740 high = nrow; 1741 low = 0; /* assume unsorted */ 1742 while (high-low > 5) { 1743 t = (low+high)/2; 1744 if (rp[t] > bcol) high = t; 1745 else low = t; 1746 } 1747 for (i=low; i<high; i++) { 1748 if (rp[i] > bcol) break; 1749 if (rp[i] == bcol) { 1750 *v++ = ap[bs2*i+bs*cidx+ridx]; 1751 goto finished; 1752 } 1753 } 1754 *v++ = 0.0; 1755 finished:; 1756 } 1757 } 1758 PetscFunctionReturn(0); 1759 } 1760 1761 #define CHUNKSIZE 10 1762 #undef __FUNCT__ 1763 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1764 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1765 { 1766 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1767 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1768 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1769 PetscErrorCode ierr; 1770 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1771 PetscTruth roworiented=a->roworiented; 1772 const PetscScalar *value = v; 1773 MatScalar *ap,*aa = a->a,*bap; 1774 1775 PetscFunctionBegin; 1776 if (roworiented) { 1777 stepval = (n-1)*bs; 1778 } else { 1779 stepval = (m-1)*bs; 1780 } 1781 for (k=0; k<m; k++) { /* loop over added rows */ 1782 row = im[k]; 1783 if (row < 0) continue; 1784 #if defined(PETSC_USE_DEBUG) 1785 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1786 #endif 1787 rp = aj + ai[row]; 1788 ap = aa + bs2*ai[row]; 1789 rmax = imax[row]; 1790 nrow = ailen[row]; 1791 low = 0; 1792 high = nrow; 1793 for (l=0; l<n; l++) { /* loop over added columns */ 1794 if (in[l] < 0) continue; 1795 #if defined(PETSC_USE_DEBUG) 1796 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1797 #endif 1798 col = in[l]; 1799 if (roworiented) { 1800 value = v + k*(stepval+bs)*bs + l*bs; 1801 } else { 1802 value = v + l*(stepval+bs)*bs + k*bs; 1803 } 1804 if (col <= lastcol) low = 0; else high = nrow; 1805 lastcol = col; 1806 while (high-low > 7) { 1807 t = (low+high)/2; 1808 if (rp[t] > col) high = t; 1809 else low = t; 1810 } 1811 for (i=low; i<high; i++) { 1812 if (rp[i] > col) break; 1813 if (rp[i] == col) { 1814 bap = ap + bs2*i; 1815 if (roworiented) { 1816 if (is == ADD_VALUES) { 1817 for (ii=0; ii<bs; ii++,value+=stepval) { 1818 for (jj=ii; jj<bs2; jj+=bs) { 1819 bap[jj] += *value++; 1820 } 1821 } 1822 } else { 1823 for (ii=0; ii<bs; ii++,value+=stepval) { 1824 for (jj=ii; jj<bs2; jj+=bs) { 1825 bap[jj] = *value++; 1826 } 1827 } 1828 } 1829 } else { 1830 if (is == ADD_VALUES) { 1831 for (ii=0; ii<bs; ii++,value+=stepval) { 1832 for (jj=0; jj<bs; jj++) { 1833 *bap++ += *value++; 1834 } 1835 } 1836 } else { 1837 for (ii=0; ii<bs; ii++,value+=stepval) { 1838 for (jj=0; jj<bs; jj++) { 1839 *bap++ = *value++; 1840 } 1841 } 1842 } 1843 } 1844 goto noinsert2; 1845 } 1846 } 1847 if (nonew == 1) goto noinsert2; 1848 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1849 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1850 N = nrow++ - 1; high++; 1851 /* shift up all the later entries in this row */ 1852 for (ii=N; ii>=i; ii--) { 1853 rp[ii+1] = rp[ii]; 1854 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1855 } 1856 if (N >= i) { 1857 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1858 } 1859 rp[i] = col; 1860 bap = ap + bs2*i; 1861 if (roworiented) { 1862 for (ii=0; ii<bs; ii++,value+=stepval) { 1863 for (jj=ii; jj<bs2; jj+=bs) { 1864 bap[jj] = *value++; 1865 } 1866 } 1867 } else { 1868 for (ii=0; ii<bs; ii++,value+=stepval) { 1869 for (jj=0; jj<bs; jj++) { 1870 *bap++ = *value++; 1871 } 1872 } 1873 } 1874 noinsert2:; 1875 low = i; 1876 } 1877 ailen[row] = nrow; 1878 } 1879 PetscFunctionReturn(0); 1880 } 1881 1882 #undef __FUNCT__ 1883 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1884 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1885 { 1886 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1887 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1888 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1889 PetscErrorCode ierr; 1890 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1891 MatScalar *aa = a->a,*ap; 1892 PetscReal ratio=0.6; 1893 1894 PetscFunctionBegin; 1895 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1896 1897 if (m) rmax = ailen[0]; 1898 for (i=1; i<mbs; i++) { 1899 /* move each row back by the amount of empty slots (fshift) before it*/ 1900 fshift += imax[i-1] - ailen[i-1]; 1901 rmax = PetscMax(rmax,ailen[i]); 1902 if (fshift) { 1903 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1904 N = ailen[i]; 1905 for (j=0; j<N; j++) { 1906 ip[j-fshift] = ip[j]; 1907 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1908 } 1909 } 1910 ai[i] = ai[i-1] + ailen[i-1]; 1911 } 1912 if (mbs) { 1913 fshift += imax[mbs-1] - ailen[mbs-1]; 1914 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1915 } 1916 /* reset ilen and imax for each row */ 1917 for (i=0; i<mbs; i++) { 1918 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1919 } 1920 a->nz = ai[mbs]; 1921 1922 /* diagonals may have moved, so kill the diagonal pointers */ 1923 a->idiagvalid = PETSC_FALSE; 1924 if (fshift && a->diag) { 1925 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1926 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1927 a->diag = 0; 1928 } 1929 if (fshift && a->nounused == -1) { 1930 SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1931 } 1932 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1933 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1934 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1935 a->reallocs = 0; 1936 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1937 1938 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1939 if (a->compressedrow.use){ 1940 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1941 } 1942 1943 A->same_nonzero = PETSC_TRUE; 1944 PetscFunctionReturn(0); 1945 } 1946 1947 /* 1948 This function returns an array of flags which indicate the locations of contiguous 1949 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1950 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1951 Assume: sizes should be long enough to hold all the values. 1952 */ 1953 #undef __FUNCT__ 1954 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1955 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1956 { 1957 PetscInt i,j,k,row; 1958 PetscTruth flg; 1959 1960 PetscFunctionBegin; 1961 for (i=0,j=0; i<n; j++) { 1962 row = idx[i]; 1963 if (row%bs!=0) { /* Not the begining of a block */ 1964 sizes[j] = 1; 1965 i++; 1966 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1967 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1968 i++; 1969 } else { /* Begining of the block, so check if the complete block exists */ 1970 flg = PETSC_TRUE; 1971 for (k=1; k<bs; k++) { 1972 if (row+k != idx[i+k]) { /* break in the block */ 1973 flg = PETSC_FALSE; 1974 break; 1975 } 1976 } 1977 if (flg) { /* No break in the bs */ 1978 sizes[j] = bs; 1979 i+= bs; 1980 } else { 1981 sizes[j] = 1; 1982 i++; 1983 } 1984 } 1985 } 1986 *bs_max = j; 1987 PetscFunctionReturn(0); 1988 } 1989 1990 #undef __FUNCT__ 1991 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1992 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 1993 { 1994 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1995 PetscErrorCode ierr; 1996 PetscInt i,j,k,count,*rows; 1997 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 1998 PetscScalar zero = 0.0; 1999 MatScalar *aa; 2000 2001 PetscFunctionBegin; 2002 /* Make a copy of the IS and sort it */ 2003 /* allocate memory for rows,sizes */ 2004 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 2005 sizes = rows + is_n; 2006 2007 /* copy IS values to rows, and sort them */ 2008 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2009 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2010 if (baij->keepnonzeropattern) { 2011 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2012 bs_max = is_n; 2013 A->same_nonzero = PETSC_TRUE; 2014 } else { 2015 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2016 A->same_nonzero = PETSC_FALSE; 2017 } 2018 2019 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2020 row = rows[j]; 2021 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2022 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2023 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2024 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2025 if (diag != 0.0) { 2026 if (baij->ilen[row/bs] > 0) { 2027 baij->ilen[row/bs] = 1; 2028 baij->j[baij->i[row/bs]] = row/bs; 2029 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2030 } 2031 /* Now insert all the diagonal values for this bs */ 2032 for (k=0; k<bs; k++) { 2033 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2034 } 2035 } else { /* (diag == 0.0) */ 2036 baij->ilen[row/bs] = 0; 2037 } /* end (diag == 0.0) */ 2038 } else { /* (sizes[i] != bs) */ 2039 #if defined (PETSC_USE_DEBUG) 2040 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2041 #endif 2042 for (k=0; k<count; k++) { 2043 aa[0] = zero; 2044 aa += bs; 2045 } 2046 if (diag != 0.0) { 2047 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2048 } 2049 } 2050 } 2051 2052 ierr = PetscFree(rows);CHKERRQ(ierr); 2053 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2054 PetscFunctionReturn(0); 2055 } 2056 2057 #undef __FUNCT__ 2058 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2059 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2060 { 2061 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2062 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2063 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2064 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2065 PetscErrorCode ierr; 2066 PetscInt ridx,cidx,bs2=a->bs2; 2067 PetscTruth roworiented=a->roworiented; 2068 MatScalar *ap,value,*aa=a->a,*bap; 2069 2070 PetscFunctionBegin; 2071 for (k=0; k<m; k++) { /* loop over added rows */ 2072 row = im[k]; 2073 brow = row/bs; 2074 if (row < 0) continue; 2075 #if defined(PETSC_USE_DEBUG) 2076 if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2077 #endif 2078 rp = aj + ai[brow]; 2079 ap = aa + bs2*ai[brow]; 2080 rmax = imax[brow]; 2081 nrow = ailen[brow]; 2082 low = 0; 2083 high = nrow; 2084 for (l=0; l<n; l++) { /* loop over added columns */ 2085 if (in[l] < 0) continue; 2086 #if defined(PETSC_USE_DEBUG) 2087 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2088 #endif 2089 col = in[l]; bcol = col/bs; 2090 ridx = row % bs; cidx = col % bs; 2091 if (roworiented) { 2092 value = v[l + k*n]; 2093 } else { 2094 value = v[k + l*m]; 2095 } 2096 if (col <= lastcol) low = 0; else high = nrow; 2097 lastcol = col; 2098 while (high-low > 7) { 2099 t = (low+high)/2; 2100 if (rp[t] > bcol) high = t; 2101 else low = t; 2102 } 2103 for (i=low; i<high; i++) { 2104 if (rp[i] > bcol) break; 2105 if (rp[i] == bcol) { 2106 bap = ap + bs2*i + bs*cidx + ridx; 2107 if (is == ADD_VALUES) *bap += value; 2108 else *bap = value; 2109 goto noinsert1; 2110 } 2111 } 2112 if (nonew == 1) goto noinsert1; 2113 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2114 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2115 N = nrow++ - 1; high++; 2116 /* shift up all the later entries in this row */ 2117 for (ii=N; ii>=i; ii--) { 2118 rp[ii+1] = rp[ii]; 2119 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2120 } 2121 if (N>=i) { 2122 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2123 } 2124 rp[i] = bcol; 2125 ap[bs2*i + bs*cidx + ridx] = value; 2126 a->nz++; 2127 noinsert1:; 2128 low = i; 2129 } 2130 ailen[brow] = nrow; 2131 } 2132 A->same_nonzero = PETSC_FALSE; 2133 PetscFunctionReturn(0); 2134 } 2135 2136 EXTERN PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat,PetscTruth); 2137 2138 #undef __FUNCT__ 2139 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2140 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2141 { 2142 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2143 Mat outA; 2144 PetscErrorCode ierr; 2145 PetscTruth row_identity,col_identity; 2146 2147 PetscFunctionBegin; 2148 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2149 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2150 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2151 if (!row_identity || !col_identity) { 2152 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2153 } 2154 2155 outA = inA; 2156 inA->factor = MAT_FACTOR_LU; 2157 2158 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2159 2160 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2161 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 2162 a->row = row; 2163 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2164 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 2165 a->col = col; 2166 2167 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2168 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2169 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2170 2171 ierr = MatSeqBAIJSetNumericFactorization(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr); 2172 if (!a->solve_work) { 2173 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2174 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2175 } 2176 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2177 2178 PetscFunctionReturn(0); 2179 } 2180 2181 EXTERN_C_BEGIN 2182 #undef __FUNCT__ 2183 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2184 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2185 { 2186 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2187 PetscInt i,nz,mbs; 2188 2189 PetscFunctionBegin; 2190 nz = baij->maxnz/baij->bs2; 2191 mbs = baij->mbs; 2192 for (i=0; i<nz; i++) { 2193 baij->j[i] = indices[i]; 2194 } 2195 baij->nz = nz; 2196 for (i=0; i<mbs; i++) { 2197 baij->ilen[i] = baij->imax[i]; 2198 } 2199 PetscFunctionReturn(0); 2200 } 2201 EXTERN_C_END 2202 2203 #undef __FUNCT__ 2204 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2205 /*@ 2206 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2207 in the matrix. 2208 2209 Input Parameters: 2210 + mat - the SeqBAIJ matrix 2211 - indices - the column indices 2212 2213 Level: advanced 2214 2215 Notes: 2216 This can be called if you have precomputed the nonzero structure of the 2217 matrix and want to provide it to the matrix object to improve the performance 2218 of the MatSetValues() operation. 2219 2220 You MUST have set the correct numbers of nonzeros per row in the call to 2221 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2222 2223 MUST be called before any calls to MatSetValues(); 2224 2225 @*/ 2226 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2227 { 2228 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 2229 2230 PetscFunctionBegin; 2231 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 2232 PetscValidPointer(indices,2); 2233 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 2234 if (f) { 2235 ierr = (*f)(mat,indices);CHKERRQ(ierr); 2236 } else { 2237 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 2238 } 2239 PetscFunctionReturn(0); 2240 } 2241 2242 #undef __FUNCT__ 2243 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2244 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2245 { 2246 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2247 PetscErrorCode ierr; 2248 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2249 PetscReal atmp; 2250 PetscScalar *x,zero = 0.0; 2251 MatScalar *aa; 2252 PetscInt ncols,brow,krow,kcol; 2253 2254 PetscFunctionBegin; 2255 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2256 bs = A->rmap->bs; 2257 aa = a->a; 2258 ai = a->i; 2259 aj = a->j; 2260 mbs = a->mbs; 2261 2262 ierr = VecSet(v,zero);CHKERRQ(ierr); 2263 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2264 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2265 if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2266 for (i=0; i<mbs; i++) { 2267 ncols = ai[1] - ai[0]; ai++; 2268 brow = bs*i; 2269 for (j=0; j<ncols; j++){ 2270 for (kcol=0; kcol<bs; kcol++){ 2271 for (krow=0; krow<bs; krow++){ 2272 atmp = PetscAbsScalar(*aa);aa++; 2273 row = brow + krow; /* row index */ 2274 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2275 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2276 } 2277 } 2278 aj++; 2279 } 2280 } 2281 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2282 PetscFunctionReturn(0); 2283 } 2284 2285 #undef __FUNCT__ 2286 #define __FUNCT__ "MatCopy_SeqBAIJ" 2287 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2288 { 2289 PetscErrorCode ierr; 2290 2291 PetscFunctionBegin; 2292 /* If the two matrices have the same copy implementation, use fast copy. */ 2293 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2294 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2295 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2296 2297 if (a->i[A->rmap->N] != b->i[B->rmap->N]) { 2298 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 2299 } 2300 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr); 2301 } else { 2302 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2303 } 2304 PetscFunctionReturn(0); 2305 } 2306 2307 #undef __FUNCT__ 2308 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2309 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2310 { 2311 PetscErrorCode ierr; 2312 2313 PetscFunctionBegin; 2314 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2315 PetscFunctionReturn(0); 2316 } 2317 2318 #undef __FUNCT__ 2319 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2320 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2321 { 2322 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2323 PetscFunctionBegin; 2324 *array = a->a; 2325 PetscFunctionReturn(0); 2326 } 2327 2328 #undef __FUNCT__ 2329 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2330 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2331 { 2332 PetscFunctionBegin; 2333 PetscFunctionReturn(0); 2334 } 2335 2336 #include "petscblaslapack.h" 2337 #undef __FUNCT__ 2338 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2339 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2340 { 2341 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2342 PetscErrorCode ierr; 2343 PetscInt i,bs=Y->rmap->bs,j,bs2; 2344 PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz); 2345 2346 PetscFunctionBegin; 2347 if (str == SAME_NONZERO_PATTERN) { 2348 PetscScalar alpha = a; 2349 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2350 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2351 if (y->xtoy && y->XtoY != X) { 2352 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2353 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2354 } 2355 if (!y->xtoy) { /* get xtoy */ 2356 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2357 y->XtoY = X; 2358 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2359 } 2360 bs2 = bs*bs; 2361 for (i=0; i<x->nz; i++) { 2362 j = 0; 2363 while (j < bs2){ 2364 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2365 j++; 2366 } 2367 } 2368 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2369 } else { 2370 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2371 } 2372 PetscFunctionReturn(0); 2373 } 2374 2375 #undef __FUNCT__ 2376 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2377 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2378 { 2379 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2380 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2381 MatScalar *aa = a->a; 2382 2383 PetscFunctionBegin; 2384 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2385 PetscFunctionReturn(0); 2386 } 2387 2388 #undef __FUNCT__ 2389 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2390 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2391 { 2392 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2393 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2394 MatScalar *aa = a->a; 2395 2396 PetscFunctionBegin; 2397 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2398 PetscFunctionReturn(0); 2399 } 2400 2401 2402 /* -------------------------------------------------------------------*/ 2403 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2404 MatGetRow_SeqBAIJ, 2405 MatRestoreRow_SeqBAIJ, 2406 MatMult_SeqBAIJ_N, 2407 /* 4*/ MatMultAdd_SeqBAIJ_N, 2408 MatMultTranspose_SeqBAIJ, 2409 MatMultTransposeAdd_SeqBAIJ, 2410 0, 2411 0, 2412 0, 2413 /*10*/ 0, 2414 MatLUFactor_SeqBAIJ, 2415 0, 2416 0, 2417 MatTranspose_SeqBAIJ, 2418 /*15*/ MatGetInfo_SeqBAIJ, 2419 MatEqual_SeqBAIJ, 2420 MatGetDiagonal_SeqBAIJ, 2421 MatDiagonalScale_SeqBAIJ, 2422 MatNorm_SeqBAIJ, 2423 /*20*/ 0, 2424 MatAssemblyEnd_SeqBAIJ, 2425 MatSetOption_SeqBAIJ, 2426 MatZeroEntries_SeqBAIJ, 2427 /*24*/ MatZeroRows_SeqBAIJ, 2428 0, 2429 0, 2430 0, 2431 0, 2432 /*29*/ MatSetUpPreallocation_SeqBAIJ, 2433 0, 2434 0, 2435 MatGetArray_SeqBAIJ, 2436 MatRestoreArray_SeqBAIJ, 2437 /*34*/ MatDuplicate_SeqBAIJ, 2438 0, 2439 0, 2440 MatILUFactor_SeqBAIJ, 2441 0, 2442 /*39*/ MatAXPY_SeqBAIJ, 2443 MatGetSubMatrices_SeqBAIJ, 2444 MatIncreaseOverlap_SeqBAIJ, 2445 MatGetValues_SeqBAIJ, 2446 MatCopy_SeqBAIJ, 2447 /*44*/ 0, 2448 MatScale_SeqBAIJ, 2449 0, 2450 0, 2451 MatILUDTFactor_SeqBAIJ, 2452 /*49*/ 0, 2453 MatGetRowIJ_SeqBAIJ, 2454 MatRestoreRowIJ_SeqBAIJ, 2455 0, 2456 0, 2457 /*54*/ 0, 2458 0, 2459 0, 2460 0, 2461 MatSetValuesBlocked_SeqBAIJ, 2462 /*59*/ MatGetSubMatrix_SeqBAIJ, 2463 MatDestroy_SeqBAIJ, 2464 MatView_SeqBAIJ, 2465 0, 2466 0, 2467 /*64*/ 0, 2468 0, 2469 0, 2470 0, 2471 0, 2472 /*69*/ MatGetRowMaxAbs_SeqBAIJ, 2473 0, 2474 MatConvert_Basic, 2475 0, 2476 0, 2477 /*74*/ 0, 2478 0, 2479 0, 2480 0, 2481 0, 2482 /*79*/ 0, 2483 0, 2484 0, 2485 0, 2486 MatLoad_SeqBAIJ, 2487 /*84*/ 0, 2488 0, 2489 0, 2490 0, 2491 0, 2492 /*89*/ 0, 2493 0, 2494 0, 2495 0, 2496 0, 2497 /*94*/ 0, 2498 0, 2499 0, 2500 0, 2501 0, 2502 /*99*/0, 2503 0, 2504 0, 2505 0, 2506 0, 2507 /*104*/0, 2508 MatRealPart_SeqBAIJ, 2509 MatImaginaryPart_SeqBAIJ, 2510 0, 2511 0, 2512 /*109*/0, 2513 0, 2514 0, 2515 0, 2516 MatMissingDiagonal_SeqBAIJ 2517 /*114*/ 2518 }; 2519 2520 EXTERN_C_BEGIN 2521 #undef __FUNCT__ 2522 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2523 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2524 { 2525 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2526 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2527 PetscErrorCode ierr; 2528 2529 PetscFunctionBegin; 2530 if (aij->nonew != 1) { 2531 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2532 } 2533 2534 /* allocate space for values if not already there */ 2535 if (!aij->saved_values) { 2536 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2537 } 2538 2539 /* copy values over */ 2540 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2541 PetscFunctionReturn(0); 2542 } 2543 EXTERN_C_END 2544 2545 EXTERN_C_BEGIN 2546 #undef __FUNCT__ 2547 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2548 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2549 { 2550 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2551 PetscErrorCode ierr; 2552 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2553 2554 PetscFunctionBegin; 2555 if (aij->nonew != 1) { 2556 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2557 } 2558 if (!aij->saved_values) { 2559 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2560 } 2561 2562 /* copy values over */ 2563 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2564 PetscFunctionReturn(0); 2565 } 2566 EXTERN_C_END 2567 2568 EXTERN_C_BEGIN 2569 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2570 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2571 EXTERN_C_END 2572 2573 EXTERN_C_BEGIN 2574 #undef __FUNCT__ 2575 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2576 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2577 { 2578 Mat_SeqBAIJ *b; 2579 PetscErrorCode ierr; 2580 PetscInt i,mbs,nbs,bs2,newbs = PetscAbs(bs); 2581 PetscTruth flg,skipallocation = PETSC_FALSE; 2582 2583 PetscFunctionBegin; 2584 2585 if (nz == MAT_SKIP_ALLOCATION) { 2586 skipallocation = PETSC_TRUE; 2587 nz = 0; 2588 } 2589 2590 if (bs < 0) { 2591 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2592 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2593 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2594 bs = PetscAbs(bs); 2595 } 2596 if (nnz && newbs != bs) { 2597 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2598 } 2599 bs = newbs; 2600 2601 ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2602 ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2603 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2604 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2605 2606 B->preallocated = PETSC_TRUE; 2607 2608 mbs = B->rmap->n/bs; 2609 nbs = B->cmap->n/bs; 2610 bs2 = bs*bs; 2611 2612 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) { 2613 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2614 } 2615 2616 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2617 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2618 if (nnz) { 2619 for (i=0; i<mbs; i++) { 2620 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2621 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2622 } 2623 } 2624 2625 b = (Mat_SeqBAIJ*)B->data; 2626 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2627 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2628 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2629 2630 if (!flg) { 2631 switch (bs) { 2632 case 1: 2633 B->ops->mult = MatMult_SeqBAIJ_1; 2634 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2635 B->ops->pbrelax = MatPBRelax_SeqBAIJ_1; 2636 break; 2637 case 2: 2638 B->ops->mult = MatMult_SeqBAIJ_2; 2639 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2640 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2641 break; 2642 case 3: 2643 B->ops->mult = MatMult_SeqBAIJ_3; 2644 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2645 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2646 break; 2647 case 4: 2648 B->ops->mult = MatMult_SeqBAIJ_4; 2649 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2650 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2651 break; 2652 case 5: 2653 B->ops->mult = MatMult_SeqBAIJ_5; 2654 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2655 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2656 break; 2657 case 6: 2658 B->ops->mult = MatMult_SeqBAIJ_6; 2659 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2660 B->ops->pbrelax = MatPBRelax_SeqBAIJ_6; 2661 break; 2662 case 7: 2663 B->ops->mult = MatMult_SeqBAIJ_7; 2664 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2665 B->ops->pbrelax = MatPBRelax_SeqBAIJ_7; 2666 break; 2667 default: 2668 B->ops->mult = MatMult_SeqBAIJ_N; 2669 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2670 break; 2671 } 2672 } 2673 B->rmap->bs = bs; 2674 b->mbs = mbs; 2675 b->nbs = nbs; 2676 if (!skipallocation) { 2677 if (!b->imax) { 2678 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2679 } 2680 /* b->ilen will count nonzeros in each block row so far. */ 2681 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2682 if (!nnz) { 2683 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2684 else if (nz <= 0) nz = 1; 2685 for (i=0; i<mbs; i++) b->imax[i] = nz; 2686 nz = nz*mbs; 2687 } else { 2688 nz = 0; 2689 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2690 } 2691 2692 /* allocate the matrix space */ 2693 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2694 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 2695 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2696 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2697 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2698 b->singlemalloc = PETSC_TRUE; 2699 b->i[0] = 0; 2700 for (i=1; i<mbs+1; i++) { 2701 b->i[i] = b->i[i-1] + b->imax[i-1]; 2702 } 2703 b->free_a = PETSC_TRUE; 2704 b->free_ij = PETSC_TRUE; 2705 } else { 2706 b->free_a = PETSC_FALSE; 2707 b->free_ij = PETSC_FALSE; 2708 } 2709 2710 B->rmap->bs = bs; 2711 b->bs2 = bs2; 2712 b->mbs = mbs; 2713 b->nz = 0; 2714 b->maxnz = nz*bs2; 2715 B->info.nz_unneeded = (PetscReal)b->maxnz; 2716 PetscFunctionReturn(0); 2717 } 2718 EXTERN_C_END 2719 2720 EXTERN_C_BEGIN 2721 #undef __FUNCT__ 2722 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 2723 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2724 { 2725 PetscInt i,m,nz,nz_max=0,*nnz; 2726 PetscScalar *values=0; 2727 PetscErrorCode ierr; 2728 2729 PetscFunctionBegin; 2730 2731 if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2732 2733 ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2734 ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2735 ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2736 ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2737 m = B->rmap->n/bs; 2738 2739 if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 2740 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 2741 for(i=0; i<m; i++) { 2742 nz = ii[i+1]- ii[i]; 2743 if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 2744 nz_max = PetscMax(nz_max, nz); 2745 nnz[i] = nz; 2746 } 2747 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2748 ierr = PetscFree(nnz);CHKERRQ(ierr); 2749 2750 values = (PetscScalar*)V; 2751 if (!values) { 2752 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2753 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2754 } 2755 for (i=0; i<m; i++) { 2756 PetscInt ncols = ii[i+1] - ii[i]; 2757 const PetscInt *icols = jj + ii[i]; 2758 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2759 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2760 } 2761 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2762 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2763 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2764 2765 PetscFunctionReturn(0); 2766 } 2767 EXTERN_C_END 2768 2769 2770 EXTERN_C_BEGIN 2771 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 2772 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 2773 EXTERN_C_END 2774 2775 /*MC 2776 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2777 block sparse compressed row format. 2778 2779 Options Database Keys: 2780 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2781 2782 Level: beginner 2783 2784 .seealso: MatCreateSeqBAIJ() 2785 M*/ 2786 2787 2788 EXTERN_C_BEGIN 2789 #undef __FUNCT__ 2790 #define __FUNCT__ "MatCreate_SeqBAIJ" 2791 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2792 { 2793 PetscErrorCode ierr; 2794 PetscMPIInt size; 2795 Mat_SeqBAIJ *b; 2796 2797 PetscFunctionBegin; 2798 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 2799 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2800 2801 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2802 B->data = (void*)b; 2803 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2804 B->mapping = 0; 2805 b->row = 0; 2806 b->col = 0; 2807 b->icol = 0; 2808 b->reallocs = 0; 2809 b->saved_values = 0; 2810 2811 b->roworiented = PETSC_TRUE; 2812 b->nonew = 0; 2813 b->diag = 0; 2814 b->solve_work = 0; 2815 b->mult_work = 0; 2816 B->spptr = 0; 2817 B->info.nz_unneeded = (PetscReal)b->maxnz; 2818 b->keepnonzeropattern = PETSC_FALSE; 2819 b->xtoy = 0; 2820 b->XtoY = 0; 2821 b->compressedrow.use = PETSC_FALSE; 2822 b->compressedrow.nrows = 0; 2823 b->compressedrow.i = PETSC_NULL; 2824 b->compressedrow.rindex = PETSC_NULL; 2825 b->compressedrow.checked = PETSC_FALSE; 2826 B->same_nonzero = PETSC_FALSE; 2827 2828 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_seqbaij_petsc_C", 2829 "MatGetFactorAvailable_seqbaij_petsc", 2830 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 2831 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqbaij_petsc_C", 2832 "MatGetFactor_seqbaij_petsc", 2833 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 2834 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2835 "MatInvertBlockDiagonal_SeqBAIJ", 2836 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2837 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2838 "MatStoreValues_SeqBAIJ", 2839 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2840 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2841 "MatRetrieveValues_SeqBAIJ", 2842 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2843 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2844 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2845 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2846 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2847 "MatConvert_SeqBAIJ_SeqAIJ", 2848 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2849 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2850 "MatConvert_SeqBAIJ_SeqSBAIJ", 2851 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2852 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2853 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2854 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2855 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 2856 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 2857 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 2858 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 2859 PetscFunctionReturn(0); 2860 } 2861 EXTERN_C_END 2862 2863 #undef __FUNCT__ 2864 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 2865 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues) 2866 { 2867 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 2868 PetscErrorCode ierr; 2869 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2870 2871 PetscFunctionBegin; 2872 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2873 2874 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 2875 for (i=0; i<mbs; i++) { 2876 c->imax[i] = a->imax[i]; 2877 c->ilen[i] = a->ilen[i]; 2878 } 2879 2880 /* allocate the matrix space */ 2881 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2882 c->singlemalloc = PETSC_TRUE; 2883 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2884 if (mbs > 0) { 2885 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2886 if (cpvalues == MAT_COPY_VALUES) { 2887 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2888 } else { 2889 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2890 } 2891 } 2892 c->roworiented = a->roworiented; 2893 c->nonew = a->nonew; 2894 ierr = PetscMapCopy(((PetscObject)A)->comm,A->rmap,C->rmap);CHKERRQ(ierr); 2895 ierr = PetscMapCopy(((PetscObject)A)->comm,A->cmap,C->cmap);CHKERRQ(ierr); 2896 c->bs2 = a->bs2; 2897 c->mbs = a->mbs; 2898 c->nbs = a->nbs; 2899 2900 if (a->diag) { 2901 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2902 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2903 for (i=0; i<mbs; i++) { 2904 c->diag[i] = a->diag[i]; 2905 } 2906 } else c->diag = 0; 2907 c->nz = a->nz; 2908 c->maxnz = a->maxnz; 2909 c->solve_work = 0; 2910 c->mult_work = 0; 2911 c->free_a = PETSC_TRUE; 2912 c->free_ij = PETSC_TRUE; 2913 C->preallocated = PETSC_TRUE; 2914 C->assembled = PETSC_TRUE; 2915 2916 c->compressedrow.use = a->compressedrow.use; 2917 c->compressedrow.nrows = a->compressedrow.nrows; 2918 c->compressedrow.checked = a->compressedrow.checked; 2919 if ( a->compressedrow.checked && a->compressedrow.use){ 2920 i = a->compressedrow.nrows; 2921 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2922 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2923 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2924 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2925 } else { 2926 c->compressedrow.use = PETSC_FALSE; 2927 c->compressedrow.i = PETSC_NULL; 2928 c->compressedrow.rindex = PETSC_NULL; 2929 } 2930 C->same_nonzero = A->same_nonzero; 2931 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 2932 PetscFunctionReturn(0); 2933 } 2934 2935 #undef __FUNCT__ 2936 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2937 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2938 { 2939 PetscErrorCode ierr; 2940 2941 PetscFunctionBegin; 2942 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 2943 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 2944 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 2945 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues); 2946 PetscFunctionReturn(0); 2947 } 2948 2949 #undef __FUNCT__ 2950 #define __FUNCT__ "MatLoad_SeqBAIJ" 2951 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A) 2952 { 2953 Mat_SeqBAIJ *a; 2954 Mat B; 2955 PetscErrorCode ierr; 2956 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2957 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2958 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2959 PetscInt *masked,nmask,tmp,bs2,ishift; 2960 PetscMPIInt size; 2961 int fd; 2962 PetscScalar *aa; 2963 MPI_Comm comm = ((PetscObject)viewer)->comm; 2964 2965 PetscFunctionBegin; 2966 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 2967 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2968 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2969 bs2 = bs*bs; 2970 2971 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2972 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2973 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2974 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2975 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2976 M = header[1]; N = header[2]; nz = header[3]; 2977 2978 if (header[3] < 0) { 2979 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2980 } 2981 2982 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2983 2984 /* 2985 This code adds extra rows to make sure the number of rows is 2986 divisible by the blocksize 2987 */ 2988 mbs = M/bs; 2989 extra_rows = bs - M + bs*(mbs); 2990 if (extra_rows == bs) extra_rows = 0; 2991 else mbs++; 2992 if (extra_rows) { 2993 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2994 } 2995 2996 /* read in row lengths */ 2997 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2998 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2999 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3000 3001 /* read in column indices */ 3002 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3003 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3004 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3005 3006 /* loop over row lengths determining block row lengths */ 3007 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3008 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3009 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 3010 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3011 masked = mask + mbs; 3012 rowcount = 0; nzcount = 0; 3013 for (i=0; i<mbs; i++) { 3014 nmask = 0; 3015 for (j=0; j<bs; j++) { 3016 kmax = rowlengths[rowcount]; 3017 for (k=0; k<kmax; k++) { 3018 tmp = jj[nzcount++]/bs; 3019 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3020 } 3021 rowcount++; 3022 } 3023 browlengths[i] += nmask; 3024 /* zero out the mask elements we set */ 3025 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3026 } 3027 3028 /* create our matrix */ 3029 ierr = MatCreate(comm,&B); 3030 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 3031 ierr = MatSetType(B,type);CHKERRQ(ierr); 3032 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 3033 a = (Mat_SeqBAIJ*)B->data; 3034 3035 /* set matrix "i" values */ 3036 a->i[0] = 0; 3037 for (i=1; i<= mbs; i++) { 3038 a->i[i] = a->i[i-1] + browlengths[i-1]; 3039 a->ilen[i-1] = browlengths[i-1]; 3040 } 3041 a->nz = 0; 3042 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3043 3044 /* read in nonzero values */ 3045 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3046 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3047 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3048 3049 /* set "a" and "j" values into matrix */ 3050 nzcount = 0; jcount = 0; 3051 for (i=0; i<mbs; i++) { 3052 nzcountb = nzcount; 3053 nmask = 0; 3054 for (j=0; j<bs; j++) { 3055 kmax = rowlengths[i*bs+j]; 3056 for (k=0; k<kmax; k++) { 3057 tmp = jj[nzcount++]/bs; 3058 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3059 } 3060 } 3061 /* sort the masked values */ 3062 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3063 3064 /* set "j" values into matrix */ 3065 maskcount = 1; 3066 for (j=0; j<nmask; j++) { 3067 a->j[jcount++] = masked[j]; 3068 mask[masked[j]] = maskcount++; 3069 } 3070 /* set "a" values into matrix */ 3071 ishift = bs2*a->i[i]; 3072 for (j=0; j<bs; j++) { 3073 kmax = rowlengths[i*bs+j]; 3074 for (k=0; k<kmax; k++) { 3075 tmp = jj[nzcountb]/bs ; 3076 block = mask[tmp] - 1; 3077 point = jj[nzcountb] - bs*tmp; 3078 idx = ishift + bs2*block + j + bs*point; 3079 a->a[idx] = (MatScalar)aa[nzcountb++]; 3080 } 3081 } 3082 /* zero out the mask elements we set */ 3083 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3084 } 3085 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3086 3087 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3088 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3089 ierr = PetscFree(aa);CHKERRQ(ierr); 3090 ierr = PetscFree(jj);CHKERRQ(ierr); 3091 ierr = PetscFree(mask);CHKERRQ(ierr); 3092 3093 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3094 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3095 ierr = MatView_Private(B);CHKERRQ(ierr); 3096 3097 *A = B; 3098 PetscFunctionReturn(0); 3099 } 3100 3101 #undef __FUNCT__ 3102 #define __FUNCT__ "MatCreateSeqBAIJ" 3103 /*@C 3104 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3105 compressed row) format. For good matrix assembly performance the 3106 user should preallocate the matrix storage by setting the parameter nz 3107 (or the array nnz). By setting these parameters accurately, performance 3108 during matrix assembly can be increased by more than a factor of 50. 3109 3110 Collective on MPI_Comm 3111 3112 Input Parameters: 3113 + comm - MPI communicator, set to PETSC_COMM_SELF 3114 . bs - size of block 3115 . m - number of rows 3116 . n - number of columns 3117 . nz - number of nonzero blocks per block row (same for all rows) 3118 - nnz - array containing the number of nonzero blocks in the various block rows 3119 (possibly different for each block row) or PETSC_NULL 3120 3121 Output Parameter: 3122 . A - the matrix 3123 3124 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3125 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3126 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3127 3128 Options Database Keys: 3129 . -mat_no_unroll - uses code that does not unroll the loops in the 3130 block calculations (much slower) 3131 . -mat_block_size - size of the blocks to use 3132 3133 Level: intermediate 3134 3135 Notes: 3136 The number of rows and columns must be divisible by blocksize. 3137 3138 If the nnz parameter is given then the nz parameter is ignored 3139 3140 A nonzero block is any block that as 1 or more nonzeros in it 3141 3142 The block AIJ format is fully compatible with standard Fortran 77 3143 storage. That is, the stored row and column indices can begin at 3144 either one (as in Fortran) or zero. See the users' manual for details. 3145 3146 Specify the preallocated storage with either nz or nnz (not both). 3147 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3148 allocation. For additional details, see the users manual chapter on 3149 matrices. 3150 3151 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3152 @*/ 3153 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3154 { 3155 PetscErrorCode ierr; 3156 3157 PetscFunctionBegin; 3158 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3159 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3160 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3161 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3162 PetscFunctionReturn(0); 3163 } 3164 3165 #undef __FUNCT__ 3166 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3167 /*@C 3168 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3169 per row in the matrix. For good matrix assembly performance the 3170 user should preallocate the matrix storage by setting the parameter nz 3171 (or the array nnz). By setting these parameters accurately, performance 3172 during matrix assembly can be increased by more than a factor of 50. 3173 3174 Collective on MPI_Comm 3175 3176 Input Parameters: 3177 + A - the matrix 3178 . bs - size of block 3179 . nz - number of block nonzeros per block row (same for all rows) 3180 - nnz - array containing the number of block nonzeros in the various block rows 3181 (possibly different for each block row) or PETSC_NULL 3182 3183 Options Database Keys: 3184 . -mat_no_unroll - uses code that does not unroll the loops in the 3185 block calculations (much slower) 3186 . -mat_block_size - size of the blocks to use 3187 3188 Level: intermediate 3189 3190 Notes: 3191 If the nnz parameter is given then the nz parameter is ignored 3192 3193 You can call MatGetInfo() to get information on how effective the preallocation was; 3194 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3195 You can also run with the option -info and look for messages with the string 3196 malloc in them to see if additional memory allocation was needed. 3197 3198 The block AIJ format is fully compatible with standard Fortran 77 3199 storage. That is, the stored row and column indices can begin at 3200 either one (as in Fortran) or zero. See the users' manual for details. 3201 3202 Specify the preallocated storage with either nz or nnz (not both). 3203 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3204 allocation. For additional details, see the users manual chapter on 3205 matrices. 3206 3207 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3208 @*/ 3209 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3210 { 3211 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 3212 3213 PetscFunctionBegin; 3214 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3215 if (f) { 3216 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 3217 } 3218 PetscFunctionReturn(0); 3219 } 3220 3221 #undef __FUNCT__ 3222 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3223 /*@C 3224 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3225 (the default sequential PETSc format). 3226 3227 Collective on MPI_Comm 3228 3229 Input Parameters: 3230 + A - the matrix 3231 . i - the indices into j for the start of each local row (starts with zero) 3232 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3233 - v - optional values in the matrix 3234 3235 Level: developer 3236 3237 .keywords: matrix, aij, compressed row, sparse 3238 3239 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3240 @*/ 3241 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3242 { 3243 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 3244 3245 PetscFunctionBegin; 3246 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3247 if (f) { 3248 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 3249 } 3250 PetscFunctionReturn(0); 3251 } 3252 3253 3254 #undef __FUNCT__ 3255 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3256 /*@ 3257 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 3258 (upper triangular entries in CSR format) provided by the user. 3259 3260 Collective on MPI_Comm 3261 3262 Input Parameters: 3263 + comm - must be an MPI communicator of size 1 3264 . bs - size of block 3265 . m - number of rows 3266 . n - number of columns 3267 . i - row indices 3268 . j - column indices 3269 - a - matrix values 3270 3271 Output Parameter: 3272 . mat - the matrix 3273 3274 Level: intermediate 3275 3276 Notes: 3277 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3278 once the matrix is destroyed 3279 3280 You cannot set new nonzero locations into this matrix, that will generate an error. 3281 3282 The i and j indices are 0 based 3283 3284 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3285 3286 @*/ 3287 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3288 { 3289 PetscErrorCode ierr; 3290 PetscInt ii; 3291 Mat_SeqBAIJ *baij; 3292 3293 PetscFunctionBegin; 3294 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3295 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3296 3297 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3298 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3299 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3300 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3301 baij = (Mat_SeqBAIJ*)(*mat)->data; 3302 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3303 3304 baij->i = i; 3305 baij->j = j; 3306 baij->a = a; 3307 baij->singlemalloc = PETSC_FALSE; 3308 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3309 baij->free_a = PETSC_FALSE; 3310 baij->free_ij = PETSC_FALSE; 3311 3312 for (ii=0; ii<m; ii++) { 3313 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3314 #if defined(PETSC_USE_DEBUG) 3315 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3316 #endif 3317 } 3318 #if defined(PETSC_USE_DEBUG) 3319 for (ii=0; ii<baij->i[m]; ii++) { 3320 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3321 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3322 } 3323 #endif 3324 3325 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3326 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3327 PetscFunctionReturn(0); 3328 } 3329