1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 15 /*@ 16 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 17 18 Collective on Mat 19 20 Input Parameters: 21 . mat - the matrix 22 23 Level: advanced 24 @*/ 25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 26 { 27 PetscErrorCode ierr,(*f)(Mat); 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 31 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 32 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 33 34 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 35 if (f) { 36 ierr = (*f)(mat);CHKERRQ(ierr); 37 } else { 38 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 39 } 40 PetscFunctionReturn(0); 41 } 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 47 { 48 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 49 PetscErrorCode ierr; 50 PetscInt *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs; 51 MatScalar *v = a->a,*odiag,*diag,*mdiag; 52 PetscReal shift = 0.0; 53 54 PetscFunctionBegin; 55 if (a->idiagvalid) PetscFunctionReturn(0); 56 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 57 diag_offset = a->diag; 58 if (!a->idiag) { 59 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 60 } 61 diag = a->idiag; 62 mdiag = a->idiag+bs*bs*mbs; 63 /* factor and invert each block */ 64 switch (bs){ 65 case 2: 66 for (i=0; i<mbs; i++) { 67 odiag = v + 4*diag_offset[i]; 68 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 69 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 70 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 71 diag += 4; 72 mdiag += 4; 73 } 74 break; 75 case 3: 76 for (i=0; i<mbs; i++) { 77 odiag = v + 9*diag_offset[i]; 78 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 79 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 80 diag[8] = odiag[8]; 81 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 82 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 83 mdiag[8] = odiag[8]; 84 ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 85 diag += 9; 86 mdiag += 9; 87 } 88 break; 89 case 4: 90 for (i=0; i<mbs; i++) { 91 odiag = v + 16*diag_offset[i]; 92 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 94 ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 95 diag += 16; 96 mdiag += 16; 97 } 98 break; 99 case 5: 100 for (i=0; i<mbs; i++) { 101 odiag = v + 25*diag_offset[i]; 102 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 104 ierr = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr); 105 diag += 25; 106 mdiag += 25; 107 } 108 break; 109 default: 110 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 111 } 112 a->idiagvalid = PETSC_TRUE; 113 PetscFunctionReturn(0); 114 } 115 EXTERN_C_END 116 117 #undef __FUNCT__ 118 #define __FUNCT__ "MatPBRelax_SeqBAIJ_1" 119 PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 120 { 121 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 122 PetscScalar *x,x1,s1; 123 const PetscScalar *b; 124 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 125 PetscErrorCode ierr; 126 PetscInt m = a->mbs,i,i2,nz,idx; 127 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 128 129 PetscFunctionBegin; 130 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 131 its = its*lits; 132 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 133 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 134 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 135 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 136 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 137 138 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 139 140 diag = a->diag; 141 idiag = a->idiag; 142 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 143 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 144 145 if (flag & SOR_ZERO_INITIAL_GUESS) { 146 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 147 x[0] = b[0]*idiag[0]; 148 i2 = 1; 149 idiag += 1; 150 for (i=1; i<m; i++) { 151 v = aa + ai[i]; 152 vi = aj + ai[i]; 153 nz = diag[i] - ai[i]; 154 s1 = b[i2]; 155 while (nz--) { 156 idx = (*vi++); 157 x1 = x[idx]; 158 s1 -= v[0]*x1; 159 v += 1; 160 } 161 x[i2] = idiag[0]*s1; 162 idiag += 1; 163 i2 += 1; 164 } 165 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 166 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 167 } 168 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 169 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 170 i2 = 0; 171 mdiag = a->idiag+a->mbs; 172 for (i=0; i<m; i++) { 173 x1 = x[i2]; 174 x[i2] = mdiag[0]*x1; 175 mdiag += 1; 176 i2 += 1; 177 } 178 ierr = PetscLogFlops(m);CHKERRQ(ierr); 179 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 180 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 181 } 182 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 183 idiag = a->idiag+a->mbs - 1; 184 i2 = m - 1; 185 x1 = x[i2]; 186 x[i2] = idiag[0]*x1; 187 idiag -= 1; 188 i2 -= 1; 189 for (i=m-2; i>=0; i--) { 190 v = aa + (diag[i]+1); 191 vi = aj + diag[i] + 1; 192 nz = ai[i+1] - diag[i] - 1; 193 s1 = x[i2]; 194 while (nz--) { 195 idx = (*vi++); 196 x1 = x[idx]; 197 s1 -= v[0]*x1; 198 v += 1; 199 } 200 x[i2] = idiag[0]*s1; 201 idiag -= 2; 202 i2 -= 1; 203 } 204 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 205 } 206 } else { 207 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 208 } 209 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 210 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 211 PetscFunctionReturn(0); 212 } 213 214 #undef __FUNCT__ 215 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 216 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 217 { 218 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 219 PetscScalar *x,x1,x2,s1,s2; 220 const PetscScalar *b; 221 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 222 PetscErrorCode ierr; 223 PetscInt m = a->mbs,i,i2,nz,idx; 224 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 225 226 PetscFunctionBegin; 227 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 228 its = its*lits; 229 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 230 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 231 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 232 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 233 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 234 235 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 236 237 diag = a->diag; 238 idiag = a->idiag; 239 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 240 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 241 242 if (flag & SOR_ZERO_INITIAL_GUESS) { 243 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 244 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 245 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 246 i2 = 2; 247 idiag += 4; 248 for (i=1; i<m; i++) { 249 v = aa + 4*ai[i]; 250 vi = aj + ai[i]; 251 nz = diag[i] - ai[i]; 252 s1 = b[i2]; s2 = b[i2+1]; 253 while (nz--) { 254 idx = 2*(*vi++); 255 x1 = x[idx]; x2 = x[1+idx]; 256 s1 -= v[0]*x1 + v[2]*x2; 257 s2 -= v[1]*x1 + v[3]*x2; 258 v += 4; 259 } 260 x[i2] = idiag[0]*s1 + idiag[2]*s2; 261 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 262 idiag += 4; 263 i2 += 2; 264 } 265 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 266 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 267 } 268 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 269 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 270 i2 = 0; 271 mdiag = a->idiag+4*a->mbs; 272 for (i=0; i<m; i++) { 273 x1 = x[i2]; x2 = x[i2+1]; 274 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 275 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 276 mdiag += 4; 277 i2 += 2; 278 } 279 ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 280 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 281 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 282 } 283 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 284 idiag = a->idiag+4*a->mbs - 4; 285 i2 = 2*m - 2; 286 x1 = x[i2]; x2 = x[i2+1]; 287 x[i2] = idiag[0]*x1 + idiag[2]*x2; 288 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 289 idiag -= 4; 290 i2 -= 2; 291 for (i=m-2; i>=0; i--) { 292 v = aa + 4*(diag[i]+1); 293 vi = aj + diag[i] + 1; 294 nz = ai[i+1] - diag[i] - 1; 295 s1 = x[i2]; s2 = x[i2+1]; 296 while (nz--) { 297 idx = 2*(*vi++); 298 x1 = x[idx]; x2 = x[1+idx]; 299 s1 -= v[0]*x1 + v[2]*x2; 300 s2 -= v[1]*x1 + v[3]*x2; 301 v += 4; 302 } 303 x[i2] = idiag[0]*s1 + idiag[2]*s2; 304 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 305 idiag -= 4; 306 i2 -= 2; 307 } 308 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 309 } 310 } else { 311 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 312 } 313 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 314 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 315 PetscFunctionReturn(0); 316 } 317 318 #undef __FUNCT__ 319 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 320 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 321 { 322 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 323 PetscScalar *x,x1,x2,x3,s1,s2,s3; 324 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 325 const PetscScalar *b; 326 PetscErrorCode ierr; 327 PetscInt m = a->mbs,i,i2,nz,idx; 328 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 329 330 PetscFunctionBegin; 331 its = its*lits; 332 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 333 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 334 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 335 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 336 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 337 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 338 339 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 340 341 diag = a->diag; 342 idiag = a->idiag; 343 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 344 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 345 346 if (flag & SOR_ZERO_INITIAL_GUESS) { 347 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 348 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 349 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 350 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 351 i2 = 3; 352 idiag += 9; 353 for (i=1; i<m; i++) { 354 v = aa + 9*ai[i]; 355 vi = aj + ai[i]; 356 nz = diag[i] - ai[i]; 357 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 358 while (nz--) { 359 idx = 3*(*vi++); 360 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 361 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 362 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 363 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 364 v += 9; 365 } 366 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 367 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 368 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 369 idiag += 9; 370 i2 += 3; 371 } 372 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 373 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 374 } 375 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 376 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 377 i2 = 0; 378 mdiag = a->idiag+9*a->mbs; 379 for (i=0; i<m; i++) { 380 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 381 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 382 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 383 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 384 mdiag += 9; 385 i2 += 3; 386 } 387 ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 388 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 389 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 390 } 391 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 392 idiag = a->idiag+9*a->mbs - 9; 393 i2 = 3*m - 3; 394 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 395 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 396 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 397 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 398 idiag -= 9; 399 i2 -= 3; 400 for (i=m-2; i>=0; i--) { 401 v = aa + 9*(diag[i]+1); 402 vi = aj + diag[i] + 1; 403 nz = ai[i+1] - diag[i] - 1; 404 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 405 while (nz--) { 406 idx = 3*(*vi++); 407 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 408 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 409 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 410 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 411 v += 9; 412 } 413 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 414 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 415 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 416 idiag -= 9; 417 i2 -= 3; 418 } 419 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 420 } 421 } else { 422 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 423 } 424 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 425 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 431 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 432 { 433 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 434 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 435 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 436 const PetscScalar *b; 437 PetscErrorCode ierr; 438 PetscInt m = a->mbs,i,i2,nz,idx; 439 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 440 441 PetscFunctionBegin; 442 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 443 its = its*lits; 444 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 445 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 446 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 447 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 448 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 449 450 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 451 452 diag = a->diag; 453 idiag = a->idiag; 454 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 455 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 456 457 if (flag & SOR_ZERO_INITIAL_GUESS) { 458 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 459 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 460 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 461 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 462 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 463 i2 = 4; 464 idiag += 16; 465 for (i=1; i<m; i++) { 466 v = aa + 16*ai[i]; 467 vi = aj + ai[i]; 468 nz = diag[i] - ai[i]; 469 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 470 while (nz--) { 471 idx = 4*(*vi++); 472 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 473 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 474 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 475 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 476 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 477 v += 16; 478 } 479 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 480 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 481 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 482 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 483 idiag += 16; 484 i2 += 4; 485 } 486 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 487 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 488 } 489 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 490 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 491 i2 = 0; 492 mdiag = a->idiag+16*a->mbs; 493 for (i=0; i<m; i++) { 494 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 495 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 496 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 497 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 498 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 499 mdiag += 16; 500 i2 += 4; 501 } 502 ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 503 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 504 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 505 } 506 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 507 idiag = a->idiag+16*a->mbs - 16; 508 i2 = 4*m - 4; 509 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 510 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 511 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 512 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 513 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 514 idiag -= 16; 515 i2 -= 4; 516 for (i=m-2; i>=0; i--) { 517 v = aa + 16*(diag[i]+1); 518 vi = aj + diag[i] + 1; 519 nz = ai[i+1] - diag[i] - 1; 520 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 521 while (nz--) { 522 idx = 4*(*vi++); 523 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 524 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 525 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 526 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 527 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 528 v += 16; 529 } 530 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 531 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 532 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 533 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 534 idiag -= 16; 535 i2 -= 4; 536 } 537 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 538 } 539 } else { 540 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 541 } 542 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 543 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 544 PetscFunctionReturn(0); 545 } 546 547 #undef __FUNCT__ 548 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 549 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 550 { 551 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 552 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 553 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 554 const PetscScalar *b; 555 PetscErrorCode ierr; 556 PetscInt m = a->mbs,i,i2,nz,idx; 557 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 558 559 PetscFunctionBegin; 560 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 561 its = its*lits; 562 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 563 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 564 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 565 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 566 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 567 568 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 569 570 diag = a->diag; 571 idiag = a->idiag; 572 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 573 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 574 575 if (flag & SOR_ZERO_INITIAL_GUESS) { 576 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 577 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 578 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 579 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 580 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 581 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 582 i2 = 5; 583 idiag += 25; 584 for (i=1; i<m; i++) { 585 v = aa + 25*ai[i]; 586 vi = aj + ai[i]; 587 nz = diag[i] - ai[i]; 588 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 589 while (nz--) { 590 idx = 5*(*vi++); 591 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 592 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 593 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 594 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 595 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 596 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 597 v += 25; 598 } 599 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 600 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 601 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 602 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 603 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 604 idiag += 25; 605 i2 += 5; 606 } 607 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 608 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 609 } 610 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 611 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 612 i2 = 0; 613 mdiag = a->idiag+25*a->mbs; 614 for (i=0; i<m; i++) { 615 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 616 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 617 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 618 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 619 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 620 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 621 mdiag += 25; 622 i2 += 5; 623 } 624 ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 625 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 626 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 627 } 628 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 629 idiag = a->idiag+25*a->mbs - 25; 630 i2 = 5*m - 5; 631 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 632 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 633 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 634 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 635 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 636 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 637 idiag -= 25; 638 i2 -= 5; 639 for (i=m-2; i>=0; i--) { 640 v = aa + 25*(diag[i]+1); 641 vi = aj + diag[i] + 1; 642 nz = ai[i+1] - diag[i] - 1; 643 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 644 while (nz--) { 645 idx = 5*(*vi++); 646 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 647 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 648 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 649 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 650 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 651 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 652 v += 25; 653 } 654 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 655 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 656 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 657 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 658 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 659 idiag -= 25; 660 i2 -= 5; 661 } 662 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 663 } 664 } else { 665 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 666 } 667 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 668 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 669 PetscFunctionReturn(0); 670 } 671 672 #undef __FUNCT__ 673 #define __FUNCT__ "MatPBRelax_SeqBAIJ_6" 674 PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 675 { 676 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 677 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 678 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 679 const PetscScalar *b; 680 PetscErrorCode ierr; 681 PetscInt m = a->mbs,i,i2,nz,idx; 682 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 683 684 PetscFunctionBegin; 685 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 686 its = its*lits; 687 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 688 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 689 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 690 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 691 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 692 693 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 694 695 diag = a->diag; 696 idiag = a->idiag; 697 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 698 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 699 700 if (flag & SOR_ZERO_INITIAL_GUESS) { 701 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 702 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 703 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 704 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 705 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 706 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 707 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 708 i2 = 6; 709 idiag += 36; 710 for (i=1; i<m; i++) { 711 v = aa + 36*ai[i]; 712 vi = aj + ai[i]; 713 nz = diag[i] - ai[i]; 714 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 715 while (nz--) { 716 idx = 6*(*vi++); 717 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 718 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 719 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 720 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 721 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 722 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 723 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 724 v += 36; 725 } 726 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 727 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 728 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 729 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 730 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 731 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 732 idiag += 36; 733 i2 += 6; 734 } 735 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 736 ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr); 737 } 738 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 739 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 740 i2 = 0; 741 mdiag = a->idiag+36*a->mbs; 742 for (i=0; i<m; i++) { 743 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 744 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 745 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 746 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 747 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 748 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 749 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 750 mdiag += 36; 751 i2 += 6; 752 } 753 ierr = PetscLogFlops(60*m);CHKERRQ(ierr); 754 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 755 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 756 } 757 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 758 idiag = a->idiag+36*a->mbs - 36; 759 i2 = 6*m - 6; 760 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 761 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 762 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 763 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 764 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 765 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 766 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 767 idiag -= 36; 768 i2 -= 6; 769 for (i=m-2; i>=0; i--) { 770 v = aa + 36*(diag[i]+1); 771 vi = aj + diag[i] + 1; 772 nz = ai[i+1] - diag[i] - 1; 773 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 774 while (nz--) { 775 idx = 6*(*vi++); 776 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 777 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 778 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 779 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 780 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 781 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 782 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 783 v += 36; 784 } 785 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 786 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 787 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 788 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 789 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 790 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 791 idiag -= 36; 792 i2 -= 6; 793 } 794 ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr); 795 } 796 } else { 797 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 798 } 799 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 800 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 801 PetscFunctionReturn(0); 802 } 803 804 #undef __FUNCT__ 805 #define __FUNCT__ "MatPBRelax_SeqBAIJ_7" 806 PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 807 { 808 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 809 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 810 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 811 const PetscScalar *b; 812 PetscErrorCode ierr; 813 PetscInt m = a->mbs,i,i2,nz,idx; 814 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 815 816 PetscFunctionBegin; 817 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 818 its = its*lits; 819 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 820 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 821 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 822 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 823 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 824 825 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 826 827 diag = a->diag; 828 idiag = a->idiag; 829 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 830 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 831 832 if (flag & SOR_ZERO_INITIAL_GUESS) { 833 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 834 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 835 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 836 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 837 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 838 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 839 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 840 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 841 i2 = 7; 842 idiag += 49; 843 for (i=1; i<m; i++) { 844 v = aa + 49*ai[i]; 845 vi = aj + ai[i]; 846 nz = diag[i] - ai[i]; 847 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 848 while (nz--) { 849 idx = 7*(*vi++); 850 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 851 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 852 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 853 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 854 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 855 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 856 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 857 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 858 v += 49; 859 } 860 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 861 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 862 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 863 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 864 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 865 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 866 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 867 idiag += 49; 868 i2 += 7; 869 } 870 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 871 ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr); 872 } 873 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 874 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 875 i2 = 0; 876 mdiag = a->idiag+49*a->mbs; 877 for (i=0; i<m; i++) { 878 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 879 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7; 880 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7; 881 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7; 882 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7; 883 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7; 884 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7; 885 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7; 886 mdiag += 36; 887 i2 += 6; 888 } 889 ierr = PetscLogFlops(93*m);CHKERRQ(ierr); 890 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 891 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 892 } 893 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 894 idiag = a->idiag+49*a->mbs - 49; 895 i2 = 7*m - 7; 896 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 897 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 898 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 899 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 900 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 901 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 902 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 903 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 904 idiag -= 49; 905 i2 -= 7; 906 for (i=m-2; i>=0; i--) { 907 v = aa + 49*(diag[i]+1); 908 vi = aj + diag[i] + 1; 909 nz = ai[i+1] - diag[i] - 1; 910 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 911 while (nz--) { 912 idx = 7*(*vi++); 913 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 914 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 915 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 916 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 917 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 918 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 919 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 920 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 921 v += 49; 922 } 923 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 924 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 925 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 926 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 927 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 928 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 929 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 930 idiag -= 49; 931 i2 -= 7; 932 } 933 ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr); 934 } 935 } else { 936 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 937 } 938 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 939 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 940 PetscFunctionReturn(0); 941 } 942 943 /* 944 Special version for direct calls from Fortran (Used in PETSc-fun3d) 945 */ 946 #if defined(PETSC_HAVE_FORTRAN_CAPS) 947 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 948 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 949 #define matsetvaluesblocked4_ matsetvaluesblocked4 950 #endif 951 952 EXTERN_C_BEGIN 953 #undef __FUNCT__ 954 #define __FUNCT__ "matsetvaluesblocked4_" 955 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 956 { 957 Mat A = *AA; 958 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 959 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 960 PetscInt *ai=a->i,*ailen=a->ilen; 961 PetscInt *aj=a->j,stepval,lastcol = -1; 962 const PetscScalar *value = v; 963 MatScalar *ap,*aa = a->a,*bap; 964 965 PetscFunctionBegin; 966 if (A->rmap.bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 967 stepval = (n-1)*4; 968 for (k=0; k<m; k++) { /* loop over added rows */ 969 row = im[k]; 970 rp = aj + ai[row]; 971 ap = aa + 16*ai[row]; 972 nrow = ailen[row]; 973 low = 0; 974 high = nrow; 975 for (l=0; l<n; l++) { /* loop over added columns */ 976 col = in[l]; 977 if (col <= lastcol) low = 0; else high = nrow; 978 lastcol = col; 979 value = v + k*(stepval+4 + l)*4; 980 while (high-low > 7) { 981 t = (low+high)/2; 982 if (rp[t] > col) high = t; 983 else low = t; 984 } 985 for (i=low; i<high; i++) { 986 if (rp[i] > col) break; 987 if (rp[i] == col) { 988 bap = ap + 16*i; 989 for (ii=0; ii<4; ii++,value+=stepval) { 990 for (jj=ii; jj<16; jj+=4) { 991 bap[jj] += *value++; 992 } 993 } 994 goto noinsert2; 995 } 996 } 997 N = nrow++ - 1; 998 high++; /* added new column index thus must search to one higher than before */ 999 /* shift up all the later entries in this row */ 1000 for (ii=N; ii>=i; ii--) { 1001 rp[ii+1] = rp[ii]; 1002 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1003 } 1004 if (N >= i) { 1005 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1006 } 1007 rp[i] = col; 1008 bap = ap + 16*i; 1009 for (ii=0; ii<4; ii++,value+=stepval) { 1010 for (jj=ii; jj<16; jj+=4) { 1011 bap[jj] = *value++; 1012 } 1013 } 1014 noinsert2:; 1015 low = i; 1016 } 1017 ailen[row] = nrow; 1018 } 1019 PetscFunctionReturnVoid(); 1020 } 1021 EXTERN_C_END 1022 1023 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1024 #define matsetvalues4_ MATSETVALUES4 1025 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1026 #define matsetvalues4_ matsetvalues4 1027 #endif 1028 1029 EXTERN_C_BEGIN 1030 #undef __FUNCT__ 1031 #define __FUNCT__ "MatSetValues4_" 1032 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1033 { 1034 Mat A = *AA; 1035 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1036 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1037 PetscInt *ai=a->i,*ailen=a->ilen; 1038 PetscInt *aj=a->j,brow,bcol; 1039 PetscInt ridx,cidx,lastcol = -1; 1040 MatScalar *ap,value,*aa=a->a,*bap; 1041 1042 PetscFunctionBegin; 1043 for (k=0; k<m; k++) { /* loop over added rows */ 1044 row = im[k]; brow = row/4; 1045 rp = aj + ai[brow]; 1046 ap = aa + 16*ai[brow]; 1047 nrow = ailen[brow]; 1048 low = 0; 1049 high = nrow; 1050 for (l=0; l<n; l++) { /* loop over added columns */ 1051 col = in[l]; bcol = col/4; 1052 ridx = row % 4; cidx = col % 4; 1053 value = v[l + k*n]; 1054 if (col <= lastcol) low = 0; else high = nrow; 1055 lastcol = col; 1056 while (high-low > 7) { 1057 t = (low+high)/2; 1058 if (rp[t] > bcol) high = t; 1059 else low = t; 1060 } 1061 for (i=low; i<high; i++) { 1062 if (rp[i] > bcol) break; 1063 if (rp[i] == bcol) { 1064 bap = ap + 16*i + 4*cidx + ridx; 1065 *bap += value; 1066 goto noinsert1; 1067 } 1068 } 1069 N = nrow++ - 1; 1070 high++; /* added new column thus must search to one higher than before */ 1071 /* shift up all the later entries in this row */ 1072 for (ii=N; ii>=i; ii--) { 1073 rp[ii+1] = rp[ii]; 1074 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1075 } 1076 if (N>=i) { 1077 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1078 } 1079 rp[i] = bcol; 1080 ap[16*i + 4*cidx + ridx] = value; 1081 noinsert1:; 1082 low = i; 1083 } 1084 ailen[brow] = nrow; 1085 } 1086 PetscFunctionReturnVoid(); 1087 } 1088 EXTERN_C_END 1089 1090 /* 1091 Checks for missing diagonals 1092 */ 1093 #undef __FUNCT__ 1094 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1095 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d) 1096 { 1097 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1098 PetscErrorCode ierr; 1099 PetscInt *diag,*jj = a->j,i; 1100 1101 PetscFunctionBegin; 1102 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1103 *missing = PETSC_FALSE; 1104 diag = a->diag; 1105 for (i=0; i<a->mbs; i++) { 1106 if (jj[diag[i]] != i) { 1107 *missing = PETSC_TRUE; 1108 if (d) *d = i; 1109 } 1110 } 1111 PetscFunctionReturn(0); 1112 } 1113 1114 #undef __FUNCT__ 1115 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1116 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1117 { 1118 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1119 PetscErrorCode ierr; 1120 PetscInt i,j,m = a->mbs; 1121 1122 PetscFunctionBegin; 1123 if (!a->diag) { 1124 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1125 } 1126 for (i=0; i<m; i++) { 1127 a->diag[i] = a->i[i+1]; 1128 for (j=a->i[i]; j<a->i[i+1]; j++) { 1129 if (a->j[j] == i) { 1130 a->diag[i] = j; 1131 break; 1132 } 1133 } 1134 } 1135 PetscFunctionReturn(0); 1136 } 1137 1138 1139 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1140 1141 #undef __FUNCT__ 1142 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1143 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1144 { 1145 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1146 PetscErrorCode ierr; 1147 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs,nbs = 1,k,l,cnt; 1148 PetscInt *tia, *tja; 1149 1150 PetscFunctionBegin; 1151 *nn = n; 1152 if (!ia) PetscFunctionReturn(0); 1153 if (symmetric) { 1154 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1155 } else { 1156 tia = a->i; tja = a->j; 1157 } 1158 1159 if (!blockcompressed && bs > 1) { 1160 (*nn) *= bs; 1161 nbs = bs; 1162 /* malloc & create the natural set of indices */ 1163 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1164 if (n) { 1165 (*ia)[0] = 0; 1166 for (j=1; j<bs; j++) { 1167 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1168 } 1169 } 1170 1171 for (i=1; i<n; i++) { 1172 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1173 for (j=1; j<bs; j++) { 1174 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1175 } 1176 } 1177 if (n) { 1178 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1179 } 1180 1181 if (ja) { 1182 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1183 cnt = 0; 1184 for (i=0; i<n; i++) { 1185 for (j=0; j<bs; j++) { 1186 for (k=tia[i]; k<tia[i+1]; k++) { 1187 for (l=0; l<bs; l++) { 1188 (*ja)[cnt++] = bs*tja[k] + l; 1189 } 1190 } 1191 } 1192 } 1193 } 1194 1195 n *= bs; 1196 nz *= bs*bs; 1197 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1198 ierr = PetscFree(tia);CHKERRQ(ierr); 1199 ierr = PetscFree(tja);CHKERRQ(ierr); 1200 } 1201 } else { 1202 *ia = tia; 1203 if (ja) *ja = tja; 1204 } 1205 if (oshift == 1) { 1206 for (i=0; i<n+nbs; i++) (*ia)[i]++; 1207 if (ja) for (i=0; i<nz; i++) (*ja)[i]++; 1208 } 1209 PetscFunctionReturn(0); 1210 } 1211 1212 #undef __FUNCT__ 1213 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1214 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1215 { 1216 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1217 PetscErrorCode ierr; 1218 PetscInt i,n = a->mbs,nz = a->i[n]; 1219 1220 PetscFunctionBegin; 1221 if (!ia) PetscFunctionReturn(0); 1222 if (!blockcompressed && A->rmap.bs > 1) { 1223 ierr = PetscFree(*ia);CHKERRQ(ierr); 1224 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1225 } else if (symmetric) { 1226 ierr = PetscFree(*ia);CHKERRQ(ierr); 1227 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1228 } else if (oshift == 1) { /* blockcompressed */ 1229 for (i=0; i<n+1; i++) a->i[i]--; 1230 if (ja) {for (i=0; i<nz; i++) a->j[i]--;} 1231 } 1232 PetscFunctionReturn(0); 1233 } 1234 1235 #undef __FUNCT__ 1236 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1237 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1238 { 1239 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1240 PetscErrorCode ierr; 1241 1242 PetscFunctionBegin; 1243 #if defined(PETSC_USE_LOG) 1244 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz); 1245 #endif 1246 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1247 if (a->row) { 1248 ierr = ISDestroy(a->row);CHKERRQ(ierr); 1249 } 1250 if (a->col) { 1251 ierr = ISDestroy(a->col);CHKERRQ(ierr); 1252 } 1253 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1254 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1255 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1256 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1257 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1258 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 1259 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1260 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1261 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 1262 1263 ierr = PetscFree(a);CHKERRQ(ierr); 1264 1265 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1266 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1267 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1268 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1269 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1270 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1271 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1272 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1273 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1274 PetscFunctionReturn(0); 1275 } 1276 1277 #undef __FUNCT__ 1278 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1279 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg) 1280 { 1281 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1282 PetscErrorCode ierr; 1283 1284 PetscFunctionBegin; 1285 switch (op) { 1286 case MAT_ROW_ORIENTED: 1287 a->roworiented = flg; 1288 break; 1289 case MAT_KEEP_ZEROED_ROWS: 1290 a->keepzeroedrows = flg; 1291 break; 1292 case MAT_NEW_NONZERO_LOCATIONS: 1293 a->nonew = (flg ? 0 : 1); 1294 break; 1295 case MAT_NEW_NONZERO_LOCATION_ERR: 1296 a->nonew = (flg ? -1 : 0); 1297 break; 1298 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1299 a->nonew = (flg ? -2 : 0); 1300 break; 1301 case MAT_NEW_DIAGONALS: 1302 case MAT_IGNORE_OFF_PROC_ENTRIES: 1303 case MAT_USE_HASH_TABLE: 1304 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1305 break; 1306 case MAT_SYMMETRIC: 1307 case MAT_STRUCTURALLY_SYMMETRIC: 1308 case MAT_HERMITIAN: 1309 case MAT_SYMMETRY_ETERNAL: 1310 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1311 break; 1312 default: 1313 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1314 } 1315 PetscFunctionReturn(0); 1316 } 1317 1318 #undef __FUNCT__ 1319 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1320 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1321 { 1322 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1323 PetscErrorCode ierr; 1324 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1325 MatScalar *aa,*aa_i; 1326 PetscScalar *v_i; 1327 1328 PetscFunctionBegin; 1329 bs = A->rmap.bs; 1330 ai = a->i; 1331 aj = a->j; 1332 aa = a->a; 1333 bs2 = a->bs2; 1334 1335 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1336 1337 bn = row/bs; /* Block number */ 1338 bp = row % bs; /* Block Position */ 1339 M = ai[bn+1] - ai[bn]; 1340 *nz = bs*M; 1341 1342 if (v) { 1343 *v = 0; 1344 if (*nz) { 1345 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1346 for (i=0; i<M; i++) { /* for each block in the block row */ 1347 v_i = *v + i*bs; 1348 aa_i = aa + bs2*(ai[bn] + i); 1349 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1350 } 1351 } 1352 } 1353 1354 if (idx) { 1355 *idx = 0; 1356 if (*nz) { 1357 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1358 for (i=0; i<M; i++) { /* for each block in the block row */ 1359 idx_i = *idx + i*bs; 1360 itmp = bs*aj[ai[bn] + i]; 1361 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1362 } 1363 } 1364 } 1365 PetscFunctionReturn(0); 1366 } 1367 1368 #undef __FUNCT__ 1369 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1370 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1371 { 1372 PetscErrorCode ierr; 1373 1374 PetscFunctionBegin; 1375 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1376 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1377 PetscFunctionReturn(0); 1378 } 1379 1380 #undef __FUNCT__ 1381 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1382 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1383 { 1384 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1385 Mat C; 1386 PetscErrorCode ierr; 1387 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1388 PetscInt *rows,*cols,bs2=a->bs2; 1389 MatScalar *array; 1390 1391 PetscFunctionBegin; 1392 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1393 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1394 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1395 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1396 1397 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1398 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1399 ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr); 1400 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1401 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1402 ierr = PetscFree(col);CHKERRQ(ierr); 1403 } else { 1404 C = *B; 1405 } 1406 1407 array = a->a; 1408 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1409 cols = rows + bs; 1410 for (i=0; i<mbs; i++) { 1411 cols[0] = i*bs; 1412 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1413 len = ai[i+1] - ai[i]; 1414 for (j=0; j<len; j++) { 1415 rows[0] = (*aj++)*bs; 1416 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1417 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1418 array += bs2; 1419 } 1420 } 1421 ierr = PetscFree(rows);CHKERRQ(ierr); 1422 1423 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1424 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1425 1426 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1427 *B = C; 1428 } else { 1429 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1430 } 1431 PetscFunctionReturn(0); 1432 } 1433 1434 #undef __FUNCT__ 1435 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1436 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1437 { 1438 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1439 PetscErrorCode ierr; 1440 PetscInt i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2; 1441 int fd; 1442 PetscScalar *aa; 1443 FILE *file; 1444 1445 PetscFunctionBegin; 1446 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1447 ierr = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1448 col_lens[0] = MAT_FILE_COOKIE; 1449 1450 col_lens[1] = A->rmap.N; 1451 col_lens[2] = A->cmap.n; 1452 col_lens[3] = a->nz*bs2; 1453 1454 /* store lengths of each row and write (including header) to file */ 1455 count = 0; 1456 for (i=0; i<a->mbs; i++) { 1457 for (j=0; j<bs; j++) { 1458 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1459 } 1460 } 1461 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1462 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1463 1464 /* store column indices (zero start index) */ 1465 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1466 count = 0; 1467 for (i=0; i<a->mbs; i++) { 1468 for (j=0; j<bs; j++) { 1469 for (k=a->i[i]; k<a->i[i+1]; k++) { 1470 for (l=0; l<bs; l++) { 1471 jj[count++] = bs*a->j[k] + l; 1472 } 1473 } 1474 } 1475 } 1476 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1477 ierr = PetscFree(jj);CHKERRQ(ierr); 1478 1479 /* store nonzero values */ 1480 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1481 count = 0; 1482 for (i=0; i<a->mbs; i++) { 1483 for (j=0; j<bs; j++) { 1484 for (k=a->i[i]; k<a->i[i+1]; k++) { 1485 for (l=0; l<bs; l++) { 1486 aa[count++] = a->a[bs2*k + l*bs + j]; 1487 } 1488 } 1489 } 1490 } 1491 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1492 ierr = PetscFree(aa);CHKERRQ(ierr); 1493 1494 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1495 if (file) { 1496 fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs); 1497 } 1498 PetscFunctionReturn(0); 1499 } 1500 1501 #undef __FUNCT__ 1502 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1503 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1504 { 1505 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1506 PetscErrorCode ierr; 1507 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 1508 PetscViewerFormat format; 1509 1510 PetscFunctionBegin; 1511 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1512 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1513 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1514 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1515 Mat aij; 1516 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1517 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1518 ierr = MatDestroy(aij);CHKERRQ(ierr); 1519 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1520 PetscFunctionReturn(0); 1521 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1522 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1523 for (i=0; i<a->mbs; i++) { 1524 for (j=0; j<bs; j++) { 1525 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1526 for (k=a->i[i]; k<a->i[i+1]; k++) { 1527 for (l=0; l<bs; l++) { 1528 #if defined(PETSC_USE_COMPLEX) 1529 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1530 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1531 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1532 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1533 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1534 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1535 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1536 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1537 } 1538 #else 1539 if (a->a[bs2*k + l*bs + j] != 0.0) { 1540 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1541 } 1542 #endif 1543 } 1544 } 1545 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1546 } 1547 } 1548 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1549 } else { 1550 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1551 for (i=0; i<a->mbs; i++) { 1552 for (j=0; j<bs; j++) { 1553 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1554 for (k=a->i[i]; k<a->i[i+1]; k++) { 1555 for (l=0; l<bs; l++) { 1556 #if defined(PETSC_USE_COMPLEX) 1557 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1558 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1559 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1560 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1561 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1562 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1563 } else { 1564 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1565 } 1566 #else 1567 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1568 #endif 1569 } 1570 } 1571 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1572 } 1573 } 1574 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1575 } 1576 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1577 PetscFunctionReturn(0); 1578 } 1579 1580 #undef __FUNCT__ 1581 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1582 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1583 { 1584 Mat A = (Mat) Aa; 1585 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1586 PetscErrorCode ierr; 1587 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 1588 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1589 MatScalar *aa; 1590 PetscViewer viewer; 1591 1592 PetscFunctionBegin; 1593 1594 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1595 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1596 1597 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1598 1599 /* loop over matrix elements drawing boxes */ 1600 color = PETSC_DRAW_BLUE; 1601 for (i=0,row=0; i<mbs; i++,row+=bs) { 1602 for (j=a->i[i]; j<a->i[i+1]; j++) { 1603 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1604 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1605 aa = a->a + j*bs2; 1606 for (k=0; k<bs; k++) { 1607 for (l=0; l<bs; l++) { 1608 if (PetscRealPart(*aa++) >= 0.) continue; 1609 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1610 } 1611 } 1612 } 1613 } 1614 color = PETSC_DRAW_CYAN; 1615 for (i=0,row=0; i<mbs; i++,row+=bs) { 1616 for (j=a->i[i]; j<a->i[i+1]; j++) { 1617 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1618 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1619 aa = a->a + j*bs2; 1620 for (k=0; k<bs; k++) { 1621 for (l=0; l<bs; l++) { 1622 if (PetscRealPart(*aa++) != 0.) continue; 1623 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1624 } 1625 } 1626 } 1627 } 1628 1629 color = PETSC_DRAW_RED; 1630 for (i=0,row=0; i<mbs; i++,row+=bs) { 1631 for (j=a->i[i]; j<a->i[i+1]; j++) { 1632 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1633 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1634 aa = a->a + j*bs2; 1635 for (k=0; k<bs; k++) { 1636 for (l=0; l<bs; l++) { 1637 if (PetscRealPart(*aa++) <= 0.) continue; 1638 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1639 } 1640 } 1641 } 1642 } 1643 PetscFunctionReturn(0); 1644 } 1645 1646 #undef __FUNCT__ 1647 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1648 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1649 { 1650 PetscErrorCode ierr; 1651 PetscReal xl,yl,xr,yr,w,h; 1652 PetscDraw draw; 1653 PetscTruth isnull; 1654 1655 PetscFunctionBegin; 1656 1657 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1658 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1659 1660 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1661 xr = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 1662 xr += w; yr += h; xl = -w; yl = -h; 1663 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1664 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1665 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1666 PetscFunctionReturn(0); 1667 } 1668 1669 #undef __FUNCT__ 1670 #define __FUNCT__ "MatView_SeqBAIJ" 1671 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1672 { 1673 PetscErrorCode ierr; 1674 PetscTruth iascii,isbinary,isdraw; 1675 1676 PetscFunctionBegin; 1677 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1678 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1679 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1680 if (iascii){ 1681 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1682 } else if (isbinary) { 1683 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1684 } else if (isdraw) { 1685 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1686 } else { 1687 Mat B; 1688 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1689 ierr = MatView(B,viewer);CHKERRQ(ierr); 1690 ierr = MatDestroy(B);CHKERRQ(ierr); 1691 } 1692 PetscFunctionReturn(0); 1693 } 1694 1695 1696 #undef __FUNCT__ 1697 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1698 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1699 { 1700 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1701 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1702 PetscInt *ai = a->i,*ailen = a->ilen; 1703 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 1704 MatScalar *ap,*aa = a->a; 1705 1706 PetscFunctionBegin; 1707 for (k=0; k<m; k++) { /* loop over rows */ 1708 row = im[k]; brow = row/bs; 1709 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1710 if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1711 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1712 nrow = ailen[brow]; 1713 for (l=0; l<n; l++) { /* loop over columns */ 1714 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1715 if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1716 col = in[l] ; 1717 bcol = col/bs; 1718 cidx = col%bs; 1719 ridx = row%bs; 1720 high = nrow; 1721 low = 0; /* assume unsorted */ 1722 while (high-low > 5) { 1723 t = (low+high)/2; 1724 if (rp[t] > bcol) high = t; 1725 else low = t; 1726 } 1727 for (i=low; i<high; i++) { 1728 if (rp[i] > bcol) break; 1729 if (rp[i] == bcol) { 1730 *v++ = ap[bs2*i+bs*cidx+ridx]; 1731 goto finished; 1732 } 1733 } 1734 *v++ = 0.0; 1735 finished:; 1736 } 1737 } 1738 PetscFunctionReturn(0); 1739 } 1740 1741 #define CHUNKSIZE 10 1742 #undef __FUNCT__ 1743 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1744 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1745 { 1746 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1747 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1748 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1749 PetscErrorCode ierr; 1750 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 1751 PetscTruth roworiented=a->roworiented; 1752 const PetscScalar *value = v; 1753 MatScalar *ap,*aa = a->a,*bap; 1754 1755 PetscFunctionBegin; 1756 if (roworiented) { 1757 stepval = (n-1)*bs; 1758 } else { 1759 stepval = (m-1)*bs; 1760 } 1761 for (k=0; k<m; k++) { /* loop over added rows */ 1762 row = im[k]; 1763 if (row < 0) continue; 1764 #if defined(PETSC_USE_DEBUG) 1765 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1766 #endif 1767 rp = aj + ai[row]; 1768 ap = aa + bs2*ai[row]; 1769 rmax = imax[row]; 1770 nrow = ailen[row]; 1771 low = 0; 1772 high = nrow; 1773 for (l=0; l<n; l++) { /* loop over added columns */ 1774 if (in[l] < 0) continue; 1775 #if defined(PETSC_USE_DEBUG) 1776 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1777 #endif 1778 col = in[l]; 1779 if (roworiented) { 1780 value = v + k*(stepval+bs)*bs + l*bs; 1781 } else { 1782 value = v + l*(stepval+bs)*bs + k*bs; 1783 } 1784 if (col <= lastcol) low = 0; else high = nrow; 1785 lastcol = col; 1786 while (high-low > 7) { 1787 t = (low+high)/2; 1788 if (rp[t] > col) high = t; 1789 else low = t; 1790 } 1791 for (i=low; i<high; i++) { 1792 if (rp[i] > col) break; 1793 if (rp[i] == col) { 1794 bap = ap + bs2*i; 1795 if (roworiented) { 1796 if (is == ADD_VALUES) { 1797 for (ii=0; ii<bs; ii++,value+=stepval) { 1798 for (jj=ii; jj<bs2; jj+=bs) { 1799 bap[jj] += *value++; 1800 } 1801 } 1802 } else { 1803 for (ii=0; ii<bs; ii++,value+=stepval) { 1804 for (jj=ii; jj<bs2; jj+=bs) { 1805 bap[jj] = *value++; 1806 } 1807 } 1808 } 1809 } else { 1810 if (is == ADD_VALUES) { 1811 for (ii=0; ii<bs; ii++,value+=stepval) { 1812 for (jj=0; jj<bs; jj++) { 1813 *bap++ += *value++; 1814 } 1815 } 1816 } else { 1817 for (ii=0; ii<bs; ii++,value+=stepval) { 1818 for (jj=0; jj<bs; jj++) { 1819 *bap++ = *value++; 1820 } 1821 } 1822 } 1823 } 1824 goto noinsert2; 1825 } 1826 } 1827 if (nonew == 1) goto noinsert2; 1828 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1829 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1830 N = nrow++ - 1; high++; 1831 /* shift up all the later entries in this row */ 1832 for (ii=N; ii>=i; ii--) { 1833 rp[ii+1] = rp[ii]; 1834 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1835 } 1836 if (N >= i) { 1837 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1838 } 1839 rp[i] = col; 1840 bap = ap + bs2*i; 1841 if (roworiented) { 1842 for (ii=0; ii<bs; ii++,value+=stepval) { 1843 for (jj=ii; jj<bs2; jj+=bs) { 1844 bap[jj] = *value++; 1845 } 1846 } 1847 } else { 1848 for (ii=0; ii<bs; ii++,value+=stepval) { 1849 for (jj=0; jj<bs; jj++) { 1850 *bap++ = *value++; 1851 } 1852 } 1853 } 1854 noinsert2:; 1855 low = i; 1856 } 1857 ailen[row] = nrow; 1858 } 1859 PetscFunctionReturn(0); 1860 } 1861 1862 #undef __FUNCT__ 1863 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1864 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1865 { 1866 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1867 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1868 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 1869 PetscErrorCode ierr; 1870 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1871 MatScalar *aa = a->a,*ap; 1872 PetscReal ratio=0.6; 1873 1874 PetscFunctionBegin; 1875 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1876 1877 if (m) rmax = ailen[0]; 1878 for (i=1; i<mbs; i++) { 1879 /* move each row back by the amount of empty slots (fshift) before it*/ 1880 fshift += imax[i-1] - ailen[i-1]; 1881 rmax = PetscMax(rmax,ailen[i]); 1882 if (fshift) { 1883 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1884 N = ailen[i]; 1885 for (j=0; j<N; j++) { 1886 ip[j-fshift] = ip[j]; 1887 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1888 } 1889 } 1890 ai[i] = ai[i-1] + ailen[i-1]; 1891 } 1892 if (mbs) { 1893 fshift += imax[mbs-1] - ailen[mbs-1]; 1894 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1895 } 1896 /* reset ilen and imax for each row */ 1897 for (i=0; i<mbs; i++) { 1898 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1899 } 1900 a->nz = ai[mbs]; 1901 1902 /* diagonals may have moved, so kill the diagonal pointers */ 1903 a->idiagvalid = PETSC_FALSE; 1904 if (fshift && a->diag) { 1905 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1906 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1907 a->diag = 0; 1908 } 1909 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1910 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1911 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1912 a->reallocs = 0; 1913 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1914 1915 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1916 if (a->compressedrow.use){ 1917 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1918 } 1919 1920 A->same_nonzero = PETSC_TRUE; 1921 PetscFunctionReturn(0); 1922 } 1923 1924 /* 1925 This function returns an array of flags which indicate the locations of contiguous 1926 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1927 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1928 Assume: sizes should be long enough to hold all the values. 1929 */ 1930 #undef __FUNCT__ 1931 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1932 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1933 { 1934 PetscInt i,j,k,row; 1935 PetscTruth flg; 1936 1937 PetscFunctionBegin; 1938 for (i=0,j=0; i<n; j++) { 1939 row = idx[i]; 1940 if (row%bs!=0) { /* Not the begining of a block */ 1941 sizes[j] = 1; 1942 i++; 1943 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1944 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1945 i++; 1946 } else { /* Begining of the block, so check if the complete block exists */ 1947 flg = PETSC_TRUE; 1948 for (k=1; k<bs; k++) { 1949 if (row+k != idx[i+k]) { /* break in the block */ 1950 flg = PETSC_FALSE; 1951 break; 1952 } 1953 } 1954 if (flg) { /* No break in the bs */ 1955 sizes[j] = bs; 1956 i+= bs; 1957 } else { 1958 sizes[j] = 1; 1959 i++; 1960 } 1961 } 1962 } 1963 *bs_max = j; 1964 PetscFunctionReturn(0); 1965 } 1966 1967 #undef __FUNCT__ 1968 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1969 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 1970 { 1971 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1972 PetscErrorCode ierr; 1973 PetscInt i,j,k,count,*rows; 1974 PetscInt bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max; 1975 PetscScalar zero = 0.0; 1976 MatScalar *aa; 1977 1978 PetscFunctionBegin; 1979 /* Make a copy of the IS and sort it */ 1980 /* allocate memory for rows,sizes */ 1981 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1982 sizes = rows + is_n; 1983 1984 /* copy IS values to rows, and sort them */ 1985 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1986 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1987 if (baij->keepzeroedrows) { 1988 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1989 bs_max = is_n; 1990 A->same_nonzero = PETSC_TRUE; 1991 } else { 1992 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1993 A->same_nonzero = PETSC_FALSE; 1994 } 1995 1996 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1997 row = rows[j]; 1998 if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 1999 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2000 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2001 if (sizes[i] == bs && !baij->keepzeroedrows) { 2002 if (diag != 0.0) { 2003 if (baij->ilen[row/bs] > 0) { 2004 baij->ilen[row/bs] = 1; 2005 baij->j[baij->i[row/bs]] = row/bs; 2006 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2007 } 2008 /* Now insert all the diagonal values for this bs */ 2009 for (k=0; k<bs; k++) { 2010 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2011 } 2012 } else { /* (diag == 0.0) */ 2013 baij->ilen[row/bs] = 0; 2014 } /* end (diag == 0.0) */ 2015 } else { /* (sizes[i] != bs) */ 2016 #if defined (PETSC_USE_DEBUG) 2017 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2018 #endif 2019 for (k=0; k<count; k++) { 2020 aa[0] = zero; 2021 aa += bs; 2022 } 2023 if (diag != 0.0) { 2024 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2025 } 2026 } 2027 } 2028 2029 ierr = PetscFree(rows);CHKERRQ(ierr); 2030 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2031 PetscFunctionReturn(0); 2032 } 2033 2034 #undef __FUNCT__ 2035 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2036 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2037 { 2038 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2039 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2040 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2041 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 2042 PetscErrorCode ierr; 2043 PetscInt ridx,cidx,bs2=a->bs2; 2044 PetscTruth roworiented=a->roworiented; 2045 MatScalar *ap,value,*aa=a->a,*bap; 2046 2047 PetscFunctionBegin; 2048 for (k=0; k<m; k++) { /* loop over added rows */ 2049 row = im[k]; 2050 brow = row/bs; 2051 if (row < 0) continue; 2052 #if defined(PETSC_USE_DEBUG) 2053 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 2054 #endif 2055 rp = aj + ai[brow]; 2056 ap = aa + bs2*ai[brow]; 2057 rmax = imax[brow]; 2058 nrow = ailen[brow]; 2059 low = 0; 2060 high = nrow; 2061 for (l=0; l<n; l++) { /* loop over added columns */ 2062 if (in[l] < 0) continue; 2063 #if defined(PETSC_USE_DEBUG) 2064 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 2065 #endif 2066 col = in[l]; bcol = col/bs; 2067 ridx = row % bs; cidx = col % bs; 2068 if (roworiented) { 2069 value = v[l + k*n]; 2070 } else { 2071 value = v[k + l*m]; 2072 } 2073 if (col <= lastcol) low = 0; else high = nrow; 2074 lastcol = col; 2075 while (high-low > 7) { 2076 t = (low+high)/2; 2077 if (rp[t] > bcol) high = t; 2078 else low = t; 2079 } 2080 for (i=low; i<high; i++) { 2081 if (rp[i] > bcol) break; 2082 if (rp[i] == bcol) { 2083 bap = ap + bs2*i + bs*cidx + ridx; 2084 if (is == ADD_VALUES) *bap += value; 2085 else *bap = value; 2086 goto noinsert1; 2087 } 2088 } 2089 if (nonew == 1) goto noinsert1; 2090 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2091 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2092 N = nrow++ - 1; high++; 2093 /* shift up all the later entries in this row */ 2094 for (ii=N; ii>=i; ii--) { 2095 rp[ii+1] = rp[ii]; 2096 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2097 } 2098 if (N>=i) { 2099 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2100 } 2101 rp[i] = bcol; 2102 ap[bs2*i + bs*cidx + ridx] = value; 2103 a->nz++; 2104 noinsert1:; 2105 low = i; 2106 } 2107 ailen[brow] = nrow; 2108 } 2109 A->same_nonzero = PETSC_FALSE; 2110 PetscFunctionReturn(0); 2111 } 2112 2113 2114 #undef __FUNCT__ 2115 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2116 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 2117 { 2118 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2119 Mat outA; 2120 PetscErrorCode ierr; 2121 PetscTruth row_identity,col_identity; 2122 2123 PetscFunctionBegin; 2124 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2125 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2126 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2127 if (!row_identity || !col_identity) { 2128 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2129 } 2130 2131 outA = inA; 2132 inA->factor = MAT_FACTOR_LU; 2133 2134 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2135 2136 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2137 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 2138 a->row = row; 2139 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2140 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 2141 a->col = col; 2142 2143 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2144 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2145 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2146 2147 /* 2148 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 2149 for ILU(0) factorization with natural ordering 2150 */ 2151 if (inA->rmap.bs < 8) { 2152 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 2153 } else { 2154 if (!a->solve_work) { 2155 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2156 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2157 } 2158 } 2159 2160 ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 2161 2162 PetscFunctionReturn(0); 2163 } 2164 2165 EXTERN_C_BEGIN 2166 #undef __FUNCT__ 2167 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2168 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2169 { 2170 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2171 PetscInt i,nz,nbs; 2172 2173 PetscFunctionBegin; 2174 nz = baij->maxnz/baij->bs2; 2175 nbs = baij->nbs; 2176 for (i=0; i<nz; i++) { 2177 baij->j[i] = indices[i]; 2178 } 2179 baij->nz = nz; 2180 for (i=0; i<nbs; i++) { 2181 baij->ilen[i] = baij->imax[i]; 2182 } 2183 2184 PetscFunctionReturn(0); 2185 } 2186 EXTERN_C_END 2187 2188 #undef __FUNCT__ 2189 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2190 /*@ 2191 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2192 in the matrix. 2193 2194 Input Parameters: 2195 + mat - the SeqBAIJ matrix 2196 - indices - the column indices 2197 2198 Level: advanced 2199 2200 Notes: 2201 This can be called if you have precomputed the nonzero structure of the 2202 matrix and want to provide it to the matrix object to improve the performance 2203 of the MatSetValues() operation. 2204 2205 You MUST have set the correct numbers of nonzeros per row in the call to 2206 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2207 2208 MUST be called before any calls to MatSetValues(); 2209 2210 @*/ 2211 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2212 { 2213 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 2214 2215 PetscFunctionBegin; 2216 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 2217 PetscValidPointer(indices,2); 2218 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 2219 if (f) { 2220 ierr = (*f)(mat,indices);CHKERRQ(ierr); 2221 } else { 2222 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 2223 } 2224 PetscFunctionReturn(0); 2225 } 2226 2227 #undef __FUNCT__ 2228 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2229 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2230 { 2231 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2232 PetscErrorCode ierr; 2233 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2234 PetscReal atmp; 2235 PetscScalar *x,zero = 0.0; 2236 MatScalar *aa; 2237 PetscInt ncols,brow,krow,kcol; 2238 2239 PetscFunctionBegin; 2240 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2241 bs = A->rmap.bs; 2242 aa = a->a; 2243 ai = a->i; 2244 aj = a->j; 2245 mbs = a->mbs; 2246 2247 ierr = VecSet(v,zero);CHKERRQ(ierr); 2248 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2249 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2250 if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2251 for (i=0; i<mbs; i++) { 2252 ncols = ai[1] - ai[0]; ai++; 2253 brow = bs*i; 2254 for (j=0; j<ncols; j++){ 2255 for (kcol=0; kcol<bs; kcol++){ 2256 for (krow=0; krow<bs; krow++){ 2257 atmp = PetscAbsScalar(*aa);aa++; 2258 row = brow + krow; /* row index */ 2259 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2260 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2261 } 2262 } 2263 aj++; 2264 } 2265 } 2266 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2267 PetscFunctionReturn(0); 2268 } 2269 2270 #undef __FUNCT__ 2271 #define __FUNCT__ "MatCopy_SeqBAIJ" 2272 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2273 { 2274 PetscErrorCode ierr; 2275 2276 PetscFunctionBegin; 2277 /* If the two matrices have the same copy implementation, use fast copy. */ 2278 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2279 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2280 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2281 2282 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 2283 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 2284 } 2285 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 2286 } else { 2287 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2288 } 2289 PetscFunctionReturn(0); 2290 } 2291 2292 #undef __FUNCT__ 2293 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2294 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2295 { 2296 PetscErrorCode ierr; 2297 2298 PetscFunctionBegin; 2299 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2300 PetscFunctionReturn(0); 2301 } 2302 2303 #undef __FUNCT__ 2304 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2305 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2306 { 2307 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2308 PetscFunctionBegin; 2309 *array = a->a; 2310 PetscFunctionReturn(0); 2311 } 2312 2313 #undef __FUNCT__ 2314 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2315 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2316 { 2317 PetscFunctionBegin; 2318 PetscFunctionReturn(0); 2319 } 2320 2321 #include "petscblaslapack.h" 2322 #undef __FUNCT__ 2323 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2324 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2325 { 2326 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2327 PetscErrorCode ierr; 2328 PetscInt i,bs=Y->rmap.bs,j,bs2; 2329 PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz); 2330 2331 PetscFunctionBegin; 2332 if (str == SAME_NONZERO_PATTERN) { 2333 PetscScalar alpha = a; 2334 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2335 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2336 if (y->xtoy && y->XtoY != X) { 2337 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2338 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2339 } 2340 if (!y->xtoy) { /* get xtoy */ 2341 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2342 y->XtoY = X; 2343 } 2344 bs2 = bs*bs; 2345 for (i=0; i<x->nz; i++) { 2346 j = 0; 2347 while (j < bs2){ 2348 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2349 j++; 2350 } 2351 } 2352 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2353 } else { 2354 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2355 } 2356 PetscFunctionReturn(0); 2357 } 2358 2359 #undef __FUNCT__ 2360 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2361 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2362 { 2363 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2364 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2365 MatScalar *aa = a->a; 2366 2367 PetscFunctionBegin; 2368 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2369 PetscFunctionReturn(0); 2370 } 2371 2372 #undef __FUNCT__ 2373 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2374 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2375 { 2376 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2377 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2378 MatScalar *aa = a->a; 2379 2380 PetscFunctionBegin; 2381 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2382 PetscFunctionReturn(0); 2383 } 2384 2385 2386 /* -------------------------------------------------------------------*/ 2387 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2388 MatGetRow_SeqBAIJ, 2389 MatRestoreRow_SeqBAIJ, 2390 MatMult_SeqBAIJ_N, 2391 /* 4*/ MatMultAdd_SeqBAIJ_N, 2392 MatMultTranspose_SeqBAIJ, 2393 MatMultTransposeAdd_SeqBAIJ, 2394 MatSolve_SeqBAIJ_N, 2395 0, 2396 0, 2397 /*10*/ 0, 2398 MatLUFactor_SeqBAIJ, 2399 0, 2400 0, 2401 MatTranspose_SeqBAIJ, 2402 /*15*/ MatGetInfo_SeqBAIJ, 2403 MatEqual_SeqBAIJ, 2404 MatGetDiagonal_SeqBAIJ, 2405 MatDiagonalScale_SeqBAIJ, 2406 MatNorm_SeqBAIJ, 2407 /*20*/ 0, 2408 MatAssemblyEnd_SeqBAIJ, 2409 0, 2410 MatSetOption_SeqBAIJ, 2411 MatZeroEntries_SeqBAIJ, 2412 /*25*/ MatZeroRows_SeqBAIJ, 2413 MatLUFactorSymbolic_SeqBAIJ, 2414 MatLUFactorNumeric_SeqBAIJ_N, 2415 MatCholeskyFactorSymbolic_SeqBAIJ, 2416 MatCholeskyFactorNumeric_SeqBAIJ_N, 2417 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2418 MatILUFactorSymbolic_SeqBAIJ, 2419 MatICCFactorSymbolic_SeqBAIJ, 2420 MatGetArray_SeqBAIJ, 2421 MatRestoreArray_SeqBAIJ, 2422 /*35*/ MatDuplicate_SeqBAIJ, 2423 0, 2424 0, 2425 MatILUFactor_SeqBAIJ, 2426 0, 2427 /*40*/ MatAXPY_SeqBAIJ, 2428 MatGetSubMatrices_SeqBAIJ, 2429 MatIncreaseOverlap_SeqBAIJ, 2430 MatGetValues_SeqBAIJ, 2431 MatCopy_SeqBAIJ, 2432 /*45*/ 0, 2433 MatScale_SeqBAIJ, 2434 0, 2435 0, 2436 0, 2437 /*50*/ 0, 2438 MatGetRowIJ_SeqBAIJ, 2439 MatRestoreRowIJ_SeqBAIJ, 2440 0, 2441 0, 2442 /*55*/ 0, 2443 0, 2444 0, 2445 0, 2446 MatSetValuesBlocked_SeqBAIJ, 2447 /*60*/ MatGetSubMatrix_SeqBAIJ, 2448 MatDestroy_SeqBAIJ, 2449 MatView_SeqBAIJ, 2450 0, 2451 0, 2452 /*65*/ 0, 2453 0, 2454 0, 2455 0, 2456 0, 2457 /*70*/ MatGetRowMaxAbs_SeqBAIJ, 2458 MatConvert_Basic, 2459 0, 2460 0, 2461 0, 2462 /*75*/ 0, 2463 0, 2464 0, 2465 0, 2466 0, 2467 /*80*/ 0, 2468 0, 2469 0, 2470 0, 2471 MatLoad_SeqBAIJ, 2472 /*85*/ 0, 2473 0, 2474 0, 2475 0, 2476 0, 2477 /*90*/ 0, 2478 0, 2479 0, 2480 0, 2481 0, 2482 /*95*/ 0, 2483 0, 2484 0, 2485 0, 2486 0, 2487 /*100*/0, 2488 0, 2489 0, 2490 0, 2491 0, 2492 /*105*/0, 2493 MatRealPart_SeqBAIJ, 2494 MatImaginaryPart_SeqBAIJ, 2495 0, 2496 0, 2497 /*110*/0, 2498 0, 2499 0, 2500 0, 2501 MatMissingDiagonal_SeqBAIJ 2502 /*115*/ 2503 }; 2504 2505 EXTERN_C_BEGIN 2506 #undef __FUNCT__ 2507 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2508 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2509 { 2510 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2511 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2512 PetscErrorCode ierr; 2513 2514 PetscFunctionBegin; 2515 if (aij->nonew != 1) { 2516 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2517 } 2518 2519 /* allocate space for values if not already there */ 2520 if (!aij->saved_values) { 2521 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2522 } 2523 2524 /* copy values over */ 2525 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2526 PetscFunctionReturn(0); 2527 } 2528 EXTERN_C_END 2529 2530 EXTERN_C_BEGIN 2531 #undef __FUNCT__ 2532 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2533 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2534 { 2535 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2536 PetscErrorCode ierr; 2537 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2538 2539 PetscFunctionBegin; 2540 if (aij->nonew != 1) { 2541 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2542 } 2543 if (!aij->saved_values) { 2544 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2545 } 2546 2547 /* copy values over */ 2548 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2549 PetscFunctionReturn(0); 2550 } 2551 EXTERN_C_END 2552 2553 EXTERN_C_BEGIN 2554 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2555 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2556 EXTERN_C_END 2557 2558 EXTERN_C_BEGIN 2559 #undef __FUNCT__ 2560 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2561 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2562 { 2563 Mat_SeqBAIJ *b; 2564 PetscErrorCode ierr; 2565 PetscInt i,mbs,nbs,bs2,newbs = bs; 2566 PetscTruth flg,skipallocation = PETSC_FALSE; 2567 2568 PetscFunctionBegin; 2569 2570 if (nz == MAT_SKIP_ALLOCATION) { 2571 skipallocation = PETSC_TRUE; 2572 nz = 0; 2573 } 2574 2575 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2576 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2577 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2578 2579 if (nnz && newbs != bs) { 2580 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2581 } 2582 bs = newbs; 2583 2584 B->rmap.bs = B->cmap.bs = bs; 2585 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2586 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2587 2588 B->preallocated = PETSC_TRUE; 2589 2590 mbs = B->rmap.n/bs; 2591 nbs = B->cmap.n/bs; 2592 bs2 = bs*bs; 2593 2594 if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) { 2595 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs); 2596 } 2597 2598 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2599 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2600 if (nnz) { 2601 for (i=0; i<mbs; i++) { 2602 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2603 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2604 } 2605 } 2606 2607 b = (Mat_SeqBAIJ*)B->data; 2608 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2609 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2610 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2611 2612 B->ops->solve = MatSolve_SeqBAIJ_Update; 2613 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2614 if (!flg) { 2615 switch (bs) { 2616 case 1: 2617 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2618 B->ops->mult = MatMult_SeqBAIJ_1; 2619 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2620 B->ops->pbrelax = MatPBRelax_SeqBAIJ_1; 2621 break; 2622 case 2: 2623 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2624 B->ops->mult = MatMult_SeqBAIJ_2; 2625 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2626 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2627 break; 2628 case 3: 2629 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2630 B->ops->mult = MatMult_SeqBAIJ_3; 2631 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2632 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2633 break; 2634 case 4: 2635 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2636 B->ops->mult = MatMult_SeqBAIJ_4; 2637 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2638 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2639 break; 2640 case 5: 2641 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2642 B->ops->mult = MatMult_SeqBAIJ_5; 2643 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2644 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2645 break; 2646 case 6: 2647 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2648 B->ops->mult = MatMult_SeqBAIJ_6; 2649 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2650 B->ops->pbrelax = MatPBRelax_SeqBAIJ_6; 2651 break; 2652 case 7: 2653 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2654 B->ops->mult = MatMult_SeqBAIJ_7; 2655 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2656 B->ops->pbrelax = MatPBRelax_SeqBAIJ_7; 2657 break; 2658 default: 2659 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2660 B->ops->mult = MatMult_SeqBAIJ_N; 2661 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2662 break; 2663 } 2664 } 2665 B->rmap.bs = bs; 2666 b->mbs = mbs; 2667 b->nbs = nbs; 2668 if (!skipallocation) { 2669 if (!b->imax) { 2670 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2671 } 2672 /* b->ilen will count nonzeros in each block row so far. */ 2673 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2674 if (!nnz) { 2675 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2676 else if (nz <= 0) nz = 1; 2677 for (i=0; i<mbs; i++) b->imax[i] = nz; 2678 nz = nz*mbs; 2679 } else { 2680 nz = 0; 2681 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2682 } 2683 2684 /* allocate the matrix space */ 2685 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2686 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 2687 ierr = PetscLogObjectMemory(B,(B->rmap.N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2688 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2689 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2690 b->singlemalloc = PETSC_TRUE; 2691 b->i[0] = 0; 2692 for (i=1; i<mbs+1; i++) { 2693 b->i[i] = b->i[i-1] + b->imax[i-1]; 2694 } 2695 b->free_a = PETSC_TRUE; 2696 b->free_ij = PETSC_TRUE; 2697 } else { 2698 b->free_a = PETSC_FALSE; 2699 b->free_ij = PETSC_FALSE; 2700 } 2701 2702 B->rmap.bs = bs; 2703 b->bs2 = bs2; 2704 b->mbs = mbs; 2705 b->nz = 0; 2706 b->maxnz = nz*bs2; 2707 B->info.nz_unneeded = (PetscReal)b->maxnz; 2708 PetscFunctionReturn(0); 2709 } 2710 EXTERN_C_END 2711 2712 EXTERN_C_BEGIN 2713 #undef __FUNCT__ 2714 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 2715 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt I[],const PetscInt J[],const PetscScalar V[]) 2716 { 2717 PetscInt i,m,nz,nz_max=0,*nnz; 2718 PetscScalar *values=0; 2719 PetscErrorCode ierr; 2720 2721 PetscFunctionBegin; 2722 2723 if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2724 B->rmap.bs = bs; 2725 B->cmap.bs = bs; 2726 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2727 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2728 m = B->rmap.n/bs; 2729 2730 if (I[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "I[0] must be 0 but it is %D",I[0]); } 2731 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 2732 for(i=0; i<m; i++) { 2733 nz = I[i+1]- I[i]; 2734 if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 2735 nz_max = PetscMax(nz_max, nz); 2736 nnz[i] = nz; 2737 } 2738 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2739 ierr = PetscFree(nnz);CHKERRQ(ierr); 2740 2741 values = (PetscScalar*)V; 2742 if (!values) { 2743 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2744 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2745 } 2746 for (i=0; i<m; i++) { 2747 PetscInt ncols = I[i+1] - I[i]; 2748 const PetscInt *icols = J + I[i]; 2749 const PetscScalar *svals = values + (V ? (bs*bs*I[i]) : 0); 2750 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2751 } 2752 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2753 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2754 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2755 2756 PetscFunctionReturn(0); 2757 } 2758 EXTERN_C_END 2759 2760 2761 EXTERN_C_BEGIN 2762 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 2763 EXTERN_C_END 2764 2765 /*MC 2766 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2767 block sparse compressed row format. 2768 2769 Options Database Keys: 2770 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2771 2772 Level: beginner 2773 2774 .seealso: MatCreateSeqBAIJ() 2775 M*/ 2776 2777 2778 EXTERN_C_BEGIN 2779 #undef __FUNCT__ 2780 #define __FUNCT__ "MatCreate_SeqBAIJ" 2781 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2782 { 2783 PetscErrorCode ierr; 2784 PetscMPIInt size; 2785 Mat_SeqBAIJ *b; 2786 2787 PetscFunctionBegin; 2788 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 2789 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2790 2791 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2792 B->data = (void*)b; 2793 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2794 B->mapping = 0; 2795 b->row = 0; 2796 b->col = 0; 2797 b->icol = 0; 2798 b->reallocs = 0; 2799 b->saved_values = 0; 2800 2801 b->roworiented = PETSC_TRUE; 2802 b->nonew = 0; 2803 b->diag = 0; 2804 b->solve_work = 0; 2805 b->mult_work = 0; 2806 B->spptr = 0; 2807 B->info.nz_unneeded = (PetscReal)b->maxnz; 2808 b->keepzeroedrows = PETSC_FALSE; 2809 b->xtoy = 0; 2810 b->XtoY = 0; 2811 b->compressedrow.use = PETSC_FALSE; 2812 b->compressedrow.nrows = 0; 2813 b->compressedrow.i = PETSC_NULL; 2814 b->compressedrow.rindex = PETSC_NULL; 2815 b->compressedrow.checked = PETSC_FALSE; 2816 B->same_nonzero = PETSC_FALSE; 2817 2818 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqbaij_petsc_C", 2819 "MatGetFactor_seqbaij_petsc", 2820 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 2821 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2822 "MatInvertBlockDiagonal_SeqBAIJ", 2823 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2824 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2825 "MatStoreValues_SeqBAIJ", 2826 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2827 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2828 "MatRetrieveValues_SeqBAIJ", 2829 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2830 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2831 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2832 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2833 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2834 "MatConvert_SeqBAIJ_SeqAIJ", 2835 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2836 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2837 "MatConvert_SeqBAIJ_SeqSBAIJ", 2838 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2839 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2840 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2841 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2842 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 2843 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 2844 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 2845 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 2846 PetscFunctionReturn(0); 2847 } 2848 EXTERN_C_END 2849 2850 #undef __FUNCT__ 2851 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 2852 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2853 { 2854 Mat C = *B; 2855 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 2856 PetscErrorCode ierr; 2857 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2858 2859 PetscFunctionBegin; 2860 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2861 2862 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 2863 for (i=0; i<mbs; i++) { 2864 c->imax[i] = a->imax[i]; 2865 c->ilen[i] = a->ilen[i]; 2866 } 2867 2868 /* allocate the matrix space */ 2869 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2870 c->singlemalloc = PETSC_TRUE; 2871 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2872 if (mbs > 0) { 2873 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2874 if (cpvalues == MAT_COPY_VALUES) { 2875 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2876 } else { 2877 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2878 } 2879 } 2880 c->roworiented = a->roworiented; 2881 c->nonew = a->nonew; 2882 ierr = PetscMapCopy(((PetscObject)A)->comm,&A->rmap,&C->rmap);CHKERRQ(ierr); 2883 ierr = PetscMapCopy(((PetscObject)A)->comm,&A->cmap,&C->cmap);CHKERRQ(ierr); 2884 c->bs2 = a->bs2; 2885 c->mbs = a->mbs; 2886 c->nbs = a->nbs; 2887 2888 if (a->diag) { 2889 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2890 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2891 for (i=0; i<mbs; i++) { 2892 c->diag[i] = a->diag[i]; 2893 } 2894 } else c->diag = 0; 2895 c->nz = a->nz; 2896 c->maxnz = a->maxnz; 2897 c->solve_work = 0; 2898 c->mult_work = 0; 2899 c->free_a = PETSC_TRUE; 2900 c->free_ij = PETSC_TRUE; 2901 C->preallocated = PETSC_TRUE; 2902 C->assembled = PETSC_TRUE; 2903 2904 c->compressedrow.use = a->compressedrow.use; 2905 c->compressedrow.nrows = a->compressedrow.nrows; 2906 c->compressedrow.checked = a->compressedrow.checked; 2907 if ( a->compressedrow.checked && a->compressedrow.use){ 2908 i = a->compressedrow.nrows; 2909 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2910 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2911 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2912 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2913 } else { 2914 c->compressedrow.use = PETSC_FALSE; 2915 c->compressedrow.i = PETSC_NULL; 2916 c->compressedrow.rindex = PETSC_NULL; 2917 } 2918 C->same_nonzero = A->same_nonzero; 2919 *B = C; 2920 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 2921 PetscFunctionReturn(0); 2922 } 2923 2924 #undef __FUNCT__ 2925 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2926 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2927 { 2928 PetscErrorCode ierr; 2929 2930 PetscFunctionBegin; 2931 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 2932 ierr = MatSetSizes(*B,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 2933 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 2934 ierr = MatDuplicateNoCreate_SeqBAIJ(A,cpvalues,B); 2935 PetscFunctionReturn(0); 2936 } 2937 2938 #undef __FUNCT__ 2939 #define __FUNCT__ "MatLoad_SeqBAIJ" 2940 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A) 2941 { 2942 Mat_SeqBAIJ *a; 2943 Mat B; 2944 PetscErrorCode ierr; 2945 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2946 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2947 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2948 PetscInt *masked,nmask,tmp,bs2,ishift; 2949 PetscMPIInt size; 2950 int fd; 2951 PetscScalar *aa; 2952 MPI_Comm comm = ((PetscObject)viewer)->comm; 2953 2954 PetscFunctionBegin; 2955 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 2956 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2957 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2958 bs2 = bs*bs; 2959 2960 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2961 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2962 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2963 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2964 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2965 M = header[1]; N = header[2]; nz = header[3]; 2966 2967 if (header[3] < 0) { 2968 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2969 } 2970 2971 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2972 2973 /* 2974 This code adds extra rows to make sure the number of rows is 2975 divisible by the blocksize 2976 */ 2977 mbs = M/bs; 2978 extra_rows = bs - M + bs*(mbs); 2979 if (extra_rows == bs) extra_rows = 0; 2980 else mbs++; 2981 if (extra_rows) { 2982 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2983 } 2984 2985 /* read in row lengths */ 2986 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2987 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2988 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2989 2990 /* read in column indices */ 2991 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2992 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2993 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2994 2995 /* loop over row lengths determining block row lengths */ 2996 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2997 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2998 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2999 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3000 masked = mask + mbs; 3001 rowcount = 0; nzcount = 0; 3002 for (i=0; i<mbs; i++) { 3003 nmask = 0; 3004 for (j=0; j<bs; j++) { 3005 kmax = rowlengths[rowcount]; 3006 for (k=0; k<kmax; k++) { 3007 tmp = jj[nzcount++]/bs; 3008 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3009 } 3010 rowcount++; 3011 } 3012 browlengths[i] += nmask; 3013 /* zero out the mask elements we set */ 3014 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3015 } 3016 3017 /* create our matrix */ 3018 ierr = MatCreate(comm,&B); 3019 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 3020 ierr = MatSetType(B,type);CHKERRQ(ierr); 3021 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 3022 a = (Mat_SeqBAIJ*)B->data; 3023 3024 /* set matrix "i" values */ 3025 a->i[0] = 0; 3026 for (i=1; i<= mbs; i++) { 3027 a->i[i] = a->i[i-1] + browlengths[i-1]; 3028 a->ilen[i-1] = browlengths[i-1]; 3029 } 3030 a->nz = 0; 3031 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3032 3033 /* read in nonzero values */ 3034 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3035 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3036 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3037 3038 /* set "a" and "j" values into matrix */ 3039 nzcount = 0; jcount = 0; 3040 for (i=0; i<mbs; i++) { 3041 nzcountb = nzcount; 3042 nmask = 0; 3043 for (j=0; j<bs; j++) { 3044 kmax = rowlengths[i*bs+j]; 3045 for (k=0; k<kmax; k++) { 3046 tmp = jj[nzcount++]/bs; 3047 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3048 } 3049 } 3050 /* sort the masked values */ 3051 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3052 3053 /* set "j" values into matrix */ 3054 maskcount = 1; 3055 for (j=0; j<nmask; j++) { 3056 a->j[jcount++] = masked[j]; 3057 mask[masked[j]] = maskcount++; 3058 } 3059 /* set "a" values into matrix */ 3060 ishift = bs2*a->i[i]; 3061 for (j=0; j<bs; j++) { 3062 kmax = rowlengths[i*bs+j]; 3063 for (k=0; k<kmax; k++) { 3064 tmp = jj[nzcountb]/bs ; 3065 block = mask[tmp] - 1; 3066 point = jj[nzcountb] - bs*tmp; 3067 idx = ishift + bs2*block + j + bs*point; 3068 a->a[idx] = (MatScalar)aa[nzcountb++]; 3069 } 3070 } 3071 /* zero out the mask elements we set */ 3072 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3073 } 3074 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3075 3076 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3077 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3078 ierr = PetscFree(aa);CHKERRQ(ierr); 3079 ierr = PetscFree(jj);CHKERRQ(ierr); 3080 ierr = PetscFree(mask);CHKERRQ(ierr); 3081 3082 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3083 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3084 ierr = MatView_Private(B);CHKERRQ(ierr); 3085 3086 *A = B; 3087 PetscFunctionReturn(0); 3088 } 3089 3090 #undef __FUNCT__ 3091 #define __FUNCT__ "MatCreateSeqBAIJ" 3092 /*@C 3093 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3094 compressed row) format. For good matrix assembly performance the 3095 user should preallocate the matrix storage by setting the parameter nz 3096 (or the array nnz). By setting these parameters accurately, performance 3097 during matrix assembly can be increased by more than a factor of 50. 3098 3099 Collective on MPI_Comm 3100 3101 Input Parameters: 3102 + comm - MPI communicator, set to PETSC_COMM_SELF 3103 . bs - size of block 3104 . m - number of rows 3105 . n - number of columns 3106 . nz - number of nonzero blocks per block row (same for all rows) 3107 - nnz - array containing the number of nonzero blocks in the various block rows 3108 (possibly different for each block row) or PETSC_NULL 3109 3110 Output Parameter: 3111 . A - the matrix 3112 3113 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3114 MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely 3115 true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles. 3116 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3117 3118 Options Database Keys: 3119 . -mat_no_unroll - uses code that does not unroll the loops in the 3120 block calculations (much slower) 3121 . -mat_block_size - size of the blocks to use 3122 3123 Level: intermediate 3124 3125 Notes: 3126 The number of rows and columns must be divisible by blocksize. 3127 3128 If the nnz parameter is given then the nz parameter is ignored 3129 3130 A nonzero block is any block that as 1 or more nonzeros in it 3131 3132 The block AIJ format is fully compatible with standard Fortran 77 3133 storage. That is, the stored row and column indices can begin at 3134 either one (as in Fortran) or zero. See the users' manual for details. 3135 3136 Specify the preallocated storage with either nz or nnz (not both). 3137 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3138 allocation. For additional details, see the users manual chapter on 3139 matrices. 3140 3141 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3142 @*/ 3143 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3144 { 3145 PetscErrorCode ierr; 3146 3147 PetscFunctionBegin; 3148 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3149 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3150 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3151 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3152 PetscFunctionReturn(0); 3153 } 3154 3155 #undef __FUNCT__ 3156 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3157 /*@C 3158 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3159 per row in the matrix. For good matrix assembly performance the 3160 user should preallocate the matrix storage by setting the parameter nz 3161 (or the array nnz). By setting these parameters accurately, performance 3162 during matrix assembly can be increased by more than a factor of 50. 3163 3164 Collective on MPI_Comm 3165 3166 Input Parameters: 3167 + A - the matrix 3168 . bs - size of block 3169 . nz - number of block nonzeros per block row (same for all rows) 3170 - nnz - array containing the number of block nonzeros in the various block rows 3171 (possibly different for each block row) or PETSC_NULL 3172 3173 Options Database Keys: 3174 . -mat_no_unroll - uses code that does not unroll the loops in the 3175 block calculations (much slower) 3176 . -mat_block_size - size of the blocks to use 3177 3178 Level: intermediate 3179 3180 Notes: 3181 If the nnz parameter is given then the nz parameter is ignored 3182 3183 You can call MatGetInfo() to get information on how effective the preallocation was; 3184 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3185 You can also run with the option -info and look for messages with the string 3186 malloc in them to see if additional memory allocation was needed. 3187 3188 The block AIJ format is fully compatible with standard Fortran 77 3189 storage. That is, the stored row and column indices can begin at 3190 either one (as in Fortran) or zero. See the users' manual for details. 3191 3192 Specify the preallocated storage with either nz or nnz (not both). 3193 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3194 allocation. For additional details, see the users manual chapter on 3195 matrices. 3196 3197 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3198 @*/ 3199 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3200 { 3201 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 3202 3203 PetscFunctionBegin; 3204 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3205 if (f) { 3206 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 3207 } 3208 PetscFunctionReturn(0); 3209 } 3210 3211 #undef __FUNCT__ 3212 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3213 /*@C 3214 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3215 (the default sequential PETSc format). 3216 3217 Collective on MPI_Comm 3218 3219 Input Parameters: 3220 + A - the matrix 3221 . i - the indices into j for the start of each local row (starts with zero) 3222 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3223 - v - optional values in the matrix 3224 3225 Level: developer 3226 3227 .keywords: matrix, aij, compressed row, sparse 3228 3229 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3230 @*/ 3231 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3232 { 3233 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 3234 3235 PetscFunctionBegin; 3236 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3237 if (f) { 3238 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 3239 } 3240 PetscFunctionReturn(0); 3241 } 3242 3243 3244 #undef __FUNCT__ 3245 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3246 /*@ 3247 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 3248 (upper triangular entries in CSR format) provided by the user. 3249 3250 Collective on MPI_Comm 3251 3252 Input Parameters: 3253 + comm - must be an MPI communicator of size 1 3254 . bs - size of block 3255 . m - number of rows 3256 . n - number of columns 3257 . i - row indices 3258 . j - column indices 3259 - a - matrix values 3260 3261 Output Parameter: 3262 . mat - the matrix 3263 3264 Level: intermediate 3265 3266 Notes: 3267 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3268 once the matrix is destroyed 3269 3270 You cannot set new nonzero locations into this matrix, that will generate an error. 3271 3272 The i and j indices are 0 based 3273 3274 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3275 3276 @*/ 3277 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3278 { 3279 PetscErrorCode ierr; 3280 PetscInt ii; 3281 Mat_SeqBAIJ *baij; 3282 3283 PetscFunctionBegin; 3284 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3285 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3286 3287 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3288 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3289 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3290 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3291 baij = (Mat_SeqBAIJ*)(*mat)->data; 3292 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3293 3294 baij->i = i; 3295 baij->j = j; 3296 baij->a = a; 3297 baij->singlemalloc = PETSC_FALSE; 3298 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3299 baij->free_a = PETSC_FALSE; 3300 baij->free_ij = PETSC_FALSE; 3301 3302 for (ii=0; ii<m; ii++) { 3303 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3304 #if defined(PETSC_USE_DEBUG) 3305 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3306 #endif 3307 } 3308 #if defined(PETSC_USE_DEBUG) 3309 for (ii=0; ii<baij->i[m]; ii++) { 3310 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3311 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3312 } 3313 #endif 3314 3315 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3316 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3317 PetscFunctionReturn(0); 3318 } 3319