1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 15 /*@ 16 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 17 18 Collective on Mat 19 20 Input Parameters: 21 . mat - the matrix 22 23 Level: advanced 24 @*/ 25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 26 { 27 PetscErrorCode ierr,(*f)(Mat); 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 31 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 32 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 33 34 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 35 if (f) { 36 ierr = (*f)(mat);CHKERRQ(ierr); 37 } else { 38 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 39 } 40 PetscFunctionReturn(0); 41 } 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 47 { 48 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 49 PetscErrorCode ierr; 50 PetscInt *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs; 51 MatScalar *v = a->a,*odiag,*diag,*mdiag; 52 PetscReal shift = 0.0; 53 54 PetscFunctionBegin; 55 if (a->idiagvalid) PetscFunctionReturn(0); 56 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 57 diag_offset = a->diag; 58 if (!a->idiag) { 59 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 60 } 61 diag = a->idiag; 62 mdiag = a->idiag+bs*bs*mbs; 63 /* factor and invert each block */ 64 switch (bs){ 65 case 2: 66 for (i=0; i<mbs; i++) { 67 odiag = v + 4*diag_offset[i]; 68 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 69 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 70 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 71 diag += 4; 72 mdiag += 4; 73 } 74 break; 75 case 3: 76 for (i=0; i<mbs; i++) { 77 odiag = v + 9*diag_offset[i]; 78 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 79 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 80 diag[8] = odiag[8]; 81 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 82 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 83 mdiag[8] = odiag[8]; 84 ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 85 diag += 9; 86 mdiag += 9; 87 } 88 break; 89 case 4: 90 for (i=0; i<mbs; i++) { 91 odiag = v + 16*diag_offset[i]; 92 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 94 ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 95 diag += 16; 96 mdiag += 16; 97 } 98 break; 99 case 5: 100 for (i=0; i<mbs; i++) { 101 odiag = v + 25*diag_offset[i]; 102 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 104 ierr = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr); 105 diag += 25; 106 mdiag += 25; 107 } 108 break; 109 default: 110 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 111 } 112 a->idiagvalid = PETSC_TRUE; 113 PetscFunctionReturn(0); 114 } 115 EXTERN_C_END 116 117 #undef __FUNCT__ 118 #define __FUNCT__ "MatPBRelax_SeqBAIJ_1" 119 PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 120 { 121 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 122 PetscScalar *x,x1,s1; 123 const PetscScalar *b; 124 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 125 PetscErrorCode ierr; 126 PetscInt m = a->mbs,i,i2,nz,idx; 127 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 128 129 PetscFunctionBegin; 130 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 131 its = its*lits; 132 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 133 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 134 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 135 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 136 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 137 138 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 139 140 diag = a->diag; 141 idiag = a->idiag; 142 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 143 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 144 145 if (flag & SOR_ZERO_INITIAL_GUESS) { 146 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 147 x[0] = b[0]*idiag[0]; 148 i2 = 1; 149 idiag += 1; 150 for (i=1; i<m; i++) { 151 v = aa + ai[i]; 152 vi = aj + ai[i]; 153 nz = diag[i] - ai[i]; 154 s1 = b[i2]; 155 while (nz--) { 156 idx = (*vi++); 157 x1 = x[idx]; 158 s1 -= v[0]*x1; 159 v += 1; 160 } 161 x[i2] = idiag[0]*s1; 162 idiag += 1; 163 i2 += 1; 164 } 165 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 166 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 167 } 168 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 169 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 170 i2 = 0; 171 mdiag = a->idiag+a->mbs; 172 for (i=0; i<m; i++) { 173 x1 = x[i2]; 174 x[i2] = mdiag[0]*x1; 175 mdiag += 1; 176 i2 += 1; 177 } 178 ierr = PetscLogFlops(m);CHKERRQ(ierr); 179 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 180 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 181 } 182 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 183 idiag = a->idiag+a->mbs - 1; 184 i2 = m - 1; 185 x1 = x[i2]; 186 x[i2] = idiag[0]*x1; 187 idiag -= 1; 188 i2 -= 1; 189 for (i=m-2; i>=0; i--) { 190 v = aa + (diag[i]+1); 191 vi = aj + diag[i] + 1; 192 nz = ai[i+1] - diag[i] - 1; 193 s1 = x[i2]; 194 while (nz--) { 195 idx = (*vi++); 196 x1 = x[idx]; 197 s1 -= v[0]*x1; 198 v += 1; 199 } 200 x[i2] = idiag[0]*s1; 201 idiag -= 2; 202 i2 -= 1; 203 } 204 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 205 } 206 } else { 207 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 208 } 209 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 210 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 211 PetscFunctionReturn(0); 212 } 213 214 #undef __FUNCT__ 215 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 216 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 217 { 218 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 219 PetscScalar *x,x1,x2,s1,s2; 220 const PetscScalar *b; 221 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 222 PetscErrorCode ierr; 223 PetscInt m = a->mbs,i,i2,nz,idx; 224 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 225 226 PetscFunctionBegin; 227 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 228 its = its*lits; 229 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 230 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 231 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 232 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 233 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 234 235 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 236 237 diag = a->diag; 238 idiag = a->idiag; 239 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 240 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 241 242 if (flag & SOR_ZERO_INITIAL_GUESS) { 243 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 244 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 245 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 246 i2 = 2; 247 idiag += 4; 248 for (i=1; i<m; i++) { 249 v = aa + 4*ai[i]; 250 vi = aj + ai[i]; 251 nz = diag[i] - ai[i]; 252 s1 = b[i2]; s2 = b[i2+1]; 253 while (nz--) { 254 idx = 2*(*vi++); 255 x1 = x[idx]; x2 = x[1+idx]; 256 s1 -= v[0]*x1 + v[2]*x2; 257 s2 -= v[1]*x1 + v[3]*x2; 258 v += 4; 259 } 260 x[i2] = idiag[0]*s1 + idiag[2]*s2; 261 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 262 idiag += 4; 263 i2 += 2; 264 } 265 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 266 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 267 } 268 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 269 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 270 i2 = 0; 271 mdiag = a->idiag+4*a->mbs; 272 for (i=0; i<m; i++) { 273 x1 = x[i2]; x2 = x[i2+1]; 274 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 275 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 276 mdiag += 4; 277 i2 += 2; 278 } 279 ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 280 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 281 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 282 } 283 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 284 idiag = a->idiag+4*a->mbs - 4; 285 i2 = 2*m - 2; 286 x1 = x[i2]; x2 = x[i2+1]; 287 x[i2] = idiag[0]*x1 + idiag[2]*x2; 288 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 289 idiag -= 4; 290 i2 -= 2; 291 for (i=m-2; i>=0; i--) { 292 v = aa + 4*(diag[i]+1); 293 vi = aj + diag[i] + 1; 294 nz = ai[i+1] - diag[i] - 1; 295 s1 = x[i2]; s2 = x[i2+1]; 296 while (nz--) { 297 idx = 2*(*vi++); 298 x1 = x[idx]; x2 = x[1+idx]; 299 s1 -= v[0]*x1 + v[2]*x2; 300 s2 -= v[1]*x1 + v[3]*x2; 301 v += 4; 302 } 303 x[i2] = idiag[0]*s1 + idiag[2]*s2; 304 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 305 idiag -= 4; 306 i2 -= 2; 307 } 308 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 309 } 310 } else { 311 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 312 } 313 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 314 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 315 PetscFunctionReturn(0); 316 } 317 318 #undef __FUNCT__ 319 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 320 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 321 { 322 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 323 PetscScalar *x,x1,x2,x3,s1,s2,s3; 324 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 325 const PetscScalar *b; 326 PetscErrorCode ierr; 327 PetscInt m = a->mbs,i,i2,nz,idx; 328 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 329 330 PetscFunctionBegin; 331 its = its*lits; 332 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 333 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 334 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 335 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 336 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 337 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 338 339 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 340 341 diag = a->diag; 342 idiag = a->idiag; 343 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 344 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 345 346 if (flag & SOR_ZERO_INITIAL_GUESS) { 347 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 348 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 349 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 350 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 351 i2 = 3; 352 idiag += 9; 353 for (i=1; i<m; i++) { 354 v = aa + 9*ai[i]; 355 vi = aj + ai[i]; 356 nz = diag[i] - ai[i]; 357 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 358 while (nz--) { 359 idx = 3*(*vi++); 360 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 361 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 362 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 363 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 364 v += 9; 365 } 366 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 367 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 368 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 369 idiag += 9; 370 i2 += 3; 371 } 372 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 373 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 374 } 375 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 376 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 377 i2 = 0; 378 mdiag = a->idiag+9*a->mbs; 379 for (i=0; i<m; i++) { 380 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 381 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 382 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 383 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 384 mdiag += 9; 385 i2 += 3; 386 } 387 ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 388 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 389 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 390 } 391 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 392 idiag = a->idiag+9*a->mbs - 9; 393 i2 = 3*m - 3; 394 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 395 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 396 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 397 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 398 idiag -= 9; 399 i2 -= 3; 400 for (i=m-2; i>=0; i--) { 401 v = aa + 9*(diag[i]+1); 402 vi = aj + diag[i] + 1; 403 nz = ai[i+1] - diag[i] - 1; 404 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 405 while (nz--) { 406 idx = 3*(*vi++); 407 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 408 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 409 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 410 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 411 v += 9; 412 } 413 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 414 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 415 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 416 idiag -= 9; 417 i2 -= 3; 418 } 419 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 420 } 421 } else { 422 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 423 } 424 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 425 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 431 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 432 { 433 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 434 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 435 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 436 const PetscScalar *b; 437 PetscErrorCode ierr; 438 PetscInt m = a->mbs,i,i2,nz,idx; 439 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 440 441 PetscFunctionBegin; 442 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 443 its = its*lits; 444 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 445 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 446 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 447 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 448 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 449 450 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 451 452 diag = a->diag; 453 idiag = a->idiag; 454 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 455 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 456 457 if (flag & SOR_ZERO_INITIAL_GUESS) { 458 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 459 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 460 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 461 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 462 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 463 i2 = 4; 464 idiag += 16; 465 for (i=1; i<m; i++) { 466 v = aa + 16*ai[i]; 467 vi = aj + ai[i]; 468 nz = diag[i] - ai[i]; 469 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 470 while (nz--) { 471 idx = 4*(*vi++); 472 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 473 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 474 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 475 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 476 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 477 v += 16; 478 } 479 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 480 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 481 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 482 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 483 idiag += 16; 484 i2 += 4; 485 } 486 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 487 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 488 } 489 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 490 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 491 i2 = 0; 492 mdiag = a->idiag+16*a->mbs; 493 for (i=0; i<m; i++) { 494 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 495 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 496 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 497 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 498 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 499 mdiag += 16; 500 i2 += 4; 501 } 502 ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 503 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 504 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 505 } 506 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 507 idiag = a->idiag+16*a->mbs - 16; 508 i2 = 4*m - 4; 509 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 510 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 511 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 512 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 513 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 514 idiag -= 16; 515 i2 -= 4; 516 for (i=m-2; i>=0; i--) { 517 v = aa + 16*(diag[i]+1); 518 vi = aj + diag[i] + 1; 519 nz = ai[i+1] - diag[i] - 1; 520 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 521 while (nz--) { 522 idx = 4*(*vi++); 523 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 524 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 525 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 526 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 527 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 528 v += 16; 529 } 530 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 531 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 532 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 533 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 534 idiag -= 16; 535 i2 -= 4; 536 } 537 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 538 } 539 } else { 540 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 541 } 542 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 543 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 544 PetscFunctionReturn(0); 545 } 546 547 #undef __FUNCT__ 548 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 549 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 550 { 551 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 552 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 553 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 554 const PetscScalar *b; 555 PetscErrorCode ierr; 556 PetscInt m = a->mbs,i,i2,nz,idx; 557 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 558 559 PetscFunctionBegin; 560 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 561 its = its*lits; 562 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 563 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 564 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 565 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 566 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 567 568 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 569 570 diag = a->diag; 571 idiag = a->idiag; 572 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 573 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 574 575 if (flag & SOR_ZERO_INITIAL_GUESS) { 576 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 577 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 578 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 579 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 580 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 581 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 582 i2 = 5; 583 idiag += 25; 584 for (i=1; i<m; i++) { 585 v = aa + 25*ai[i]; 586 vi = aj + ai[i]; 587 nz = diag[i] - ai[i]; 588 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 589 while (nz--) { 590 idx = 5*(*vi++); 591 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 592 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 593 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 594 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 595 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 596 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 597 v += 25; 598 } 599 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 600 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 601 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 602 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 603 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 604 idiag += 25; 605 i2 += 5; 606 } 607 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 608 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 609 } 610 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 611 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 612 i2 = 0; 613 mdiag = a->idiag+25*a->mbs; 614 for (i=0; i<m; i++) { 615 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 616 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 617 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 618 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 619 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 620 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 621 mdiag += 25; 622 i2 += 5; 623 } 624 ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 625 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 626 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 627 } 628 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 629 idiag = a->idiag+25*a->mbs - 25; 630 i2 = 5*m - 5; 631 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 632 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 633 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 634 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 635 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 636 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 637 idiag -= 25; 638 i2 -= 5; 639 for (i=m-2; i>=0; i--) { 640 v = aa + 25*(diag[i]+1); 641 vi = aj + diag[i] + 1; 642 nz = ai[i+1] - diag[i] - 1; 643 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 644 while (nz--) { 645 idx = 5*(*vi++); 646 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 647 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 648 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 649 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 650 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 651 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 652 v += 25; 653 } 654 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 655 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 656 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 657 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 658 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 659 idiag -= 25; 660 i2 -= 5; 661 } 662 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 663 } 664 } else { 665 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 666 } 667 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 668 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 669 PetscFunctionReturn(0); 670 } 671 672 #undef __FUNCT__ 673 #define __FUNCT__ "MatPBRelax_SeqBAIJ_6" 674 PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 675 { 676 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 677 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 678 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 679 const PetscScalar *b; 680 PetscErrorCode ierr; 681 PetscInt m = a->mbs,i,i2,nz,idx; 682 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 683 684 PetscFunctionBegin; 685 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 686 its = its*lits; 687 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 688 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 689 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 690 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 691 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 692 693 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 694 695 diag = a->diag; 696 idiag = a->idiag; 697 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 698 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 699 700 if (flag & SOR_ZERO_INITIAL_GUESS) { 701 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 702 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 703 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 704 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 705 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 706 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 707 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 708 i2 = 6; 709 idiag += 36; 710 for (i=1; i<m; i++) { 711 v = aa + 36*ai[i]; 712 vi = aj + ai[i]; 713 nz = diag[i] - ai[i]; 714 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 715 while (nz--) { 716 idx = 6*(*vi++); 717 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 718 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 719 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 720 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 721 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 722 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 723 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 724 v += 36; 725 } 726 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 727 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 728 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 729 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 730 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 731 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 732 idiag += 36; 733 i2 += 6; 734 } 735 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 736 ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr); 737 } 738 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 739 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 740 i2 = 0; 741 mdiag = a->idiag+36*a->mbs; 742 for (i=0; i<m; i++) { 743 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 744 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 745 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 746 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 747 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 748 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 749 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 750 mdiag += 36; 751 i2 += 6; 752 } 753 ierr = PetscLogFlops(60*m);CHKERRQ(ierr); 754 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 755 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 756 } 757 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 758 idiag = a->idiag+36*a->mbs - 36; 759 i2 = 6*m - 6; 760 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 761 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 762 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 763 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 764 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 765 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 766 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 767 idiag -= 36; 768 i2 -= 6; 769 for (i=m-2; i>=0; i--) { 770 v = aa + 36*(diag[i]+1); 771 vi = aj + diag[i] + 1; 772 nz = ai[i+1] - diag[i] - 1; 773 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 774 while (nz--) { 775 idx = 6*(*vi++); 776 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 777 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 778 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 779 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 780 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 781 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 782 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 783 v += 36; 784 } 785 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 786 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 787 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 788 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 789 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 790 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 791 idiag -= 36; 792 i2 -= 6; 793 } 794 ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr); 795 } 796 } else { 797 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 798 } 799 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 800 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 801 PetscFunctionReturn(0); 802 } 803 804 #undef __FUNCT__ 805 #define __FUNCT__ "MatPBRelax_SeqBAIJ_7" 806 PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 807 { 808 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 809 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 810 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 811 const PetscScalar *b; 812 PetscErrorCode ierr; 813 PetscInt m = a->mbs,i,i2,nz,idx; 814 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 815 816 PetscFunctionBegin; 817 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 818 its = its*lits; 819 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 820 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 821 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 822 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 823 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 824 825 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 826 827 diag = a->diag; 828 idiag = a->idiag; 829 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 830 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 831 832 if (flag & SOR_ZERO_INITIAL_GUESS) { 833 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 834 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 835 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 836 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 837 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 838 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 839 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 840 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 841 i2 = 7; 842 idiag += 49; 843 for (i=1; i<m; i++) { 844 v = aa + 49*ai[i]; 845 vi = aj + ai[i]; 846 nz = diag[i] - ai[i]; 847 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 848 while (nz--) { 849 idx = 7*(*vi++); 850 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 851 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 852 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 853 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 854 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 855 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 856 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 857 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 858 v += 49; 859 } 860 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 861 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 862 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 863 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 864 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 865 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 866 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 867 idiag += 49; 868 i2 += 7; 869 } 870 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 871 ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr); 872 } 873 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 874 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 875 i2 = 0; 876 mdiag = a->idiag+49*a->mbs; 877 for (i=0; i<m; i++) { 878 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 879 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7; 880 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7; 881 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7; 882 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7; 883 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7; 884 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7; 885 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7; 886 mdiag += 36; 887 i2 += 6; 888 } 889 ierr = PetscLogFlops(93*m);CHKERRQ(ierr); 890 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 891 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 892 } 893 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 894 idiag = a->idiag+49*a->mbs - 49; 895 i2 = 7*m - 7; 896 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 897 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 898 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 899 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 900 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 901 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 902 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 903 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 904 idiag -= 49; 905 i2 -= 7; 906 for (i=m-2; i>=0; i--) { 907 v = aa + 49*(diag[i]+1); 908 vi = aj + diag[i] + 1; 909 nz = ai[i+1] - diag[i] - 1; 910 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 911 while (nz--) { 912 idx = 7*(*vi++); 913 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 914 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 915 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 916 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 917 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 918 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 919 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 920 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 921 v += 49; 922 } 923 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 924 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 925 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 926 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 927 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 928 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 929 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 930 idiag -= 49; 931 i2 -= 7; 932 } 933 ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr); 934 } 935 } else { 936 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 937 } 938 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 939 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 940 PetscFunctionReturn(0); 941 } 942 943 /* 944 Special version for direct calls from Fortran (Used in PETSc-fun3d) 945 */ 946 #if defined(PETSC_HAVE_FORTRAN_CAPS) 947 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 948 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 949 #define matsetvaluesblocked4_ matsetvaluesblocked4 950 #endif 951 952 EXTERN_C_BEGIN 953 #undef __FUNCT__ 954 #define __FUNCT__ "matsetvaluesblocked4_" 955 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 956 { 957 Mat A = *AA; 958 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 959 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 960 PetscInt *ai=a->i,*ailen=a->ilen; 961 PetscInt *aj=a->j,stepval,lastcol = -1; 962 const PetscScalar *value = v; 963 MatScalar *ap,*aa = a->a,*bap; 964 965 PetscFunctionBegin; 966 if (A->rmap.bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 967 stepval = (n-1)*4; 968 for (k=0; k<m; k++) { /* loop over added rows */ 969 row = im[k]; 970 rp = aj + ai[row]; 971 ap = aa + 16*ai[row]; 972 nrow = ailen[row]; 973 low = 0; 974 high = nrow; 975 for (l=0; l<n; l++) { /* loop over added columns */ 976 col = in[l]; 977 if (col <= lastcol) low = 0; else high = nrow; 978 lastcol = col; 979 value = v + k*(stepval+4 + l)*4; 980 while (high-low > 7) { 981 t = (low+high)/2; 982 if (rp[t] > col) high = t; 983 else low = t; 984 } 985 for (i=low; i<high; i++) { 986 if (rp[i] > col) break; 987 if (rp[i] == col) { 988 bap = ap + 16*i; 989 for (ii=0; ii<4; ii++,value+=stepval) { 990 for (jj=ii; jj<16; jj+=4) { 991 bap[jj] += *value++; 992 } 993 } 994 goto noinsert2; 995 } 996 } 997 N = nrow++ - 1; 998 high++; /* added new column index thus must search to one higher than before */ 999 /* shift up all the later entries in this row */ 1000 for (ii=N; ii>=i; ii--) { 1001 rp[ii+1] = rp[ii]; 1002 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1003 } 1004 if (N >= i) { 1005 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1006 } 1007 rp[i] = col; 1008 bap = ap + 16*i; 1009 for (ii=0; ii<4; ii++,value+=stepval) { 1010 for (jj=ii; jj<16; jj+=4) { 1011 bap[jj] = *value++; 1012 } 1013 } 1014 noinsert2:; 1015 low = i; 1016 } 1017 ailen[row] = nrow; 1018 } 1019 PetscFunctionReturnVoid(); 1020 } 1021 EXTERN_C_END 1022 1023 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1024 #define matsetvalues4_ MATSETVALUES4 1025 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1026 #define matsetvalues4_ matsetvalues4 1027 #endif 1028 1029 EXTERN_C_BEGIN 1030 #undef __FUNCT__ 1031 #define __FUNCT__ "MatSetValues4_" 1032 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1033 { 1034 Mat A = *AA; 1035 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1036 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1037 PetscInt *ai=a->i,*ailen=a->ilen; 1038 PetscInt *aj=a->j,brow,bcol; 1039 PetscInt ridx,cidx,lastcol = -1; 1040 MatScalar *ap,value,*aa=a->a,*bap; 1041 1042 PetscFunctionBegin; 1043 for (k=0; k<m; k++) { /* loop over added rows */ 1044 row = im[k]; brow = row/4; 1045 rp = aj + ai[brow]; 1046 ap = aa + 16*ai[brow]; 1047 nrow = ailen[brow]; 1048 low = 0; 1049 high = nrow; 1050 for (l=0; l<n; l++) { /* loop over added columns */ 1051 col = in[l]; bcol = col/4; 1052 ridx = row % 4; cidx = col % 4; 1053 value = v[l + k*n]; 1054 if (col <= lastcol) low = 0; else high = nrow; 1055 lastcol = col; 1056 while (high-low > 7) { 1057 t = (low+high)/2; 1058 if (rp[t] > bcol) high = t; 1059 else low = t; 1060 } 1061 for (i=low; i<high; i++) { 1062 if (rp[i] > bcol) break; 1063 if (rp[i] == bcol) { 1064 bap = ap + 16*i + 4*cidx + ridx; 1065 *bap += value; 1066 goto noinsert1; 1067 } 1068 } 1069 N = nrow++ - 1; 1070 high++; /* added new column thus must search to one higher than before */ 1071 /* shift up all the later entries in this row */ 1072 for (ii=N; ii>=i; ii--) { 1073 rp[ii+1] = rp[ii]; 1074 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1075 } 1076 if (N>=i) { 1077 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1078 } 1079 rp[i] = bcol; 1080 ap[16*i + 4*cidx + ridx] = value; 1081 noinsert1:; 1082 low = i; 1083 } 1084 ailen[brow] = nrow; 1085 } 1086 PetscFunctionReturnVoid(); 1087 } 1088 EXTERN_C_END 1089 1090 /* 1091 Checks for missing diagonals 1092 */ 1093 #undef __FUNCT__ 1094 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1095 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d) 1096 { 1097 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1098 PetscErrorCode ierr; 1099 PetscInt *diag,*jj = a->j,i; 1100 1101 PetscFunctionBegin; 1102 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1103 *missing = PETSC_FALSE; 1104 diag = a->diag; 1105 for (i=0; i<a->mbs; i++) { 1106 if (jj[diag[i]] != i) { 1107 *missing = PETSC_TRUE; 1108 if (d) *d = i; 1109 } 1110 } 1111 PetscFunctionReturn(0); 1112 } 1113 1114 #undef __FUNCT__ 1115 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1116 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1117 { 1118 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1119 PetscErrorCode ierr; 1120 PetscInt i,j,m = a->mbs; 1121 1122 PetscFunctionBegin; 1123 if (!a->diag) { 1124 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1125 } 1126 for (i=0; i<m; i++) { 1127 a->diag[i] = a->i[i+1]; 1128 for (j=a->i[i]; j<a->i[i+1]; j++) { 1129 if (a->j[j] == i) { 1130 a->diag[i] = j; 1131 break; 1132 } 1133 } 1134 } 1135 PetscFunctionReturn(0); 1136 } 1137 1138 1139 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1140 1141 #undef __FUNCT__ 1142 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1143 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1144 { 1145 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1146 PetscErrorCode ierr; 1147 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs,nbs = 1,k,l,cnt; 1148 PetscInt *tia, *tja; 1149 1150 PetscFunctionBegin; 1151 *nn = n; 1152 if (!ia) PetscFunctionReturn(0); 1153 if (symmetric) { 1154 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1155 } else { 1156 tia = a->i; tja = a->j; 1157 } 1158 1159 if (!blockcompressed && bs > 1) { 1160 (*nn) *= bs; 1161 nbs = bs; 1162 /* malloc & create the natural set of indices */ 1163 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1164 if (n) { 1165 (*ia)[0] = 0; 1166 for (j=1; j<bs; j++) { 1167 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1168 } 1169 } 1170 1171 for (i=1; i<n; i++) { 1172 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1173 for (j=1; j<bs; j++) { 1174 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1175 } 1176 } 1177 if (n) { 1178 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1179 } 1180 1181 if (ja) { 1182 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1183 cnt = 0; 1184 for (i=0; i<n; i++) { 1185 for (j=0; j<bs; j++) { 1186 for (k=tia[i]; k<tia[i+1]; k++) { 1187 for (l=0; l<bs; l++) { 1188 (*ja)[cnt++] = bs*tja[k] + l; 1189 } 1190 } 1191 } 1192 } 1193 } 1194 1195 n *= bs; 1196 nz *= bs*bs; 1197 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1198 ierr = PetscFree(tia);CHKERRQ(ierr); 1199 ierr = PetscFree(tja);CHKERRQ(ierr); 1200 } 1201 } else { 1202 *ia = tia; 1203 if (ja) *ja = tja; 1204 } 1205 if (oshift == 1) { 1206 for (i=0; i<n+nbs; i++) (*ia)[i]++; 1207 if (ja) for (i=0; i<nz; i++) (*ja)[i]++; 1208 } 1209 PetscFunctionReturn(0); 1210 } 1211 1212 #undef __FUNCT__ 1213 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1214 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1215 { 1216 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1217 PetscErrorCode ierr; 1218 PetscInt i,n = a->mbs,nz = a->i[n]; 1219 1220 PetscFunctionBegin; 1221 if (!ia) PetscFunctionReturn(0); 1222 if (!blockcompressed && A->rmap.bs > 1) { 1223 ierr = PetscFree(*ia);CHKERRQ(ierr); 1224 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1225 } else if (symmetric) { 1226 ierr = PetscFree(*ia);CHKERRQ(ierr); 1227 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1228 } else if (oshift == 1) { /* blockcompressed */ 1229 for (i=0; i<n+1; i++) a->i[i]--; 1230 if (ja) {for (i=0; i<nz; i++) a->j[i]--;} 1231 } 1232 PetscFunctionReturn(0); 1233 } 1234 1235 #undef __FUNCT__ 1236 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1237 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1238 { 1239 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1240 PetscErrorCode ierr; 1241 1242 PetscFunctionBegin; 1243 #if defined(PETSC_USE_LOG) 1244 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz); 1245 #endif 1246 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1247 if (a->row) { 1248 ierr = ISDestroy(a->row);CHKERRQ(ierr); 1249 } 1250 if (a->col) { 1251 ierr = ISDestroy(a->col);CHKERRQ(ierr); 1252 } 1253 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1254 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1255 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1256 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1257 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1258 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 1259 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1260 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1261 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 1262 1263 if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);} 1264 ierr = PetscFree(a);CHKERRQ(ierr); 1265 1266 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1267 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1268 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1269 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1270 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1271 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1272 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1273 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1274 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1275 PetscFunctionReturn(0); 1276 } 1277 1278 #undef __FUNCT__ 1279 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1280 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg) 1281 { 1282 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1283 PetscErrorCode ierr; 1284 1285 PetscFunctionBegin; 1286 switch (op) { 1287 case MAT_ROW_ORIENTED: 1288 a->roworiented = flg; 1289 break; 1290 case MAT_KEEP_ZEROED_ROWS: 1291 a->keepzeroedrows = flg; 1292 break; 1293 case MAT_NEW_NONZERO_LOCATIONS: 1294 a->nonew = (flg ? 0 : 1); 1295 break; 1296 case MAT_NEW_NONZERO_LOCATION_ERR: 1297 a->nonew = (flg ? -1 : 0); 1298 break; 1299 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1300 a->nonew = (flg ? -2 : 0); 1301 break; 1302 case MAT_NEW_DIAGONALS: 1303 case MAT_IGNORE_OFF_PROC_ENTRIES: 1304 case MAT_USE_HASH_TABLE: 1305 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1306 break; 1307 case MAT_SYMMETRIC: 1308 case MAT_STRUCTURALLY_SYMMETRIC: 1309 case MAT_HERMITIAN: 1310 case MAT_SYMMETRY_ETERNAL: 1311 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1312 break; 1313 default: 1314 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1315 } 1316 PetscFunctionReturn(0); 1317 } 1318 1319 #undef __FUNCT__ 1320 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1321 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1322 { 1323 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1324 PetscErrorCode ierr; 1325 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1326 MatScalar *aa,*aa_i; 1327 PetscScalar *v_i; 1328 1329 PetscFunctionBegin; 1330 bs = A->rmap.bs; 1331 ai = a->i; 1332 aj = a->j; 1333 aa = a->a; 1334 bs2 = a->bs2; 1335 1336 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1337 1338 bn = row/bs; /* Block number */ 1339 bp = row % bs; /* Block Position */ 1340 M = ai[bn+1] - ai[bn]; 1341 *nz = bs*M; 1342 1343 if (v) { 1344 *v = 0; 1345 if (*nz) { 1346 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1347 for (i=0; i<M; i++) { /* for each block in the block row */ 1348 v_i = *v + i*bs; 1349 aa_i = aa + bs2*(ai[bn] + i); 1350 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1351 } 1352 } 1353 } 1354 1355 if (idx) { 1356 *idx = 0; 1357 if (*nz) { 1358 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1359 for (i=0; i<M; i++) { /* for each block in the block row */ 1360 idx_i = *idx + i*bs; 1361 itmp = bs*aj[ai[bn] + i]; 1362 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1363 } 1364 } 1365 } 1366 PetscFunctionReturn(0); 1367 } 1368 1369 #undef __FUNCT__ 1370 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1371 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1372 { 1373 PetscErrorCode ierr; 1374 1375 PetscFunctionBegin; 1376 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1377 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1378 PetscFunctionReturn(0); 1379 } 1380 1381 #undef __FUNCT__ 1382 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1383 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1384 { 1385 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1386 Mat C; 1387 PetscErrorCode ierr; 1388 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1389 PetscInt *rows,*cols,bs2=a->bs2; 1390 MatScalar *array; 1391 1392 PetscFunctionBegin; 1393 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1394 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1395 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1396 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1397 1398 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1399 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1400 ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr); 1401 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1402 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1403 ierr = PetscFree(col);CHKERRQ(ierr); 1404 } else { 1405 C = *B; 1406 } 1407 1408 array = a->a; 1409 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1410 cols = rows + bs; 1411 for (i=0; i<mbs; i++) { 1412 cols[0] = i*bs; 1413 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1414 len = ai[i+1] - ai[i]; 1415 for (j=0; j<len; j++) { 1416 rows[0] = (*aj++)*bs; 1417 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1418 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1419 array += bs2; 1420 } 1421 } 1422 ierr = PetscFree(rows);CHKERRQ(ierr); 1423 1424 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1425 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1426 1427 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1428 *B = C; 1429 } else { 1430 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1431 } 1432 PetscFunctionReturn(0); 1433 } 1434 1435 #undef __FUNCT__ 1436 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1437 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1438 { 1439 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1440 PetscErrorCode ierr; 1441 PetscInt i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2; 1442 int fd; 1443 PetscScalar *aa; 1444 FILE *file; 1445 1446 PetscFunctionBegin; 1447 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1448 ierr = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1449 col_lens[0] = MAT_FILE_COOKIE; 1450 1451 col_lens[1] = A->rmap.N; 1452 col_lens[2] = A->cmap.n; 1453 col_lens[3] = a->nz*bs2; 1454 1455 /* store lengths of each row and write (including header) to file */ 1456 count = 0; 1457 for (i=0; i<a->mbs; i++) { 1458 for (j=0; j<bs; j++) { 1459 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1460 } 1461 } 1462 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1463 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1464 1465 /* store column indices (zero start index) */ 1466 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1467 count = 0; 1468 for (i=0; i<a->mbs; i++) { 1469 for (j=0; j<bs; j++) { 1470 for (k=a->i[i]; k<a->i[i+1]; k++) { 1471 for (l=0; l<bs; l++) { 1472 jj[count++] = bs*a->j[k] + l; 1473 } 1474 } 1475 } 1476 } 1477 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1478 ierr = PetscFree(jj);CHKERRQ(ierr); 1479 1480 /* store nonzero values */ 1481 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1482 count = 0; 1483 for (i=0; i<a->mbs; i++) { 1484 for (j=0; j<bs; j++) { 1485 for (k=a->i[i]; k<a->i[i+1]; k++) { 1486 for (l=0; l<bs; l++) { 1487 aa[count++] = a->a[bs2*k + l*bs + j]; 1488 } 1489 } 1490 } 1491 } 1492 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1493 ierr = PetscFree(aa);CHKERRQ(ierr); 1494 1495 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1496 if (file) { 1497 fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs); 1498 } 1499 PetscFunctionReturn(0); 1500 } 1501 1502 #undef __FUNCT__ 1503 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1504 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1505 { 1506 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1507 PetscErrorCode ierr; 1508 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 1509 PetscViewerFormat format; 1510 1511 PetscFunctionBegin; 1512 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1513 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1514 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1515 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1516 Mat aij; 1517 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1518 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1519 ierr = MatDestroy(aij);CHKERRQ(ierr); 1520 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1521 PetscFunctionReturn(0); 1522 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1523 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1524 for (i=0; i<a->mbs; i++) { 1525 for (j=0; j<bs; j++) { 1526 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1527 for (k=a->i[i]; k<a->i[i+1]; k++) { 1528 for (l=0; l<bs; l++) { 1529 #if defined(PETSC_USE_COMPLEX) 1530 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1531 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1532 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1533 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1534 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1535 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1536 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1537 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1538 } 1539 #else 1540 if (a->a[bs2*k + l*bs + j] != 0.0) { 1541 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1542 } 1543 #endif 1544 } 1545 } 1546 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1547 } 1548 } 1549 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1550 } else { 1551 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1552 for (i=0; i<a->mbs; i++) { 1553 for (j=0; j<bs; j++) { 1554 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1555 for (k=a->i[i]; k<a->i[i+1]; k++) { 1556 for (l=0; l<bs; l++) { 1557 #if defined(PETSC_USE_COMPLEX) 1558 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1559 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1560 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1561 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1562 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1563 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1564 } else { 1565 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1566 } 1567 #else 1568 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1569 #endif 1570 } 1571 } 1572 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1573 } 1574 } 1575 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1576 } 1577 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1578 PetscFunctionReturn(0); 1579 } 1580 1581 #undef __FUNCT__ 1582 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1583 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1584 { 1585 Mat A = (Mat) Aa; 1586 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1587 PetscErrorCode ierr; 1588 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 1589 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1590 MatScalar *aa; 1591 PetscViewer viewer; 1592 1593 PetscFunctionBegin; 1594 1595 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1596 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1597 1598 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1599 1600 /* loop over matrix elements drawing boxes */ 1601 color = PETSC_DRAW_BLUE; 1602 for (i=0,row=0; i<mbs; i++,row+=bs) { 1603 for (j=a->i[i]; j<a->i[i+1]; j++) { 1604 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1605 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1606 aa = a->a + j*bs2; 1607 for (k=0; k<bs; k++) { 1608 for (l=0; l<bs; l++) { 1609 if (PetscRealPart(*aa++) >= 0.) continue; 1610 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1611 } 1612 } 1613 } 1614 } 1615 color = PETSC_DRAW_CYAN; 1616 for (i=0,row=0; i<mbs; i++,row+=bs) { 1617 for (j=a->i[i]; j<a->i[i+1]; j++) { 1618 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1619 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1620 aa = a->a + j*bs2; 1621 for (k=0; k<bs; k++) { 1622 for (l=0; l<bs; l++) { 1623 if (PetscRealPart(*aa++) != 0.) continue; 1624 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1625 } 1626 } 1627 } 1628 } 1629 1630 color = PETSC_DRAW_RED; 1631 for (i=0,row=0; i<mbs; i++,row+=bs) { 1632 for (j=a->i[i]; j<a->i[i+1]; j++) { 1633 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1634 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1635 aa = a->a + j*bs2; 1636 for (k=0; k<bs; k++) { 1637 for (l=0; l<bs; l++) { 1638 if (PetscRealPart(*aa++) <= 0.) continue; 1639 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1640 } 1641 } 1642 } 1643 } 1644 PetscFunctionReturn(0); 1645 } 1646 1647 #undef __FUNCT__ 1648 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1649 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1650 { 1651 PetscErrorCode ierr; 1652 PetscReal xl,yl,xr,yr,w,h; 1653 PetscDraw draw; 1654 PetscTruth isnull; 1655 1656 PetscFunctionBegin; 1657 1658 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1659 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1660 1661 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1662 xr = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 1663 xr += w; yr += h; xl = -w; yl = -h; 1664 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1665 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1666 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1667 PetscFunctionReturn(0); 1668 } 1669 1670 #undef __FUNCT__ 1671 #define __FUNCT__ "MatView_SeqBAIJ" 1672 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1673 { 1674 PetscErrorCode ierr; 1675 PetscTruth iascii,isbinary,isdraw; 1676 1677 PetscFunctionBegin; 1678 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1679 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1680 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1681 if (iascii){ 1682 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1683 } else if (isbinary) { 1684 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1685 } else if (isdraw) { 1686 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1687 } else { 1688 Mat B; 1689 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1690 ierr = MatView(B,viewer);CHKERRQ(ierr); 1691 ierr = MatDestroy(B);CHKERRQ(ierr); 1692 } 1693 PetscFunctionReturn(0); 1694 } 1695 1696 1697 #undef __FUNCT__ 1698 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1699 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1700 { 1701 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1702 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1703 PetscInt *ai = a->i,*ailen = a->ilen; 1704 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 1705 MatScalar *ap,*aa = a->a; 1706 1707 PetscFunctionBegin; 1708 for (k=0; k<m; k++) { /* loop over rows */ 1709 row = im[k]; brow = row/bs; 1710 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1711 if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1712 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1713 nrow = ailen[brow]; 1714 for (l=0; l<n; l++) { /* loop over columns */ 1715 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1716 if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1717 col = in[l] ; 1718 bcol = col/bs; 1719 cidx = col%bs; 1720 ridx = row%bs; 1721 high = nrow; 1722 low = 0; /* assume unsorted */ 1723 while (high-low > 5) { 1724 t = (low+high)/2; 1725 if (rp[t] > bcol) high = t; 1726 else low = t; 1727 } 1728 for (i=low; i<high; i++) { 1729 if (rp[i] > bcol) break; 1730 if (rp[i] == bcol) { 1731 *v++ = ap[bs2*i+bs*cidx+ridx]; 1732 goto finished; 1733 } 1734 } 1735 *v++ = 0.0; 1736 finished:; 1737 } 1738 } 1739 PetscFunctionReturn(0); 1740 } 1741 1742 #define CHUNKSIZE 10 1743 #undef __FUNCT__ 1744 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1745 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1746 { 1747 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1748 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1749 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1750 PetscErrorCode ierr; 1751 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 1752 PetscTruth roworiented=a->roworiented; 1753 const PetscScalar *value = v; 1754 MatScalar *ap,*aa = a->a,*bap; 1755 1756 PetscFunctionBegin; 1757 if (roworiented) { 1758 stepval = (n-1)*bs; 1759 } else { 1760 stepval = (m-1)*bs; 1761 } 1762 for (k=0; k<m; k++) { /* loop over added rows */ 1763 row = im[k]; 1764 if (row < 0) continue; 1765 #if defined(PETSC_USE_DEBUG) 1766 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1767 #endif 1768 rp = aj + ai[row]; 1769 ap = aa + bs2*ai[row]; 1770 rmax = imax[row]; 1771 nrow = ailen[row]; 1772 low = 0; 1773 high = nrow; 1774 for (l=0; l<n; l++) { /* loop over added columns */ 1775 if (in[l] < 0) continue; 1776 #if defined(PETSC_USE_DEBUG) 1777 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1778 #endif 1779 col = in[l]; 1780 if (roworiented) { 1781 value = v + k*(stepval+bs)*bs + l*bs; 1782 } else { 1783 value = v + l*(stepval+bs)*bs + k*bs; 1784 } 1785 if (col <= lastcol) low = 0; else high = nrow; 1786 lastcol = col; 1787 while (high-low > 7) { 1788 t = (low+high)/2; 1789 if (rp[t] > col) high = t; 1790 else low = t; 1791 } 1792 for (i=low; i<high; i++) { 1793 if (rp[i] > col) break; 1794 if (rp[i] == col) { 1795 bap = ap + bs2*i; 1796 if (roworiented) { 1797 if (is == ADD_VALUES) { 1798 for (ii=0; ii<bs; ii++,value+=stepval) { 1799 for (jj=ii; jj<bs2; jj+=bs) { 1800 bap[jj] += *value++; 1801 } 1802 } 1803 } else { 1804 for (ii=0; ii<bs; ii++,value+=stepval) { 1805 for (jj=ii; jj<bs2; jj+=bs) { 1806 bap[jj] = *value++; 1807 } 1808 } 1809 } 1810 } else { 1811 if (is == ADD_VALUES) { 1812 for (ii=0; ii<bs; ii++,value+=stepval) { 1813 for (jj=0; jj<bs; jj++) { 1814 *bap++ += *value++; 1815 } 1816 } 1817 } else { 1818 for (ii=0; ii<bs; ii++,value+=stepval) { 1819 for (jj=0; jj<bs; jj++) { 1820 *bap++ = *value++; 1821 } 1822 } 1823 } 1824 } 1825 goto noinsert2; 1826 } 1827 } 1828 if (nonew == 1) goto noinsert2; 1829 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1830 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1831 N = nrow++ - 1; high++; 1832 /* shift up all the later entries in this row */ 1833 for (ii=N; ii>=i; ii--) { 1834 rp[ii+1] = rp[ii]; 1835 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1836 } 1837 if (N >= i) { 1838 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1839 } 1840 rp[i] = col; 1841 bap = ap + bs2*i; 1842 if (roworiented) { 1843 for (ii=0; ii<bs; ii++,value+=stepval) { 1844 for (jj=ii; jj<bs2; jj+=bs) { 1845 bap[jj] = *value++; 1846 } 1847 } 1848 } else { 1849 for (ii=0; ii<bs; ii++,value+=stepval) { 1850 for (jj=0; jj<bs; jj++) { 1851 *bap++ = *value++; 1852 } 1853 } 1854 } 1855 noinsert2:; 1856 low = i; 1857 } 1858 ailen[row] = nrow; 1859 } 1860 PetscFunctionReturn(0); 1861 } 1862 1863 #undef __FUNCT__ 1864 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1865 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1866 { 1867 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1868 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1869 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 1870 PetscErrorCode ierr; 1871 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1872 MatScalar *aa = a->a,*ap; 1873 PetscReal ratio=0.6; 1874 1875 PetscFunctionBegin; 1876 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1877 1878 if (m) rmax = ailen[0]; 1879 for (i=1; i<mbs; i++) { 1880 /* move each row back by the amount of empty slots (fshift) before it*/ 1881 fshift += imax[i-1] - ailen[i-1]; 1882 rmax = PetscMax(rmax,ailen[i]); 1883 if (fshift) { 1884 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1885 N = ailen[i]; 1886 for (j=0; j<N; j++) { 1887 ip[j-fshift] = ip[j]; 1888 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1889 } 1890 } 1891 ai[i] = ai[i-1] + ailen[i-1]; 1892 } 1893 if (mbs) { 1894 fshift += imax[mbs-1] - ailen[mbs-1]; 1895 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1896 } 1897 /* reset ilen and imax for each row */ 1898 for (i=0; i<mbs; i++) { 1899 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1900 } 1901 a->nz = ai[mbs]; 1902 1903 /* diagonals may have moved, so kill the diagonal pointers */ 1904 a->idiagvalid = PETSC_FALSE; 1905 if (fshift && a->diag) { 1906 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1907 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1908 a->diag = 0; 1909 } 1910 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1911 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1912 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1913 a->reallocs = 0; 1914 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1915 1916 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1917 if (a->compressedrow.use){ 1918 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1919 } 1920 1921 A->same_nonzero = PETSC_TRUE; 1922 PetscFunctionReturn(0); 1923 } 1924 1925 /* 1926 This function returns an array of flags which indicate the locations of contiguous 1927 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1928 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1929 Assume: sizes should be long enough to hold all the values. 1930 */ 1931 #undef __FUNCT__ 1932 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1933 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1934 { 1935 PetscInt i,j,k,row; 1936 PetscTruth flg; 1937 1938 PetscFunctionBegin; 1939 for (i=0,j=0; i<n; j++) { 1940 row = idx[i]; 1941 if (row%bs!=0) { /* Not the begining of a block */ 1942 sizes[j] = 1; 1943 i++; 1944 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1945 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1946 i++; 1947 } else { /* Begining of the block, so check if the complete block exists */ 1948 flg = PETSC_TRUE; 1949 for (k=1; k<bs; k++) { 1950 if (row+k != idx[i+k]) { /* break in the block */ 1951 flg = PETSC_FALSE; 1952 break; 1953 } 1954 } 1955 if (flg) { /* No break in the bs */ 1956 sizes[j] = bs; 1957 i+= bs; 1958 } else { 1959 sizes[j] = 1; 1960 i++; 1961 } 1962 } 1963 } 1964 *bs_max = j; 1965 PetscFunctionReturn(0); 1966 } 1967 1968 #undef __FUNCT__ 1969 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1970 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 1971 { 1972 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1973 PetscErrorCode ierr; 1974 PetscInt i,j,k,count,*rows; 1975 PetscInt bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max; 1976 PetscScalar zero = 0.0; 1977 MatScalar *aa; 1978 1979 PetscFunctionBegin; 1980 /* Make a copy of the IS and sort it */ 1981 /* allocate memory for rows,sizes */ 1982 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1983 sizes = rows + is_n; 1984 1985 /* copy IS values to rows, and sort them */ 1986 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1987 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1988 if (baij->keepzeroedrows) { 1989 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1990 bs_max = is_n; 1991 A->same_nonzero = PETSC_TRUE; 1992 } else { 1993 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1994 A->same_nonzero = PETSC_FALSE; 1995 } 1996 1997 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1998 row = rows[j]; 1999 if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2000 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2001 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2002 if (sizes[i] == bs && !baij->keepzeroedrows) { 2003 if (diag != 0.0) { 2004 if (baij->ilen[row/bs] > 0) { 2005 baij->ilen[row/bs] = 1; 2006 baij->j[baij->i[row/bs]] = row/bs; 2007 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2008 } 2009 /* Now insert all the diagonal values for this bs */ 2010 for (k=0; k<bs; k++) { 2011 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2012 } 2013 } else { /* (diag == 0.0) */ 2014 baij->ilen[row/bs] = 0; 2015 } /* end (diag == 0.0) */ 2016 } else { /* (sizes[i] != bs) */ 2017 #if defined (PETSC_USE_DEBUG) 2018 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2019 #endif 2020 for (k=0; k<count; k++) { 2021 aa[0] = zero; 2022 aa += bs; 2023 } 2024 if (diag != 0.0) { 2025 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2026 } 2027 } 2028 } 2029 2030 ierr = PetscFree(rows);CHKERRQ(ierr); 2031 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2032 PetscFunctionReturn(0); 2033 } 2034 2035 #undef __FUNCT__ 2036 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2037 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2038 { 2039 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2040 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2041 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2042 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 2043 PetscErrorCode ierr; 2044 PetscInt ridx,cidx,bs2=a->bs2; 2045 PetscTruth roworiented=a->roworiented; 2046 MatScalar *ap,value,*aa=a->a,*bap; 2047 2048 PetscFunctionBegin; 2049 for (k=0; k<m; k++) { /* loop over added rows */ 2050 row = im[k]; 2051 brow = row/bs; 2052 if (row < 0) continue; 2053 #if defined(PETSC_USE_DEBUG) 2054 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 2055 #endif 2056 rp = aj + ai[brow]; 2057 ap = aa + bs2*ai[brow]; 2058 rmax = imax[brow]; 2059 nrow = ailen[brow]; 2060 low = 0; 2061 high = nrow; 2062 for (l=0; l<n; l++) { /* loop over added columns */ 2063 if (in[l] < 0) continue; 2064 #if defined(PETSC_USE_DEBUG) 2065 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 2066 #endif 2067 col = in[l]; bcol = col/bs; 2068 ridx = row % bs; cidx = col % bs; 2069 if (roworiented) { 2070 value = v[l + k*n]; 2071 } else { 2072 value = v[k + l*m]; 2073 } 2074 if (col <= lastcol) low = 0; else high = nrow; 2075 lastcol = col; 2076 while (high-low > 7) { 2077 t = (low+high)/2; 2078 if (rp[t] > bcol) high = t; 2079 else low = t; 2080 } 2081 for (i=low; i<high; i++) { 2082 if (rp[i] > bcol) break; 2083 if (rp[i] == bcol) { 2084 bap = ap + bs2*i + bs*cidx + ridx; 2085 if (is == ADD_VALUES) *bap += value; 2086 else *bap = value; 2087 goto noinsert1; 2088 } 2089 } 2090 if (nonew == 1) goto noinsert1; 2091 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2092 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2093 N = nrow++ - 1; high++; 2094 /* shift up all the later entries in this row */ 2095 for (ii=N; ii>=i; ii--) { 2096 rp[ii+1] = rp[ii]; 2097 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2098 } 2099 if (N>=i) { 2100 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2101 } 2102 rp[i] = bcol; 2103 ap[bs2*i + bs*cidx + ridx] = value; 2104 a->nz++; 2105 noinsert1:; 2106 low = i; 2107 } 2108 ailen[brow] = nrow; 2109 } 2110 A->same_nonzero = PETSC_FALSE; 2111 PetscFunctionReturn(0); 2112 } 2113 2114 2115 #undef __FUNCT__ 2116 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2117 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 2118 { 2119 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2120 Mat outA; 2121 PetscErrorCode ierr; 2122 PetscTruth row_identity,col_identity; 2123 2124 PetscFunctionBegin; 2125 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2126 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2127 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2128 if (!row_identity || !col_identity) { 2129 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2130 } 2131 2132 outA = inA; 2133 inA->factor = MAT_FACTOR_LU; 2134 2135 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2136 2137 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2138 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 2139 a->row = row; 2140 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2141 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 2142 a->col = col; 2143 2144 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2145 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2146 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2147 2148 /* 2149 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 2150 for ILU(0) factorization with natural ordering 2151 */ 2152 if (inA->rmap.bs < 8) { 2153 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 2154 } else { 2155 if (!a->solve_work) { 2156 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2157 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2158 } 2159 } 2160 2161 ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 2162 2163 PetscFunctionReturn(0); 2164 } 2165 2166 EXTERN_C_BEGIN 2167 #undef __FUNCT__ 2168 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2169 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2170 { 2171 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2172 PetscInt i,nz,nbs; 2173 2174 PetscFunctionBegin; 2175 nz = baij->maxnz/baij->bs2; 2176 nbs = baij->nbs; 2177 for (i=0; i<nz; i++) { 2178 baij->j[i] = indices[i]; 2179 } 2180 baij->nz = nz; 2181 for (i=0; i<nbs; i++) { 2182 baij->ilen[i] = baij->imax[i]; 2183 } 2184 2185 PetscFunctionReturn(0); 2186 } 2187 EXTERN_C_END 2188 2189 #undef __FUNCT__ 2190 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2191 /*@ 2192 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2193 in the matrix. 2194 2195 Input Parameters: 2196 + mat - the SeqBAIJ matrix 2197 - indices - the column indices 2198 2199 Level: advanced 2200 2201 Notes: 2202 This can be called if you have precomputed the nonzero structure of the 2203 matrix and want to provide it to the matrix object to improve the performance 2204 of the MatSetValues() operation. 2205 2206 You MUST have set the correct numbers of nonzeros per row in the call to 2207 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2208 2209 MUST be called before any calls to MatSetValues(); 2210 2211 @*/ 2212 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2213 { 2214 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 2215 2216 PetscFunctionBegin; 2217 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 2218 PetscValidPointer(indices,2); 2219 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 2220 if (f) { 2221 ierr = (*f)(mat,indices);CHKERRQ(ierr); 2222 } else { 2223 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 2224 } 2225 PetscFunctionReturn(0); 2226 } 2227 2228 #undef __FUNCT__ 2229 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2230 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2231 { 2232 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2233 PetscErrorCode ierr; 2234 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2235 PetscReal atmp; 2236 PetscScalar *x,zero = 0.0; 2237 MatScalar *aa; 2238 PetscInt ncols,brow,krow,kcol; 2239 2240 PetscFunctionBegin; 2241 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2242 bs = A->rmap.bs; 2243 aa = a->a; 2244 ai = a->i; 2245 aj = a->j; 2246 mbs = a->mbs; 2247 2248 ierr = VecSet(v,zero);CHKERRQ(ierr); 2249 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2250 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2251 if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2252 for (i=0; i<mbs; i++) { 2253 ncols = ai[1] - ai[0]; ai++; 2254 brow = bs*i; 2255 for (j=0; j<ncols; j++){ 2256 for (kcol=0; kcol<bs; kcol++){ 2257 for (krow=0; krow<bs; krow++){ 2258 atmp = PetscAbsScalar(*aa);aa++; 2259 row = brow + krow; /* row index */ 2260 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2261 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2262 } 2263 } 2264 aj++; 2265 } 2266 } 2267 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2268 PetscFunctionReturn(0); 2269 } 2270 2271 #undef __FUNCT__ 2272 #define __FUNCT__ "MatCopy_SeqBAIJ" 2273 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2274 { 2275 PetscErrorCode ierr; 2276 2277 PetscFunctionBegin; 2278 /* If the two matrices have the same copy implementation, use fast copy. */ 2279 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2280 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2281 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2282 2283 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 2284 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 2285 } 2286 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 2287 } else { 2288 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2289 } 2290 PetscFunctionReturn(0); 2291 } 2292 2293 #undef __FUNCT__ 2294 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2295 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2296 { 2297 PetscErrorCode ierr; 2298 2299 PetscFunctionBegin; 2300 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2301 PetscFunctionReturn(0); 2302 } 2303 2304 #undef __FUNCT__ 2305 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2306 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2307 { 2308 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2309 PetscFunctionBegin; 2310 *array = a->a; 2311 PetscFunctionReturn(0); 2312 } 2313 2314 #undef __FUNCT__ 2315 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2316 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2317 { 2318 PetscFunctionBegin; 2319 PetscFunctionReturn(0); 2320 } 2321 2322 #include "petscblaslapack.h" 2323 #undef __FUNCT__ 2324 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2325 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2326 { 2327 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2328 PetscErrorCode ierr; 2329 PetscInt i,bs=Y->rmap.bs,j,bs2; 2330 PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz); 2331 2332 PetscFunctionBegin; 2333 if (str == SAME_NONZERO_PATTERN) { 2334 PetscScalar alpha = a; 2335 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2336 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2337 if (y->xtoy && y->XtoY != X) { 2338 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2339 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2340 } 2341 if (!y->xtoy) { /* get xtoy */ 2342 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2343 y->XtoY = X; 2344 } 2345 bs2 = bs*bs; 2346 for (i=0; i<x->nz; i++) { 2347 j = 0; 2348 while (j < bs2){ 2349 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2350 j++; 2351 } 2352 } 2353 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2354 } else { 2355 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2356 } 2357 PetscFunctionReturn(0); 2358 } 2359 2360 #undef __FUNCT__ 2361 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2362 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2363 { 2364 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2365 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2366 MatScalar *aa = a->a; 2367 2368 PetscFunctionBegin; 2369 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2370 PetscFunctionReturn(0); 2371 } 2372 2373 #undef __FUNCT__ 2374 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2375 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2376 { 2377 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2378 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2379 MatScalar *aa = a->a; 2380 2381 PetscFunctionBegin; 2382 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2383 PetscFunctionReturn(0); 2384 } 2385 2386 2387 /* -------------------------------------------------------------------*/ 2388 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2389 MatGetRow_SeqBAIJ, 2390 MatRestoreRow_SeqBAIJ, 2391 MatMult_SeqBAIJ_N, 2392 /* 4*/ MatMultAdd_SeqBAIJ_N, 2393 MatMultTranspose_SeqBAIJ, 2394 MatMultTransposeAdd_SeqBAIJ, 2395 MatSolve_SeqBAIJ_N, 2396 0, 2397 0, 2398 /*10*/ 0, 2399 MatLUFactor_SeqBAIJ, 2400 0, 2401 0, 2402 MatTranspose_SeqBAIJ, 2403 /*15*/ MatGetInfo_SeqBAIJ, 2404 MatEqual_SeqBAIJ, 2405 MatGetDiagonal_SeqBAIJ, 2406 MatDiagonalScale_SeqBAIJ, 2407 MatNorm_SeqBAIJ, 2408 /*20*/ 0, 2409 MatAssemblyEnd_SeqBAIJ, 2410 0, 2411 MatSetOption_SeqBAIJ, 2412 MatZeroEntries_SeqBAIJ, 2413 /*25*/ MatZeroRows_SeqBAIJ, 2414 MatLUFactorSymbolic_SeqBAIJ, 2415 MatLUFactorNumeric_SeqBAIJ_N, 2416 MatCholeskyFactorSymbolic_SeqBAIJ, 2417 MatCholeskyFactorNumeric_SeqBAIJ_N, 2418 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2419 MatILUFactorSymbolic_SeqBAIJ, 2420 MatICCFactorSymbolic_SeqBAIJ, 2421 MatGetArray_SeqBAIJ, 2422 MatRestoreArray_SeqBAIJ, 2423 /*35*/ MatDuplicate_SeqBAIJ, 2424 0, 2425 0, 2426 MatILUFactor_SeqBAIJ, 2427 0, 2428 /*40*/ MatAXPY_SeqBAIJ, 2429 MatGetSubMatrices_SeqBAIJ, 2430 MatIncreaseOverlap_SeqBAIJ, 2431 MatGetValues_SeqBAIJ, 2432 MatCopy_SeqBAIJ, 2433 /*45*/ 0, 2434 MatScale_SeqBAIJ, 2435 0, 2436 0, 2437 0, 2438 /*50*/ 0, 2439 MatGetRowIJ_SeqBAIJ, 2440 MatRestoreRowIJ_SeqBAIJ, 2441 0, 2442 0, 2443 /*55*/ 0, 2444 0, 2445 0, 2446 0, 2447 MatSetValuesBlocked_SeqBAIJ, 2448 /*60*/ MatGetSubMatrix_SeqBAIJ, 2449 MatDestroy_SeqBAIJ, 2450 MatView_SeqBAIJ, 2451 0, 2452 0, 2453 /*65*/ 0, 2454 0, 2455 0, 2456 0, 2457 0, 2458 /*70*/ MatGetRowMaxAbs_SeqBAIJ, 2459 MatConvert_Basic, 2460 0, 2461 0, 2462 0, 2463 /*75*/ 0, 2464 0, 2465 0, 2466 0, 2467 0, 2468 /*80*/ 0, 2469 0, 2470 0, 2471 0, 2472 MatLoad_SeqBAIJ, 2473 /*85*/ 0, 2474 0, 2475 0, 2476 0, 2477 0, 2478 /*90*/ 0, 2479 0, 2480 0, 2481 0, 2482 0, 2483 /*95*/ 0, 2484 0, 2485 0, 2486 0, 2487 0, 2488 /*100*/0, 2489 0, 2490 0, 2491 0, 2492 0, 2493 /*105*/0, 2494 MatRealPart_SeqBAIJ, 2495 MatImaginaryPart_SeqBAIJ, 2496 0, 2497 0, 2498 /*110*/0, 2499 0, 2500 0, 2501 0, 2502 MatMissingDiagonal_SeqBAIJ 2503 /*115*/ 2504 }; 2505 2506 EXTERN_C_BEGIN 2507 #undef __FUNCT__ 2508 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2509 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2510 { 2511 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2512 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2513 PetscErrorCode ierr; 2514 2515 PetscFunctionBegin; 2516 if (aij->nonew != 1) { 2517 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2518 } 2519 2520 /* allocate space for values if not already there */ 2521 if (!aij->saved_values) { 2522 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2523 } 2524 2525 /* copy values over */ 2526 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2527 PetscFunctionReturn(0); 2528 } 2529 EXTERN_C_END 2530 2531 EXTERN_C_BEGIN 2532 #undef __FUNCT__ 2533 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2534 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2535 { 2536 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2537 PetscErrorCode ierr; 2538 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2539 2540 PetscFunctionBegin; 2541 if (aij->nonew != 1) { 2542 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2543 } 2544 if (!aij->saved_values) { 2545 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2546 } 2547 2548 /* copy values over */ 2549 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2550 PetscFunctionReturn(0); 2551 } 2552 EXTERN_C_END 2553 2554 EXTERN_C_BEGIN 2555 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2556 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2557 EXTERN_C_END 2558 2559 EXTERN_C_BEGIN 2560 #undef __FUNCT__ 2561 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2562 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2563 { 2564 Mat_SeqBAIJ *b; 2565 PetscErrorCode ierr; 2566 PetscInt i,mbs,nbs,bs2,newbs = bs; 2567 PetscTruth flg,skipallocation = PETSC_FALSE; 2568 2569 PetscFunctionBegin; 2570 2571 if (nz == MAT_SKIP_ALLOCATION) { 2572 skipallocation = PETSC_TRUE; 2573 nz = 0; 2574 } 2575 2576 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2577 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2578 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2579 2580 if (nnz && newbs != bs) { 2581 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2582 } 2583 bs = newbs; 2584 2585 B->rmap.bs = B->cmap.bs = bs; 2586 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2587 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2588 2589 B->preallocated = PETSC_TRUE; 2590 2591 mbs = B->rmap.n/bs; 2592 nbs = B->cmap.n/bs; 2593 bs2 = bs*bs; 2594 2595 if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) { 2596 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs); 2597 } 2598 2599 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2600 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2601 if (nnz) { 2602 for (i=0; i<mbs; i++) { 2603 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2604 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2605 } 2606 } 2607 2608 b = (Mat_SeqBAIJ*)B->data; 2609 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2610 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2611 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2612 2613 B->ops->solve = MatSolve_SeqBAIJ_Update; 2614 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2615 if (!flg) { 2616 switch (bs) { 2617 case 1: 2618 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2619 B->ops->mult = MatMult_SeqBAIJ_1; 2620 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2621 B->ops->pbrelax = MatPBRelax_SeqBAIJ_1; 2622 break; 2623 case 2: 2624 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2625 B->ops->mult = MatMult_SeqBAIJ_2; 2626 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2627 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2628 break; 2629 case 3: 2630 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2631 B->ops->mult = MatMult_SeqBAIJ_3; 2632 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2633 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2634 break; 2635 case 4: 2636 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2637 B->ops->mult = MatMult_SeqBAIJ_4; 2638 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2639 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2640 break; 2641 case 5: 2642 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2643 B->ops->mult = MatMult_SeqBAIJ_5; 2644 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2645 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2646 break; 2647 case 6: 2648 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2649 B->ops->mult = MatMult_SeqBAIJ_6; 2650 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2651 B->ops->pbrelax = MatPBRelax_SeqBAIJ_6; 2652 break; 2653 case 7: 2654 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2655 B->ops->mult = MatMult_SeqBAIJ_7; 2656 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2657 B->ops->pbrelax = MatPBRelax_SeqBAIJ_7; 2658 break; 2659 default: 2660 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2661 B->ops->mult = MatMult_SeqBAIJ_N; 2662 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2663 break; 2664 } 2665 } 2666 B->rmap.bs = bs; 2667 b->mbs = mbs; 2668 b->nbs = nbs; 2669 if (!skipallocation) { 2670 if (!b->imax) { 2671 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2672 } 2673 /* b->ilen will count nonzeros in each block row so far. */ 2674 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2675 if (!nnz) { 2676 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2677 else if (nz <= 0) nz = 1; 2678 for (i=0; i<mbs; i++) b->imax[i] = nz; 2679 nz = nz*mbs; 2680 } else { 2681 nz = 0; 2682 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2683 } 2684 2685 /* allocate the matrix space */ 2686 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2687 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 2688 ierr = PetscLogObjectMemory(B,(B->rmap.N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2689 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2690 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2691 b->singlemalloc = PETSC_TRUE; 2692 b->i[0] = 0; 2693 for (i=1; i<mbs+1; i++) { 2694 b->i[i] = b->i[i-1] + b->imax[i-1]; 2695 } 2696 b->free_a = PETSC_TRUE; 2697 b->free_ij = PETSC_TRUE; 2698 } else { 2699 b->free_a = PETSC_FALSE; 2700 b->free_ij = PETSC_FALSE; 2701 } 2702 2703 B->rmap.bs = bs; 2704 b->bs2 = bs2; 2705 b->mbs = mbs; 2706 b->nz = 0; 2707 b->maxnz = nz*bs2; 2708 B->info.nz_unneeded = (PetscReal)b->maxnz; 2709 PetscFunctionReturn(0); 2710 } 2711 EXTERN_C_END 2712 2713 EXTERN_C_BEGIN 2714 #undef __FUNCT__ 2715 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 2716 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2717 { 2718 PetscInt i,m,nz,nz_max=0,*nnz; 2719 PetscScalar *values=0; 2720 PetscErrorCode ierr; 2721 2722 PetscFunctionBegin; 2723 2724 if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2725 B->rmap.bs = bs; 2726 B->cmap.bs = bs; 2727 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2728 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2729 m = B->rmap.n/bs; 2730 2731 if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 2732 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 2733 for(i=0; i<m; i++) { 2734 nz = ii[i+1]- ii[i]; 2735 if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 2736 nz_max = PetscMax(nz_max, nz); 2737 nnz[i] = nz; 2738 } 2739 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2740 ierr = PetscFree(nnz);CHKERRQ(ierr); 2741 2742 values = (PetscScalar*)V; 2743 if (!values) { 2744 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2745 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2746 } 2747 for (i=0; i<m; i++) { 2748 PetscInt ncols = ii[i+1] - ii[i]; 2749 const PetscInt *icols = jj + ii[i]; 2750 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2751 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2752 } 2753 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2754 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2755 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2756 2757 PetscFunctionReturn(0); 2758 } 2759 EXTERN_C_END 2760 2761 2762 EXTERN_C_BEGIN 2763 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 2764 EXTERN_C_END 2765 2766 /*MC 2767 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2768 block sparse compressed row format. 2769 2770 Options Database Keys: 2771 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2772 2773 Level: beginner 2774 2775 .seealso: MatCreateSeqBAIJ() 2776 M*/ 2777 2778 2779 EXTERN_C_BEGIN 2780 #undef __FUNCT__ 2781 #define __FUNCT__ "MatCreate_SeqBAIJ" 2782 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2783 { 2784 PetscErrorCode ierr; 2785 PetscMPIInt size; 2786 Mat_SeqBAIJ *b; 2787 2788 PetscFunctionBegin; 2789 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 2790 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2791 2792 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2793 B->data = (void*)b; 2794 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2795 B->mapping = 0; 2796 b->row = 0; 2797 b->col = 0; 2798 b->icol = 0; 2799 b->reallocs = 0; 2800 b->saved_values = 0; 2801 2802 b->roworiented = PETSC_TRUE; 2803 b->nonew = 0; 2804 b->diag = 0; 2805 b->solve_work = 0; 2806 b->mult_work = 0; 2807 B->spptr = 0; 2808 B->info.nz_unneeded = (PetscReal)b->maxnz; 2809 b->keepzeroedrows = PETSC_FALSE; 2810 b->xtoy = 0; 2811 b->XtoY = 0; 2812 b->compressedrow.use = PETSC_FALSE; 2813 b->compressedrow.nrows = 0; 2814 b->compressedrow.i = PETSC_NULL; 2815 b->compressedrow.rindex = PETSC_NULL; 2816 b->compressedrow.checked = PETSC_FALSE; 2817 B->same_nonzero = PETSC_FALSE; 2818 2819 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqbaij_petsc_C", 2820 "MatGetFactor_seqbaij_petsc", 2821 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 2822 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2823 "MatInvertBlockDiagonal_SeqBAIJ", 2824 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2825 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2826 "MatStoreValues_SeqBAIJ", 2827 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2828 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2829 "MatRetrieveValues_SeqBAIJ", 2830 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2831 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2832 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2833 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2834 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2835 "MatConvert_SeqBAIJ_SeqAIJ", 2836 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2837 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2838 "MatConvert_SeqBAIJ_SeqSBAIJ", 2839 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2840 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2841 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2842 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2843 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 2844 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 2845 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 2846 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 2847 PetscFunctionReturn(0); 2848 } 2849 EXTERN_C_END 2850 2851 #undef __FUNCT__ 2852 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 2853 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2854 { 2855 Mat C = *B; 2856 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 2857 PetscErrorCode ierr; 2858 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2859 2860 PetscFunctionBegin; 2861 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2862 2863 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 2864 for (i=0; i<mbs; i++) { 2865 c->imax[i] = a->imax[i]; 2866 c->ilen[i] = a->ilen[i]; 2867 } 2868 2869 /* allocate the matrix space */ 2870 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2871 c->singlemalloc = PETSC_TRUE; 2872 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2873 if (mbs > 0) { 2874 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2875 if (cpvalues == MAT_COPY_VALUES) { 2876 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2877 } else { 2878 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2879 } 2880 } 2881 c->roworiented = a->roworiented; 2882 c->nonew = a->nonew; 2883 ierr = PetscMapCopy(((PetscObject)A)->comm,&A->rmap,&C->rmap);CHKERRQ(ierr); 2884 ierr = PetscMapCopy(((PetscObject)A)->comm,&A->cmap,&C->cmap);CHKERRQ(ierr); 2885 c->bs2 = a->bs2; 2886 c->mbs = a->mbs; 2887 c->nbs = a->nbs; 2888 2889 if (a->diag) { 2890 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2891 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2892 for (i=0; i<mbs; i++) { 2893 c->diag[i] = a->diag[i]; 2894 } 2895 } else c->diag = 0; 2896 c->nz = a->nz; 2897 c->maxnz = a->maxnz; 2898 c->solve_work = 0; 2899 c->mult_work = 0; 2900 c->free_a = PETSC_TRUE; 2901 c->free_ij = PETSC_TRUE; 2902 C->preallocated = PETSC_TRUE; 2903 C->assembled = PETSC_TRUE; 2904 2905 c->compressedrow.use = a->compressedrow.use; 2906 c->compressedrow.nrows = a->compressedrow.nrows; 2907 c->compressedrow.checked = a->compressedrow.checked; 2908 if ( a->compressedrow.checked && a->compressedrow.use){ 2909 i = a->compressedrow.nrows; 2910 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2911 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2912 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2913 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2914 } else { 2915 c->compressedrow.use = PETSC_FALSE; 2916 c->compressedrow.i = PETSC_NULL; 2917 c->compressedrow.rindex = PETSC_NULL; 2918 } 2919 C->same_nonzero = A->same_nonzero; 2920 *B = C; 2921 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 2922 PetscFunctionReturn(0); 2923 } 2924 2925 #undef __FUNCT__ 2926 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2927 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2928 { 2929 PetscErrorCode ierr; 2930 2931 PetscFunctionBegin; 2932 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 2933 ierr = MatSetSizes(*B,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 2934 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 2935 ierr = MatDuplicateNoCreate_SeqBAIJ(A,cpvalues,B); 2936 PetscFunctionReturn(0); 2937 } 2938 2939 #undef __FUNCT__ 2940 #define __FUNCT__ "MatLoad_SeqBAIJ" 2941 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A) 2942 { 2943 Mat_SeqBAIJ *a; 2944 Mat B; 2945 PetscErrorCode ierr; 2946 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2947 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2948 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2949 PetscInt *masked,nmask,tmp,bs2,ishift; 2950 PetscMPIInt size; 2951 int fd; 2952 PetscScalar *aa; 2953 MPI_Comm comm = ((PetscObject)viewer)->comm; 2954 2955 PetscFunctionBegin; 2956 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 2957 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2958 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2959 bs2 = bs*bs; 2960 2961 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2962 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2963 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2964 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2965 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2966 M = header[1]; N = header[2]; nz = header[3]; 2967 2968 if (header[3] < 0) { 2969 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2970 } 2971 2972 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2973 2974 /* 2975 This code adds extra rows to make sure the number of rows is 2976 divisible by the blocksize 2977 */ 2978 mbs = M/bs; 2979 extra_rows = bs - M + bs*(mbs); 2980 if (extra_rows == bs) extra_rows = 0; 2981 else mbs++; 2982 if (extra_rows) { 2983 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2984 } 2985 2986 /* read in row lengths */ 2987 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2988 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2989 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2990 2991 /* read in column indices */ 2992 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2993 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2994 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2995 2996 /* loop over row lengths determining block row lengths */ 2997 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2998 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2999 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 3000 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3001 masked = mask + mbs; 3002 rowcount = 0; nzcount = 0; 3003 for (i=0; i<mbs; i++) { 3004 nmask = 0; 3005 for (j=0; j<bs; j++) { 3006 kmax = rowlengths[rowcount]; 3007 for (k=0; k<kmax; k++) { 3008 tmp = jj[nzcount++]/bs; 3009 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3010 } 3011 rowcount++; 3012 } 3013 browlengths[i] += nmask; 3014 /* zero out the mask elements we set */ 3015 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3016 } 3017 3018 /* create our matrix */ 3019 ierr = MatCreate(comm,&B); 3020 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 3021 ierr = MatSetType(B,type);CHKERRQ(ierr); 3022 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 3023 a = (Mat_SeqBAIJ*)B->data; 3024 3025 /* set matrix "i" values */ 3026 a->i[0] = 0; 3027 for (i=1; i<= mbs; i++) { 3028 a->i[i] = a->i[i-1] + browlengths[i-1]; 3029 a->ilen[i-1] = browlengths[i-1]; 3030 } 3031 a->nz = 0; 3032 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3033 3034 /* read in nonzero values */ 3035 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3036 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3037 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3038 3039 /* set "a" and "j" values into matrix */ 3040 nzcount = 0; jcount = 0; 3041 for (i=0; i<mbs; i++) { 3042 nzcountb = nzcount; 3043 nmask = 0; 3044 for (j=0; j<bs; j++) { 3045 kmax = rowlengths[i*bs+j]; 3046 for (k=0; k<kmax; k++) { 3047 tmp = jj[nzcount++]/bs; 3048 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3049 } 3050 } 3051 /* sort the masked values */ 3052 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3053 3054 /* set "j" values into matrix */ 3055 maskcount = 1; 3056 for (j=0; j<nmask; j++) { 3057 a->j[jcount++] = masked[j]; 3058 mask[masked[j]] = maskcount++; 3059 } 3060 /* set "a" values into matrix */ 3061 ishift = bs2*a->i[i]; 3062 for (j=0; j<bs; j++) { 3063 kmax = rowlengths[i*bs+j]; 3064 for (k=0; k<kmax; k++) { 3065 tmp = jj[nzcountb]/bs ; 3066 block = mask[tmp] - 1; 3067 point = jj[nzcountb] - bs*tmp; 3068 idx = ishift + bs2*block + j + bs*point; 3069 a->a[idx] = (MatScalar)aa[nzcountb++]; 3070 } 3071 } 3072 /* zero out the mask elements we set */ 3073 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3074 } 3075 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3076 3077 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3078 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3079 ierr = PetscFree(aa);CHKERRQ(ierr); 3080 ierr = PetscFree(jj);CHKERRQ(ierr); 3081 ierr = PetscFree(mask);CHKERRQ(ierr); 3082 3083 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3084 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3085 ierr = MatView_Private(B);CHKERRQ(ierr); 3086 3087 *A = B; 3088 PetscFunctionReturn(0); 3089 } 3090 3091 #undef __FUNCT__ 3092 #define __FUNCT__ "MatCreateSeqBAIJ" 3093 /*@C 3094 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3095 compressed row) format. For good matrix assembly performance the 3096 user should preallocate the matrix storage by setting the parameter nz 3097 (or the array nnz). By setting these parameters accurately, performance 3098 during matrix assembly can be increased by more than a factor of 50. 3099 3100 Collective on MPI_Comm 3101 3102 Input Parameters: 3103 + comm - MPI communicator, set to PETSC_COMM_SELF 3104 . bs - size of block 3105 . m - number of rows 3106 . n - number of columns 3107 . nz - number of nonzero blocks per block row (same for all rows) 3108 - nnz - array containing the number of nonzero blocks in the various block rows 3109 (possibly different for each block row) or PETSC_NULL 3110 3111 Output Parameter: 3112 . A - the matrix 3113 3114 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3115 MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely 3116 true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles. 3117 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3118 3119 Options Database Keys: 3120 . -mat_no_unroll - uses code that does not unroll the loops in the 3121 block calculations (much slower) 3122 . -mat_block_size - size of the blocks to use 3123 3124 Level: intermediate 3125 3126 Notes: 3127 The number of rows and columns must be divisible by blocksize. 3128 3129 If the nnz parameter is given then the nz parameter is ignored 3130 3131 A nonzero block is any block that as 1 or more nonzeros in it 3132 3133 The block AIJ format is fully compatible with standard Fortran 77 3134 storage. That is, the stored row and column indices can begin at 3135 either one (as in Fortran) or zero. See the users' manual for details. 3136 3137 Specify the preallocated storage with either nz or nnz (not both). 3138 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3139 allocation. For additional details, see the users manual chapter on 3140 matrices. 3141 3142 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3143 @*/ 3144 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3145 { 3146 PetscErrorCode ierr; 3147 3148 PetscFunctionBegin; 3149 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3150 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3151 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3152 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3153 PetscFunctionReturn(0); 3154 } 3155 3156 #undef __FUNCT__ 3157 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3158 /*@C 3159 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3160 per row in the matrix. For good matrix assembly performance the 3161 user should preallocate the matrix storage by setting the parameter nz 3162 (or the array nnz). By setting these parameters accurately, performance 3163 during matrix assembly can be increased by more than a factor of 50. 3164 3165 Collective on MPI_Comm 3166 3167 Input Parameters: 3168 + A - the matrix 3169 . bs - size of block 3170 . nz - number of block nonzeros per block row (same for all rows) 3171 - nnz - array containing the number of block nonzeros in the various block rows 3172 (possibly different for each block row) or PETSC_NULL 3173 3174 Options Database Keys: 3175 . -mat_no_unroll - uses code that does not unroll the loops in the 3176 block calculations (much slower) 3177 . -mat_block_size - size of the blocks to use 3178 3179 Level: intermediate 3180 3181 Notes: 3182 If the nnz parameter is given then the nz parameter is ignored 3183 3184 You can call MatGetInfo() to get information on how effective the preallocation was; 3185 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3186 You can also run with the option -info and look for messages with the string 3187 malloc in them to see if additional memory allocation was needed. 3188 3189 The block AIJ format is fully compatible with standard Fortran 77 3190 storage. That is, the stored row and column indices can begin at 3191 either one (as in Fortran) or zero. See the users' manual for details. 3192 3193 Specify the preallocated storage with either nz or nnz (not both). 3194 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3195 allocation. For additional details, see the users manual chapter on 3196 matrices. 3197 3198 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3199 @*/ 3200 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3201 { 3202 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 3203 3204 PetscFunctionBegin; 3205 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3206 if (f) { 3207 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 3208 } 3209 PetscFunctionReturn(0); 3210 } 3211 3212 #undef __FUNCT__ 3213 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3214 /*@C 3215 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3216 (the default sequential PETSc format). 3217 3218 Collective on MPI_Comm 3219 3220 Input Parameters: 3221 + A - the matrix 3222 . i - the indices into j for the start of each local row (starts with zero) 3223 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3224 - v - optional values in the matrix 3225 3226 Level: developer 3227 3228 .keywords: matrix, aij, compressed row, sparse 3229 3230 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3231 @*/ 3232 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3233 { 3234 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 3235 3236 PetscFunctionBegin; 3237 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3238 if (f) { 3239 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 3240 } 3241 PetscFunctionReturn(0); 3242 } 3243 3244 3245 #undef __FUNCT__ 3246 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3247 /*@ 3248 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 3249 (upper triangular entries in CSR format) provided by the user. 3250 3251 Collective on MPI_Comm 3252 3253 Input Parameters: 3254 + comm - must be an MPI communicator of size 1 3255 . bs - size of block 3256 . m - number of rows 3257 . n - number of columns 3258 . i - row indices 3259 . j - column indices 3260 - a - matrix values 3261 3262 Output Parameter: 3263 . mat - the matrix 3264 3265 Level: intermediate 3266 3267 Notes: 3268 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3269 once the matrix is destroyed 3270 3271 You cannot set new nonzero locations into this matrix, that will generate an error. 3272 3273 The i and j indices are 0 based 3274 3275 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3276 3277 @*/ 3278 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3279 { 3280 PetscErrorCode ierr; 3281 PetscInt ii; 3282 Mat_SeqBAIJ *baij; 3283 3284 PetscFunctionBegin; 3285 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3286 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3287 3288 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3289 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3290 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3291 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3292 baij = (Mat_SeqBAIJ*)(*mat)->data; 3293 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3294 3295 baij->i = i; 3296 baij->j = j; 3297 baij->a = a; 3298 baij->singlemalloc = PETSC_FALSE; 3299 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3300 baij->free_a = PETSC_FALSE; 3301 baij->free_ij = PETSC_FALSE; 3302 3303 for (ii=0; ii<m; ii++) { 3304 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3305 #if defined(PETSC_USE_DEBUG) 3306 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3307 #endif 3308 } 3309 #if defined(PETSC_USE_DEBUG) 3310 for (ii=0; ii<baij->i[m]; ii++) { 3311 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3312 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3313 } 3314 #endif 3315 3316 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3317 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3318 PetscFunctionReturn(0); 3319 } 3320