1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "../src/mat/impls/baij/seq/baij.h" 8 #include "petscsys.h" /*I "petscmat.h" I*/ 9 10 #include "../src/mat/blockinvert.h" 11 12 #undef __FUNCT__ 13 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 14 /*@ 15 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 16 17 Collective on Mat 18 19 Input Parameters: 20 . mat - the matrix 21 22 Level: advanced 23 @*/ 24 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 25 { 26 PetscErrorCode ierr,(*f)(Mat); 27 28 PetscFunctionBegin; 29 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 30 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 31 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 32 33 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 34 if (f) { 35 ierr = (*f)(mat);CHKERRQ(ierr); 36 } else { 37 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 38 } 39 PetscFunctionReturn(0); 40 } 41 42 EXTERN_C_BEGIN 43 #undef __FUNCT__ 44 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 45 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 46 { 47 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 48 PetscErrorCode ierr; 49 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs; 50 MatScalar *v = a->a,*odiag,*diag,*mdiag; 51 PetscReal shift = 0.0; 52 53 PetscFunctionBegin; 54 if (a->idiagvalid) PetscFunctionReturn(0); 55 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56 diag_offset = a->diag; 57 if (!a->idiag) { 58 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59 ierr = PetscLogObjectMemory(A,2*bs*bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 60 } 61 diag = a->idiag; 62 mdiag = a->idiag+bs*bs*mbs; 63 /* factor and invert each block */ 64 switch (bs){ 65 case 1: 66 for (i=0; i<mbs; i++) { 67 odiag = v + 1*diag_offset[i]; 68 diag[0] = odiag[0]; 69 mdiag[0] = odiag[0]; 70 diag[0] = 1.0 / (diag[0] + shift); 71 diag += 1; 72 mdiag += 1; 73 } 74 break; 75 case 2: 76 for (i=0; i<mbs; i++) { 77 odiag = v + 4*diag_offset[i]; 78 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 79 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 80 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 81 diag += 4; 82 mdiag += 4; 83 } 84 break; 85 case 3: 86 for (i=0; i<mbs; i++) { 87 odiag = v + 9*diag_offset[i]; 88 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 89 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 90 diag[8] = odiag[8]; 91 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 92 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 93 mdiag[8] = odiag[8]; 94 ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 95 diag += 9; 96 mdiag += 9; 97 } 98 break; 99 case 4: 100 for (i=0; i<mbs; i++) { 101 odiag = v + 16*diag_offset[i]; 102 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 104 ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 105 diag += 16; 106 mdiag += 16; 107 } 108 break; 109 case 5: 110 for (i=0; i<mbs; i++) { 111 odiag = v + 25*diag_offset[i]; 112 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 113 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 114 ierr = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr); 115 diag += 25; 116 mdiag += 25; 117 } 118 break; 119 case 6: 120 for (i=0; i<mbs; i++) { 121 odiag = v + 36*diag_offset[i]; 122 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 123 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 124 ierr = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 125 diag += 36; 126 mdiag += 36; 127 } 128 break; 129 default: 130 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 131 } 132 a->idiagvalid = PETSC_TRUE; 133 PetscFunctionReturn(0); 134 } 135 EXTERN_C_END 136 137 #undef __FUNCT__ 138 #define __FUNCT__ "MatSOR_SeqBAIJ_1" 139 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 140 { 141 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 142 PetscScalar *x,x1,s1; 143 const PetscScalar *b; 144 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 145 PetscErrorCode ierr; 146 PetscInt m = a->mbs,i,i2,nz,j; 147 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 148 149 PetscFunctionBegin; 150 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 151 its = its*lits; 152 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 153 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 154 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 155 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 156 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 157 158 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 159 160 diag = a->diag; 161 idiag = a->idiag; 162 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 163 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 164 165 if (flag & SOR_ZERO_INITIAL_GUESS) { 166 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 167 x[0] = b[0]*idiag[0]; 168 i2 = 1; 169 idiag += 1; 170 for (i=1; i<m; i++) { 171 v = aa + ai[i]; 172 vi = aj + ai[i]; 173 nz = diag[i] - ai[i]; 174 s1 = b[i2]; 175 for (j=0; j<nz; j++) { 176 s1 -= v[j]*x[vi[j]]; 177 } 178 x[i2] = idiag[0]*s1; 179 idiag += 1; 180 i2 += 1; 181 } 182 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 183 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 184 } 185 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 186 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 187 i2 = 0; 188 mdiag = a->idiag+a->mbs; 189 for (i=0; i<m; i++) { 190 x1 = x[i2]; 191 x[i2] = mdiag[0]*x1; 192 mdiag += 1; 193 i2 += 1; 194 } 195 ierr = PetscLogFlops(m);CHKERRQ(ierr); 196 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 197 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 198 } 199 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 200 idiag = a->idiag+a->mbs - 1; 201 i2 = m - 1; 202 x1 = x[i2]; 203 x[i2] = idiag[0]*x1; 204 idiag -= 1; 205 i2 -= 1; 206 for (i=m-2; i>=0; i--) { 207 v = aa + (diag[i]+1); 208 vi = aj + diag[i] + 1; 209 nz = ai[i+1] - diag[i] - 1; 210 s1 = x[i2]; 211 for (j=0; j<nz; j++) { 212 s1 -= v[j]*x[vi[j]]; 213 } 214 x[i2] = idiag[0]*s1; 215 idiag -= 1; 216 i2 -= 1; 217 } 218 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 219 } 220 } else { 221 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 222 } 223 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 224 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 225 PetscFunctionReturn(0); 226 } 227 228 #undef __FUNCT__ 229 #define __FUNCT__ "MatSOR_SeqBAIJ_2" 230 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 231 { 232 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 233 PetscScalar *x,x1,x2,s1,s2; 234 const PetscScalar *b; 235 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 236 PetscErrorCode ierr; 237 PetscInt m = a->mbs,i,i2,nz,idx,j,it; 238 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 239 240 PetscFunctionBegin; 241 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 242 its = its*lits; 243 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 244 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 245 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 246 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 247 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 248 249 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 250 251 diag = a->diag; 252 idiag = a->idiag; 253 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 254 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 255 256 if (flag & SOR_ZERO_INITIAL_GUESS) { 257 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 258 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 259 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 260 i2 = 2; 261 idiag += 4; 262 for (i=1; i<m; i++) { 263 v = aa + 4*ai[i]; 264 vi = aj + ai[i]; 265 nz = diag[i] - ai[i]; 266 s1 = b[i2]; s2 = b[i2+1]; 267 for (j=0; j<nz; j++) { 268 idx = 2*vi[j]; 269 it = 4*j; 270 x1 = x[idx]; x2 = x[1+idx]; 271 s1 -= v[it]*x1 + v[it+2]*x2; 272 s2 -= v[it+1]*x1 + v[it+3]*x2; 273 } 274 x[i2] = idiag[0]*s1 + idiag[2]*s2; 275 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 276 idiag += 4; 277 i2 += 2; 278 } 279 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 280 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 281 } 282 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 283 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 284 i2 = 0; 285 mdiag = a->idiag+4*a->mbs; 286 for (i=0; i<m; i++) { 287 x1 = x[i2]; x2 = x[i2+1]; 288 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 289 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 290 mdiag += 4; 291 i2 += 2; 292 } 293 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 294 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 295 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 296 } 297 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 298 idiag = a->idiag+4*a->mbs - 4; 299 i2 = 2*m - 2; 300 x1 = x[i2]; x2 = x[i2+1]; 301 x[i2] = idiag[0]*x1 + idiag[2]*x2; 302 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 303 idiag -= 4; 304 i2 -= 2; 305 for (i=m-2; i>=0; i--) { 306 v = aa + 4*(diag[i]+1); 307 vi = aj + diag[i] + 1; 308 nz = ai[i+1] - diag[i] - 1; 309 s1 = x[i2]; s2 = x[i2+1]; 310 for (j=0; j<nz; j++) { 311 idx = 2*vi[j]; 312 it = 4*j; 313 x1 = x[idx]; x2 = x[1+idx]; 314 s1 -= v[it]*x1 + v[it+2]*x2; 315 s2 -= v[it+1]*x1 + v[it+3]*x2; 316 } 317 x[i2] = idiag[0]*s1 + idiag[2]*s2; 318 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 319 idiag -= 4; 320 i2 -= 2; 321 } 322 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 323 } 324 } else { 325 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 326 } 327 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 328 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 329 PetscFunctionReturn(0); 330 } 331 332 #undef __FUNCT__ 333 #define __FUNCT__ "MatSOR_SeqBAIJ_3" 334 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 335 { 336 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 337 PetscScalar *x,x1,x2,x3,s1,s2,s3; 338 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 339 const PetscScalar *b; 340 PetscErrorCode ierr; 341 PetscInt m = a->mbs,i,i2,nz,idx; 342 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 343 344 PetscFunctionBegin; 345 its = its*lits; 346 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 347 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 348 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 349 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 350 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 351 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 352 353 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 354 355 diag = a->diag; 356 idiag = a->idiag; 357 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 358 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 359 360 if (flag & SOR_ZERO_INITIAL_GUESS) { 361 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 362 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 363 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 364 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 365 i2 = 3; 366 idiag += 9; 367 for (i=1; i<m; i++) { 368 v = aa + 9*ai[i]; 369 vi = aj + ai[i]; 370 nz = diag[i] - ai[i]; 371 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 372 while (nz--) { 373 idx = 3*(*vi++); 374 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 375 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 376 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 377 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 378 v += 9; 379 } 380 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 381 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 382 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 383 idiag += 9; 384 i2 += 3; 385 } 386 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 387 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 388 } 389 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 390 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 391 i2 = 0; 392 mdiag = a->idiag+9*a->mbs; 393 for (i=0; i<m; i++) { 394 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 395 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 396 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 397 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 398 mdiag += 9; 399 i2 += 3; 400 } 401 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 402 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 403 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 404 } 405 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 406 idiag = a->idiag+9*a->mbs - 9; 407 i2 = 3*m - 3; 408 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 409 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 410 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 411 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 412 idiag -= 9; 413 i2 -= 3; 414 for (i=m-2; i>=0; i--) { 415 v = aa + 9*(diag[i]+1); 416 vi = aj + diag[i] + 1; 417 nz = ai[i+1] - diag[i] - 1; 418 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 419 while (nz--) { 420 idx = 3*(*vi++); 421 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 422 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 423 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 424 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 425 v += 9; 426 } 427 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 428 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 429 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 430 idiag -= 9; 431 i2 -= 3; 432 } 433 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 434 } 435 } else { 436 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 437 } 438 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 439 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 440 PetscFunctionReturn(0); 441 } 442 443 #undef __FUNCT__ 444 #define __FUNCT__ "MatSOR_SeqBAIJ_4" 445 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 446 { 447 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 448 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 449 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 450 const PetscScalar *b; 451 PetscErrorCode ierr; 452 PetscInt m = a->mbs,i,i2,nz,idx; 453 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 454 455 PetscFunctionBegin; 456 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 457 its = its*lits; 458 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 459 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 460 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 461 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 462 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 463 464 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 465 466 diag = a->diag; 467 idiag = a->idiag; 468 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 469 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 470 471 if (flag & SOR_ZERO_INITIAL_GUESS) { 472 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 473 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 474 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 475 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 476 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 477 i2 = 4; 478 idiag += 16; 479 for (i=1; i<m; i++) { 480 v = aa + 16*ai[i]; 481 vi = aj + ai[i]; 482 nz = diag[i] - ai[i]; 483 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 484 while (nz--) { 485 idx = 4*(*vi++); 486 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 487 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 488 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 489 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 490 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 491 v += 16; 492 } 493 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 494 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 495 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 496 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 497 idiag += 16; 498 i2 += 4; 499 } 500 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 501 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 502 } 503 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 504 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 505 i2 = 0; 506 mdiag = a->idiag+16*a->mbs; 507 for (i=0; i<m; i++) { 508 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 509 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 510 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 511 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 512 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 513 mdiag += 16; 514 i2 += 4; 515 } 516 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 517 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 518 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 519 } 520 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 521 idiag = a->idiag+16*a->mbs - 16; 522 i2 = 4*m - 4; 523 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 524 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 525 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 526 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 527 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 528 idiag -= 16; 529 i2 -= 4; 530 for (i=m-2; i>=0; i--) { 531 v = aa + 16*(diag[i]+1); 532 vi = aj + diag[i] + 1; 533 nz = ai[i+1] - diag[i] - 1; 534 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 535 while (nz--) { 536 idx = 4*(*vi++); 537 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 538 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 539 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 540 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 541 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 542 v += 16; 543 } 544 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 545 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 546 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 547 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 548 idiag -= 16; 549 i2 -= 4; 550 } 551 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 552 } 553 } else { 554 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 555 } 556 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 557 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 558 PetscFunctionReturn(0); 559 } 560 561 #undef __FUNCT__ 562 #define __FUNCT__ "MatSOR_SeqBAIJ_5" 563 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 564 { 565 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 566 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 567 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 568 const PetscScalar *b; 569 PetscErrorCode ierr; 570 PetscInt m = a->mbs,i,i2,nz,idx; 571 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 572 573 PetscFunctionBegin; 574 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 575 its = its*lits; 576 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 577 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 578 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 579 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 580 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 581 582 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 583 584 diag = a->diag; 585 idiag = a->idiag; 586 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 587 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 588 589 if (flag & SOR_ZERO_INITIAL_GUESS) { 590 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 591 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 592 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 593 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 594 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 595 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 596 i2 = 5; 597 idiag += 25; 598 for (i=1; i<m; i++) { 599 v = aa + 25*ai[i]; 600 vi = aj + ai[i]; 601 nz = diag[i] - ai[i]; 602 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 603 while (nz--) { 604 idx = 5*(*vi++); 605 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 606 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 607 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 608 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 609 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 610 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 611 v += 25; 612 } 613 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 614 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 615 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 616 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 617 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 618 idiag += 25; 619 i2 += 5; 620 } 621 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 622 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 623 } 624 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 625 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 626 i2 = 0; 627 mdiag = a->idiag+25*a->mbs; 628 for (i=0; i<m; i++) { 629 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 630 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 631 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 632 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 633 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 634 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 635 mdiag += 25; 636 i2 += 5; 637 } 638 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 639 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 640 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 641 } 642 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 643 idiag = a->idiag+25*a->mbs - 25; 644 i2 = 5*m - 5; 645 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 646 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 647 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 648 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 649 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 650 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 651 idiag -= 25; 652 i2 -= 5; 653 for (i=m-2; i>=0; i--) { 654 v = aa + 25*(diag[i]+1); 655 vi = aj + diag[i] + 1; 656 nz = ai[i+1] - diag[i] - 1; 657 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 658 while (nz--) { 659 idx = 5*(*vi++); 660 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 661 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 662 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 663 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 664 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 665 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 666 v += 25; 667 } 668 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 669 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 670 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 671 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 672 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 673 idiag -= 25; 674 i2 -= 5; 675 } 676 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 677 } 678 } else { 679 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 680 } 681 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 682 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 683 PetscFunctionReturn(0); 684 } 685 686 #undef __FUNCT__ 687 #define __FUNCT__ "MatSOR_SeqBAIJ_6" 688 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 689 { 690 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 691 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 692 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 693 const PetscScalar *b; 694 PetscErrorCode ierr; 695 PetscInt m = a->mbs,i,i2,nz,idx; 696 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 697 698 PetscFunctionBegin; 699 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 700 its = its*lits; 701 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 702 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 703 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 704 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 705 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 706 707 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 708 709 diag = a->diag; 710 idiag = a->idiag; 711 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 712 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 713 714 if (flag & SOR_ZERO_INITIAL_GUESS) { 715 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 716 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 717 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 718 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 719 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 720 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 721 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 722 i2 = 6; 723 idiag += 36; 724 for (i=1; i<m; i++) { 725 v = aa + 36*ai[i]; 726 vi = aj + ai[i]; 727 nz = diag[i] - ai[i]; 728 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 729 while (nz--) { 730 idx = 6*(*vi++); 731 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 732 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 733 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 734 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 735 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 736 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 737 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 738 v += 36; 739 } 740 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 741 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 742 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 743 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 744 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 745 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 746 idiag += 36; 747 i2 += 6; 748 } 749 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 750 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 751 } 752 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 753 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 754 i2 = 0; 755 mdiag = a->idiag+36*a->mbs; 756 for (i=0; i<m; i++) { 757 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 758 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 759 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 760 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 761 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 762 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 763 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 764 mdiag += 36; 765 i2 += 6; 766 } 767 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 768 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 769 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 770 } 771 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 772 idiag = a->idiag+36*a->mbs - 36; 773 i2 = 6*m - 6; 774 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 775 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 776 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 777 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 778 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 779 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 780 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 781 idiag -= 36; 782 i2 -= 6; 783 for (i=m-2; i>=0; i--) { 784 v = aa + 36*(diag[i]+1); 785 vi = aj + diag[i] + 1; 786 nz = ai[i+1] - diag[i] - 1; 787 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 788 while (nz--) { 789 idx = 6*(*vi++); 790 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 791 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 792 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 793 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 794 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 795 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 796 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 797 v += 36; 798 } 799 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 800 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 801 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 802 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 803 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 804 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 805 idiag -= 36; 806 i2 -= 6; 807 } 808 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 809 } 810 } else { 811 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 812 } 813 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 814 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 815 PetscFunctionReturn(0); 816 } 817 818 #undef __FUNCT__ 819 #define __FUNCT__ "MatSOR_SeqBAIJ_7" 820 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 821 { 822 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 823 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 824 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 825 const PetscScalar *b; 826 PetscErrorCode ierr; 827 PetscInt m = a->mbs,i,i2,nz,idx; 828 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 829 830 PetscFunctionBegin; 831 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 832 its = its*lits; 833 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 834 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 835 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 836 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 837 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 838 839 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 840 841 diag = a->diag; 842 idiag = a->idiag; 843 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 844 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 845 846 if (flag & SOR_ZERO_INITIAL_GUESS) { 847 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 848 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 849 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 850 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 851 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 852 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 853 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 854 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 855 i2 = 7; 856 idiag += 49; 857 for (i=1; i<m; i++) { 858 v = aa + 49*ai[i]; 859 vi = aj + ai[i]; 860 nz = diag[i] - ai[i]; 861 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 862 while (nz--) { 863 idx = 7*(*vi++); 864 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 865 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 866 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 867 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 868 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 869 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 870 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 871 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 872 v += 49; 873 } 874 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 875 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 876 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 877 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 878 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 879 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 880 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 881 idiag += 49; 882 i2 += 7; 883 } 884 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 885 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 886 } 887 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 888 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 889 i2 = 0; 890 mdiag = a->idiag+49*a->mbs; 891 for (i=0; i<m; i++) { 892 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 893 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7; 894 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7; 895 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7; 896 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7; 897 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7; 898 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7; 899 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7; 900 mdiag += 36; 901 i2 += 6; 902 } 903 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 904 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 905 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 906 } 907 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 908 idiag = a->idiag+49*a->mbs - 49; 909 i2 = 7*m - 7; 910 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 911 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 912 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 913 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 914 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 915 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 916 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 917 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 918 idiag -= 49; 919 i2 -= 7; 920 for (i=m-2; i>=0; i--) { 921 v = aa + 49*(diag[i]+1); 922 vi = aj + diag[i] + 1; 923 nz = ai[i+1] - diag[i] - 1; 924 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 925 while (nz--) { 926 idx = 7*(*vi++); 927 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 928 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 929 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 930 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 931 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 932 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 933 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 934 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 935 v += 49; 936 } 937 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 938 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 939 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 940 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 941 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 942 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 943 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 944 idiag -= 49; 945 i2 -= 7; 946 } 947 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 948 } 949 } else { 950 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 951 } 952 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 953 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 954 PetscFunctionReturn(0); 955 } 956 957 /* 958 Special version for direct calls from Fortran (Used in PETSc-fun3d) 959 */ 960 #if defined(PETSC_HAVE_FORTRAN_CAPS) 961 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 962 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 963 #define matsetvaluesblocked4_ matsetvaluesblocked4 964 #endif 965 966 EXTERN_C_BEGIN 967 #undef __FUNCT__ 968 #define __FUNCT__ "matsetvaluesblocked4_" 969 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 970 { 971 Mat A = *AA; 972 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 973 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 974 PetscInt *ai=a->i,*ailen=a->ilen; 975 PetscInt *aj=a->j,stepval,lastcol = -1; 976 const PetscScalar *value = v; 977 MatScalar *ap,*aa = a->a,*bap; 978 979 PetscFunctionBegin; 980 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 981 stepval = (n-1)*4; 982 for (k=0; k<m; k++) { /* loop over added rows */ 983 row = im[k]; 984 rp = aj + ai[row]; 985 ap = aa + 16*ai[row]; 986 nrow = ailen[row]; 987 low = 0; 988 high = nrow; 989 for (l=0; l<n; l++) { /* loop over added columns */ 990 col = in[l]; 991 if (col <= lastcol) low = 0; else high = nrow; 992 lastcol = col; 993 value = v + k*(stepval+4 + l)*4; 994 while (high-low > 7) { 995 t = (low+high)/2; 996 if (rp[t] > col) high = t; 997 else low = t; 998 } 999 for (i=low; i<high; i++) { 1000 if (rp[i] > col) break; 1001 if (rp[i] == col) { 1002 bap = ap + 16*i; 1003 for (ii=0; ii<4; ii++,value+=stepval) { 1004 for (jj=ii; jj<16; jj+=4) { 1005 bap[jj] += *value++; 1006 } 1007 } 1008 goto noinsert2; 1009 } 1010 } 1011 N = nrow++ - 1; 1012 high++; /* added new column index thus must search to one higher than before */ 1013 /* shift up all the later entries in this row */ 1014 for (ii=N; ii>=i; ii--) { 1015 rp[ii+1] = rp[ii]; 1016 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1017 } 1018 if (N >= i) { 1019 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1020 } 1021 rp[i] = col; 1022 bap = ap + 16*i; 1023 for (ii=0; ii<4; ii++,value+=stepval) { 1024 for (jj=ii; jj<16; jj+=4) { 1025 bap[jj] = *value++; 1026 } 1027 } 1028 noinsert2:; 1029 low = i; 1030 } 1031 ailen[row] = nrow; 1032 } 1033 PetscFunctionReturnVoid(); 1034 } 1035 EXTERN_C_END 1036 1037 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1038 #define matsetvalues4_ MATSETVALUES4 1039 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1040 #define matsetvalues4_ matsetvalues4 1041 #endif 1042 1043 EXTERN_C_BEGIN 1044 #undef __FUNCT__ 1045 #define __FUNCT__ "MatSetValues4_" 1046 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1047 { 1048 Mat A = *AA; 1049 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1050 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1051 PetscInt *ai=a->i,*ailen=a->ilen; 1052 PetscInt *aj=a->j,brow,bcol; 1053 PetscInt ridx,cidx,lastcol = -1; 1054 MatScalar *ap,value,*aa=a->a,*bap; 1055 1056 PetscFunctionBegin; 1057 for (k=0; k<m; k++) { /* loop over added rows */ 1058 row = im[k]; brow = row/4; 1059 rp = aj + ai[brow]; 1060 ap = aa + 16*ai[brow]; 1061 nrow = ailen[brow]; 1062 low = 0; 1063 high = nrow; 1064 for (l=0; l<n; l++) { /* loop over added columns */ 1065 col = in[l]; bcol = col/4; 1066 ridx = row % 4; cidx = col % 4; 1067 value = v[l + k*n]; 1068 if (col <= lastcol) low = 0; else high = nrow; 1069 lastcol = col; 1070 while (high-low > 7) { 1071 t = (low+high)/2; 1072 if (rp[t] > bcol) high = t; 1073 else low = t; 1074 } 1075 for (i=low; i<high; i++) { 1076 if (rp[i] > bcol) break; 1077 if (rp[i] == bcol) { 1078 bap = ap + 16*i + 4*cidx + ridx; 1079 *bap += value; 1080 goto noinsert1; 1081 } 1082 } 1083 N = nrow++ - 1; 1084 high++; /* added new column thus must search to one higher than before */ 1085 /* shift up all the later entries in this row */ 1086 for (ii=N; ii>=i; ii--) { 1087 rp[ii+1] = rp[ii]; 1088 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1089 } 1090 if (N>=i) { 1091 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1092 } 1093 rp[i] = bcol; 1094 ap[16*i + 4*cidx + ridx] = value; 1095 noinsert1:; 1096 low = i; 1097 } 1098 ailen[brow] = nrow; 1099 } 1100 PetscFunctionReturnVoid(); 1101 } 1102 EXTERN_C_END 1103 1104 /* 1105 Checks for missing diagonals 1106 */ 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1109 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d) 1110 { 1111 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1112 PetscErrorCode ierr; 1113 PetscInt *diag,*jj = a->j,i; 1114 1115 PetscFunctionBegin; 1116 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1117 *missing = PETSC_FALSE; 1118 if (A->rmap->n > 0 && !jj) { 1119 *missing = PETSC_TRUE; 1120 if (d) *d = 0; 1121 PetscInfo(A,"Matrix has no entries therefor is missing diagonal"); 1122 } else { 1123 diag = a->diag; 1124 for (i=0; i<a->mbs; i++) { 1125 if (jj[diag[i]] != i) { 1126 *missing = PETSC_TRUE; 1127 if (d) *d = i; 1128 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1129 } 1130 } 1131 } 1132 PetscFunctionReturn(0); 1133 } 1134 1135 #undef __FUNCT__ 1136 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1137 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1138 { 1139 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1140 PetscErrorCode ierr; 1141 PetscInt i,j,m = a->mbs; 1142 1143 PetscFunctionBegin; 1144 if (!a->diag) { 1145 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1146 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1147 a->free_diag = PETSC_TRUE; 1148 } 1149 for (i=0; i<m; i++) { 1150 a->diag[i] = a->i[i+1]; 1151 for (j=a->i[i]; j<a->i[i+1]; j++) { 1152 if (a->j[j] == i) { 1153 a->diag[i] = j; 1154 break; 1155 } 1156 } 1157 } 1158 PetscFunctionReturn(0); 1159 } 1160 1161 1162 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1163 1164 #undef __FUNCT__ 1165 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1166 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1167 { 1168 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1169 PetscErrorCode ierr; 1170 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt; 1171 PetscInt *tia, *tja; 1172 1173 PetscFunctionBegin; 1174 *nn = n; 1175 if (!ia) PetscFunctionReturn(0); 1176 if (symmetric) { 1177 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1178 } else { 1179 tia = a->i; tja = a->j; 1180 } 1181 1182 if (!blockcompressed && bs > 1) { 1183 (*nn) *= bs; 1184 nbs = bs; 1185 /* malloc & create the natural set of indices */ 1186 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1187 if (n) { 1188 (*ia)[0] = 0; 1189 for (j=1; j<bs; j++) { 1190 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1191 } 1192 } 1193 1194 for (i=1; i<n; i++) { 1195 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1196 for (j=1; j<bs; j++) { 1197 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1198 } 1199 } 1200 if (n) { 1201 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1202 } 1203 1204 if (ja) { 1205 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1206 cnt = 0; 1207 for (i=0; i<n; i++) { 1208 for (j=0; j<bs; j++) { 1209 for (k=tia[i]; k<tia[i+1]; k++) { 1210 for (l=0; l<bs; l++) { 1211 (*ja)[cnt++] = bs*tja[k] + l; 1212 } 1213 } 1214 } 1215 } 1216 } 1217 1218 n *= bs; 1219 nz *= bs*bs; 1220 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1221 ierr = PetscFree(tia);CHKERRQ(ierr); 1222 ierr = PetscFree(tja);CHKERRQ(ierr); 1223 } 1224 } else if (oshift == 1) { 1225 if (symmetric) { 1226 PetscInt nz = tia[A->rmap->n/bs]; 1227 /* add 1 to i and j indices */ 1228 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1229 *ia = tia; 1230 if (ja) { 1231 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1232 *ja = tja; 1233 } 1234 } else { 1235 PetscInt nz = a->i[A->rmap->n/bs]; 1236 /* malloc space and add 1 to i and j indices */ 1237 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1238 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1239 if (ja) { 1240 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1241 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1242 } 1243 } 1244 } else { 1245 *ia = tia; 1246 if (ja) *ja = tja; 1247 } 1248 1249 PetscFunctionReturn(0); 1250 } 1251 1252 #undef __FUNCT__ 1253 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1254 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1255 { 1256 PetscErrorCode ierr; 1257 1258 PetscFunctionBegin; 1259 if (!ia) PetscFunctionReturn(0); 1260 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1261 ierr = PetscFree(*ia);CHKERRQ(ierr); 1262 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1263 } 1264 PetscFunctionReturn(0); 1265 } 1266 1267 #undef __FUNCT__ 1268 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1269 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1270 { 1271 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1272 PetscErrorCode ierr; 1273 1274 PetscFunctionBegin; 1275 #if defined(PETSC_USE_LOG) 1276 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1277 #endif 1278 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1279 if (a->row) { 1280 ierr = ISDestroy(a->row);CHKERRQ(ierr); 1281 } 1282 if (a->col) { 1283 ierr = ISDestroy(a->col);CHKERRQ(ierr); 1284 } 1285 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1286 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1287 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1288 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1289 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1290 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 1291 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1292 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1293 if (a->compressedrow.use){ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);} 1294 1295 if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);} 1296 if (a->parent) {ierr = MatDestroy(a->parent);CHKERRQ(ierr);} 1297 ierr = PetscFree(a);CHKERRQ(ierr); 1298 1299 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1304 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1305 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1306 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1307 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1308 PetscFunctionReturn(0); 1309 } 1310 1311 #undef __FUNCT__ 1312 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1313 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg) 1314 { 1315 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1316 PetscErrorCode ierr; 1317 1318 PetscFunctionBegin; 1319 switch (op) { 1320 case MAT_ROW_ORIENTED: 1321 a->roworiented = flg; 1322 break; 1323 case MAT_KEEP_NONZERO_PATTERN: 1324 a->keepnonzeropattern = flg; 1325 break; 1326 case MAT_NEW_NONZERO_LOCATIONS: 1327 a->nonew = (flg ? 0 : 1); 1328 break; 1329 case MAT_NEW_NONZERO_LOCATION_ERR: 1330 a->nonew = (flg ? -1 : 0); 1331 break; 1332 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1333 a->nonew = (flg ? -2 : 0); 1334 break; 1335 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1336 a->nounused = (flg ? -1 : 0); 1337 break; 1338 case MAT_NEW_DIAGONALS: 1339 case MAT_IGNORE_OFF_PROC_ENTRIES: 1340 case MAT_USE_HASH_TABLE: 1341 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1342 break; 1343 case MAT_SYMMETRIC: 1344 case MAT_STRUCTURALLY_SYMMETRIC: 1345 case MAT_HERMITIAN: 1346 case MAT_SYMMETRY_ETERNAL: 1347 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1348 break; 1349 default: 1350 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1351 } 1352 PetscFunctionReturn(0); 1353 } 1354 1355 #undef __FUNCT__ 1356 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1357 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1358 { 1359 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1360 PetscErrorCode ierr; 1361 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1362 MatScalar *aa,*aa_i; 1363 PetscScalar *v_i; 1364 1365 PetscFunctionBegin; 1366 bs = A->rmap->bs; 1367 ai = a->i; 1368 aj = a->j; 1369 aa = a->a; 1370 bs2 = a->bs2; 1371 1372 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1373 1374 bn = row/bs; /* Block number */ 1375 bp = row % bs; /* Block Position */ 1376 M = ai[bn+1] - ai[bn]; 1377 *nz = bs*M; 1378 1379 if (v) { 1380 *v = 0; 1381 if (*nz) { 1382 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1383 for (i=0; i<M; i++) { /* for each block in the block row */ 1384 v_i = *v + i*bs; 1385 aa_i = aa + bs2*(ai[bn] + i); 1386 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1387 } 1388 } 1389 } 1390 1391 if (idx) { 1392 *idx = 0; 1393 if (*nz) { 1394 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1395 for (i=0; i<M; i++) { /* for each block in the block row */ 1396 idx_i = *idx + i*bs; 1397 itmp = bs*aj[ai[bn] + i]; 1398 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1399 } 1400 } 1401 } 1402 PetscFunctionReturn(0); 1403 } 1404 1405 #undef __FUNCT__ 1406 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1407 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1408 { 1409 PetscErrorCode ierr; 1410 1411 PetscFunctionBegin; 1412 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1413 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1414 PetscFunctionReturn(0); 1415 } 1416 1417 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1418 1419 #undef __FUNCT__ 1420 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1421 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1422 { 1423 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1424 Mat C; 1425 PetscErrorCode ierr; 1426 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1427 PetscInt *rows,*cols,bs2=a->bs2; 1428 MatScalar *array; 1429 1430 PetscFunctionBegin; 1431 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1432 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1433 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1434 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1435 1436 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1437 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1438 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1439 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1440 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1441 ierr = PetscFree(col);CHKERRQ(ierr); 1442 } else { 1443 C = *B; 1444 } 1445 1446 array = a->a; 1447 ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 1448 for (i=0; i<mbs; i++) { 1449 cols[0] = i*bs; 1450 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1451 len = ai[i+1] - ai[i]; 1452 for (j=0; j<len; j++) { 1453 rows[0] = (*aj++)*bs; 1454 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1455 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1456 array += bs2; 1457 } 1458 } 1459 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1460 1461 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1462 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1463 1464 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1465 *B = C; 1466 } else { 1467 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1468 } 1469 PetscFunctionReturn(0); 1470 } 1471 1472 #undef __FUNCT__ 1473 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1474 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1475 { 1476 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1477 PetscErrorCode ierr; 1478 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1479 int fd; 1480 PetscScalar *aa; 1481 FILE *file; 1482 1483 PetscFunctionBegin; 1484 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1485 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1486 col_lens[0] = MAT_FILE_COOKIE; 1487 1488 col_lens[1] = A->rmap->N; 1489 col_lens[2] = A->cmap->n; 1490 col_lens[3] = a->nz*bs2; 1491 1492 /* store lengths of each row and write (including header) to file */ 1493 count = 0; 1494 for (i=0; i<a->mbs; i++) { 1495 for (j=0; j<bs; j++) { 1496 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1497 } 1498 } 1499 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1500 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1501 1502 /* store column indices (zero start index) */ 1503 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1504 count = 0; 1505 for (i=0; i<a->mbs; i++) { 1506 for (j=0; j<bs; j++) { 1507 for (k=a->i[i]; k<a->i[i+1]; k++) { 1508 for (l=0; l<bs; l++) { 1509 jj[count++] = bs*a->j[k] + l; 1510 } 1511 } 1512 } 1513 } 1514 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1515 ierr = PetscFree(jj);CHKERRQ(ierr); 1516 1517 /* store nonzero values */ 1518 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1519 count = 0; 1520 for (i=0; i<a->mbs; i++) { 1521 for (j=0; j<bs; j++) { 1522 for (k=a->i[i]; k<a->i[i+1]; k++) { 1523 for (l=0; l<bs; l++) { 1524 aa[count++] = a->a[bs2*k + l*bs + j]; 1525 } 1526 } 1527 } 1528 } 1529 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1530 ierr = PetscFree(aa);CHKERRQ(ierr); 1531 1532 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1533 if (file) { 1534 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1535 } 1536 PetscFunctionReturn(0); 1537 } 1538 1539 #undef __FUNCT__ 1540 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1541 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1542 { 1543 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1544 PetscErrorCode ierr; 1545 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1546 PetscViewerFormat format; 1547 1548 PetscFunctionBegin; 1549 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1550 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1551 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1552 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1553 Mat aij; 1554 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1555 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1556 ierr = MatDestroy(aij);CHKERRQ(ierr); 1557 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1558 PetscFunctionReturn(0); 1559 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1560 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1561 for (i=0; i<a->mbs; i++) { 1562 for (j=0; j<bs; j++) { 1563 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1564 for (k=a->i[i]; k<a->i[i+1]; k++) { 1565 for (l=0; l<bs; l++) { 1566 #if defined(PETSC_USE_COMPLEX) 1567 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1568 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1569 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1570 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1571 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1572 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1573 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1574 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1575 } 1576 #else 1577 if (a->a[bs2*k + l*bs + j] != 0.0) { 1578 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1579 } 1580 #endif 1581 } 1582 } 1583 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1584 } 1585 } 1586 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1587 } else { 1588 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1589 for (i=0; i<a->mbs; i++) { 1590 for (j=0; j<bs; j++) { 1591 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1592 for (k=a->i[i]; k<a->i[i+1]; k++) { 1593 for (l=0; l<bs; l++) { 1594 #if defined(PETSC_USE_COMPLEX) 1595 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1596 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1597 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1598 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1599 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1600 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1601 } else { 1602 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1603 } 1604 #else 1605 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1606 #endif 1607 } 1608 } 1609 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1610 } 1611 } 1612 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1613 } 1614 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1615 PetscFunctionReturn(0); 1616 } 1617 1618 #undef __FUNCT__ 1619 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1620 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1621 { 1622 Mat A = (Mat) Aa; 1623 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1624 PetscErrorCode ierr; 1625 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1626 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1627 MatScalar *aa; 1628 PetscViewer viewer; 1629 1630 PetscFunctionBegin; 1631 1632 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1633 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1634 1635 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1636 1637 /* loop over matrix elements drawing boxes */ 1638 color = PETSC_DRAW_BLUE; 1639 for (i=0,row=0; i<mbs; i++,row+=bs) { 1640 for (j=a->i[i]; j<a->i[i+1]; j++) { 1641 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1642 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1643 aa = a->a + j*bs2; 1644 for (k=0; k<bs; k++) { 1645 for (l=0; l<bs; l++) { 1646 if (PetscRealPart(*aa++) >= 0.) continue; 1647 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1648 } 1649 } 1650 } 1651 } 1652 color = PETSC_DRAW_CYAN; 1653 for (i=0,row=0; i<mbs; i++,row+=bs) { 1654 for (j=a->i[i]; j<a->i[i+1]; j++) { 1655 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1656 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1657 aa = a->a + j*bs2; 1658 for (k=0; k<bs; k++) { 1659 for (l=0; l<bs; l++) { 1660 if (PetscRealPart(*aa++) != 0.) continue; 1661 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1662 } 1663 } 1664 } 1665 } 1666 1667 color = PETSC_DRAW_RED; 1668 for (i=0,row=0; i<mbs; i++,row+=bs) { 1669 for (j=a->i[i]; j<a->i[i+1]; j++) { 1670 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1671 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1672 aa = a->a + j*bs2; 1673 for (k=0; k<bs; k++) { 1674 for (l=0; l<bs; l++) { 1675 if (PetscRealPart(*aa++) <= 0.) continue; 1676 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1677 } 1678 } 1679 } 1680 } 1681 PetscFunctionReturn(0); 1682 } 1683 1684 #undef __FUNCT__ 1685 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1686 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1687 { 1688 PetscErrorCode ierr; 1689 PetscReal xl,yl,xr,yr,w,h; 1690 PetscDraw draw; 1691 PetscTruth isnull; 1692 1693 PetscFunctionBegin; 1694 1695 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1696 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1697 1698 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1699 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1700 xr += w; yr += h; xl = -w; yl = -h; 1701 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1702 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1703 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1704 PetscFunctionReturn(0); 1705 } 1706 1707 #undef __FUNCT__ 1708 #define __FUNCT__ "MatView_SeqBAIJ" 1709 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1710 { 1711 PetscErrorCode ierr; 1712 PetscTruth iascii,isbinary,isdraw; 1713 1714 PetscFunctionBegin; 1715 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1716 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1717 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1718 if (iascii){ 1719 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1720 } else if (isbinary) { 1721 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1722 } else if (isdraw) { 1723 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1724 } else { 1725 Mat B; 1726 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1727 ierr = MatView(B,viewer);CHKERRQ(ierr); 1728 ierr = MatDestroy(B);CHKERRQ(ierr); 1729 } 1730 PetscFunctionReturn(0); 1731 } 1732 1733 1734 #undef __FUNCT__ 1735 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1736 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1737 { 1738 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1739 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1740 PetscInt *ai = a->i,*ailen = a->ilen; 1741 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1742 MatScalar *ap,*aa = a->a; 1743 1744 PetscFunctionBegin; 1745 for (k=0; k<m; k++) { /* loop over rows */ 1746 row = im[k]; brow = row/bs; 1747 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1748 if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1749 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1750 nrow = ailen[brow]; 1751 for (l=0; l<n; l++) { /* loop over columns */ 1752 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1753 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1754 col = in[l] ; 1755 bcol = col/bs; 1756 cidx = col%bs; 1757 ridx = row%bs; 1758 high = nrow; 1759 low = 0; /* assume unsorted */ 1760 while (high-low > 5) { 1761 t = (low+high)/2; 1762 if (rp[t] > bcol) high = t; 1763 else low = t; 1764 } 1765 for (i=low; i<high; i++) { 1766 if (rp[i] > bcol) break; 1767 if (rp[i] == bcol) { 1768 *v++ = ap[bs2*i+bs*cidx+ridx]; 1769 goto finished; 1770 } 1771 } 1772 *v++ = 0.0; 1773 finished:; 1774 } 1775 } 1776 PetscFunctionReturn(0); 1777 } 1778 1779 #define CHUNKSIZE 10 1780 #undef __FUNCT__ 1781 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1782 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1783 { 1784 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1785 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1786 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1787 PetscErrorCode ierr; 1788 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1789 PetscTruth roworiented=a->roworiented; 1790 const PetscScalar *value = v; 1791 MatScalar *ap,*aa = a->a,*bap; 1792 1793 PetscFunctionBegin; 1794 if (roworiented) { 1795 stepval = (n-1)*bs; 1796 } else { 1797 stepval = (m-1)*bs; 1798 } 1799 for (k=0; k<m; k++) { /* loop over added rows */ 1800 row = im[k]; 1801 if (row < 0) continue; 1802 #if defined(PETSC_USE_DEBUG) 1803 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1804 #endif 1805 rp = aj + ai[row]; 1806 ap = aa + bs2*ai[row]; 1807 rmax = imax[row]; 1808 nrow = ailen[row]; 1809 low = 0; 1810 high = nrow; 1811 for (l=0; l<n; l++) { /* loop over added columns */ 1812 if (in[l] < 0) continue; 1813 #if defined(PETSC_USE_DEBUG) 1814 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1815 #endif 1816 col = in[l]; 1817 if (roworiented) { 1818 value = v + k*(stepval+bs)*bs + l*bs; 1819 } else { 1820 value = v + l*(stepval+bs)*bs + k*bs; 1821 } 1822 if (col <= lastcol) low = 0; else high = nrow; 1823 lastcol = col; 1824 while (high-low > 7) { 1825 t = (low+high)/2; 1826 if (rp[t] > col) high = t; 1827 else low = t; 1828 } 1829 for (i=low; i<high; i++) { 1830 if (rp[i] > col) break; 1831 if (rp[i] == col) { 1832 bap = ap + bs2*i; 1833 if (roworiented) { 1834 if (is == ADD_VALUES) { 1835 for (ii=0; ii<bs; ii++,value+=stepval) { 1836 for (jj=ii; jj<bs2; jj+=bs) { 1837 bap[jj] += *value++; 1838 } 1839 } 1840 } else { 1841 for (ii=0; ii<bs; ii++,value+=stepval) { 1842 for (jj=ii; jj<bs2; jj+=bs) { 1843 bap[jj] = *value++; 1844 } 1845 } 1846 } 1847 } else { 1848 if (is == ADD_VALUES) { 1849 for (ii=0; ii<bs; ii++,value+=stepval) { 1850 for (jj=0; jj<bs; jj++) { 1851 *bap++ += *value++; 1852 } 1853 } 1854 } else { 1855 for (ii=0; ii<bs; ii++,value+=stepval) { 1856 for (jj=0; jj<bs; jj++) { 1857 *bap++ = *value++; 1858 } 1859 } 1860 } 1861 } 1862 goto noinsert2; 1863 } 1864 } 1865 if (nonew == 1) goto noinsert2; 1866 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1867 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1868 N = nrow++ - 1; high++; 1869 /* shift up all the later entries in this row */ 1870 for (ii=N; ii>=i; ii--) { 1871 rp[ii+1] = rp[ii]; 1872 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1873 } 1874 if (N >= i) { 1875 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1876 } 1877 rp[i] = col; 1878 bap = ap + bs2*i; 1879 if (roworiented) { 1880 for (ii=0; ii<bs; ii++,value+=stepval) { 1881 for (jj=ii; jj<bs2; jj+=bs) { 1882 bap[jj] = *value++; 1883 } 1884 } 1885 } else { 1886 for (ii=0; ii<bs; ii++,value+=stepval) { 1887 for (jj=0; jj<bs; jj++) { 1888 *bap++ = *value++; 1889 } 1890 } 1891 } 1892 noinsert2:; 1893 low = i; 1894 } 1895 ailen[row] = nrow; 1896 } 1897 PetscFunctionReturn(0); 1898 } 1899 1900 #undef __FUNCT__ 1901 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1902 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1903 { 1904 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1905 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1906 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1907 PetscErrorCode ierr; 1908 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1909 MatScalar *aa = a->a,*ap; 1910 PetscReal ratio=0.6; 1911 1912 PetscFunctionBegin; 1913 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1914 1915 if (m) rmax = ailen[0]; 1916 for (i=1; i<mbs; i++) { 1917 /* move each row back by the amount of empty slots (fshift) before it*/ 1918 fshift += imax[i-1] - ailen[i-1]; 1919 rmax = PetscMax(rmax,ailen[i]); 1920 if (fshift) { 1921 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1922 N = ailen[i]; 1923 for (j=0; j<N; j++) { 1924 ip[j-fshift] = ip[j]; 1925 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1926 } 1927 } 1928 ai[i] = ai[i-1] + ailen[i-1]; 1929 } 1930 if (mbs) { 1931 fshift += imax[mbs-1] - ailen[mbs-1]; 1932 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1933 } 1934 /* reset ilen and imax for each row */ 1935 for (i=0; i<mbs; i++) { 1936 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1937 } 1938 a->nz = ai[mbs]; 1939 1940 /* diagonals may have moved, so kill the diagonal pointers */ 1941 a->idiagvalid = PETSC_FALSE; 1942 if (fshift && a->diag) { 1943 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1944 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1945 a->diag = 0; 1946 } 1947 if (fshift && a->nounused == -1) { 1948 SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1949 } 1950 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1951 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1952 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1953 a->reallocs = 0; 1954 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1955 1956 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1957 if (a->compressedrow.use){ 1958 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1959 } 1960 1961 A->same_nonzero = PETSC_TRUE; 1962 PetscFunctionReturn(0); 1963 } 1964 1965 /* 1966 This function returns an array of flags which indicate the locations of contiguous 1967 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1968 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1969 Assume: sizes should be long enough to hold all the values. 1970 */ 1971 #undef __FUNCT__ 1972 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1973 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1974 { 1975 PetscInt i,j,k,row; 1976 PetscTruth flg; 1977 1978 PetscFunctionBegin; 1979 for (i=0,j=0; i<n; j++) { 1980 row = idx[i]; 1981 if (row%bs!=0) { /* Not the begining of a block */ 1982 sizes[j] = 1; 1983 i++; 1984 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1985 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1986 i++; 1987 } else { /* Begining of the block, so check if the complete block exists */ 1988 flg = PETSC_TRUE; 1989 for (k=1; k<bs; k++) { 1990 if (row+k != idx[i+k]) { /* break in the block */ 1991 flg = PETSC_FALSE; 1992 break; 1993 } 1994 } 1995 if (flg) { /* No break in the bs */ 1996 sizes[j] = bs; 1997 i+= bs; 1998 } else { 1999 sizes[j] = 1; 2000 i++; 2001 } 2002 } 2003 } 2004 *bs_max = j; 2005 PetscFunctionReturn(0); 2006 } 2007 2008 #undef __FUNCT__ 2009 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2010 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 2011 { 2012 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2013 PetscErrorCode ierr; 2014 PetscInt i,j,k,count,*rows; 2015 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2016 PetscScalar zero = 0.0; 2017 MatScalar *aa; 2018 2019 PetscFunctionBegin; 2020 /* Make a copy of the IS and sort it */ 2021 /* allocate memory for rows,sizes */ 2022 ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2023 2024 /* copy IS values to rows, and sort them */ 2025 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2026 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2027 if (baij->keepnonzeropattern) { 2028 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2029 bs_max = is_n; 2030 A->same_nonzero = PETSC_TRUE; 2031 } else { 2032 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2033 A->same_nonzero = PETSC_FALSE; 2034 } 2035 2036 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2037 row = rows[j]; 2038 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2039 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2040 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2041 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2042 if (diag != 0.0) { 2043 if (baij->ilen[row/bs] > 0) { 2044 baij->ilen[row/bs] = 1; 2045 baij->j[baij->i[row/bs]] = row/bs; 2046 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2047 } 2048 /* Now insert all the diagonal values for this bs */ 2049 for (k=0; k<bs; k++) { 2050 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2051 } 2052 } else { /* (diag == 0.0) */ 2053 baij->ilen[row/bs] = 0; 2054 } /* end (diag == 0.0) */ 2055 } else { /* (sizes[i] != bs) */ 2056 #if defined (PETSC_USE_DEBUG) 2057 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2058 #endif 2059 for (k=0; k<count; k++) { 2060 aa[0] = zero; 2061 aa += bs; 2062 } 2063 if (diag != 0.0) { 2064 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2065 } 2066 } 2067 } 2068 2069 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2070 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2071 PetscFunctionReturn(0); 2072 } 2073 2074 #undef __FUNCT__ 2075 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2076 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2077 { 2078 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2079 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2080 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2081 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2082 PetscErrorCode ierr; 2083 PetscInt ridx,cidx,bs2=a->bs2; 2084 PetscTruth roworiented=a->roworiented; 2085 MatScalar *ap,value,*aa=a->a,*bap; 2086 2087 PetscFunctionBegin; 2088 if (v) PetscValidScalarPointer(v,6); 2089 for (k=0; k<m; k++) { /* loop over added rows */ 2090 row = im[k]; 2091 brow = row/bs; 2092 if (row < 0) continue; 2093 #if defined(PETSC_USE_DEBUG) 2094 if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2095 #endif 2096 rp = aj + ai[brow]; 2097 ap = aa + bs2*ai[brow]; 2098 rmax = imax[brow]; 2099 nrow = ailen[brow]; 2100 low = 0; 2101 high = nrow; 2102 for (l=0; l<n; l++) { /* loop over added columns */ 2103 if (in[l] < 0) continue; 2104 #if defined(PETSC_USE_DEBUG) 2105 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2106 #endif 2107 col = in[l]; bcol = col/bs; 2108 ridx = row % bs; cidx = col % bs; 2109 if (roworiented) { 2110 value = v[l + k*n]; 2111 } else { 2112 value = v[k + l*m]; 2113 } 2114 if (col <= lastcol) low = 0; else high = nrow; 2115 lastcol = col; 2116 while (high-low > 7) { 2117 t = (low+high)/2; 2118 if (rp[t] > bcol) high = t; 2119 else low = t; 2120 } 2121 for (i=low; i<high; i++) { 2122 if (rp[i] > bcol) break; 2123 if (rp[i] == bcol) { 2124 bap = ap + bs2*i + bs*cidx + ridx; 2125 if (is == ADD_VALUES) *bap += value; 2126 else *bap = value; 2127 goto noinsert1; 2128 } 2129 } 2130 if (nonew == 1) goto noinsert1; 2131 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2132 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2133 N = nrow++ - 1; high++; 2134 /* shift up all the later entries in this row */ 2135 for (ii=N; ii>=i; ii--) { 2136 rp[ii+1] = rp[ii]; 2137 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2138 } 2139 if (N>=i) { 2140 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2141 } 2142 rp[i] = bcol; 2143 ap[bs2*i + bs*cidx + ridx] = value; 2144 a->nz++; 2145 noinsert1:; 2146 low = i; 2147 } 2148 ailen[brow] = nrow; 2149 } 2150 A->same_nonzero = PETSC_FALSE; 2151 PetscFunctionReturn(0); 2152 } 2153 2154 EXTERN PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat,PetscTruth); 2155 2156 #undef __FUNCT__ 2157 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2158 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2159 { 2160 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2161 Mat outA; 2162 PetscErrorCode ierr; 2163 PetscTruth row_identity,col_identity; 2164 2165 PetscFunctionBegin; 2166 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2167 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2168 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2169 if (!row_identity || !col_identity) { 2170 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2171 } 2172 2173 outA = inA; 2174 inA->factor = MAT_FACTOR_LU; 2175 2176 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2177 2178 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2179 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 2180 a->row = row; 2181 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2182 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 2183 a->col = col; 2184 2185 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2186 if (a->icol) { 2187 ierr = ISDestroy(a->icol);CHKERRQ(ierr); 2188 } 2189 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2190 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2191 2192 ierr = MatSeqBAIJSetNumericFactorization(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr); 2193 if (!a->solve_work) { 2194 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2195 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2196 } 2197 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2198 2199 PetscFunctionReturn(0); 2200 } 2201 2202 EXTERN_C_BEGIN 2203 #undef __FUNCT__ 2204 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2205 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2206 { 2207 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2208 PetscInt i,nz,mbs; 2209 2210 PetscFunctionBegin; 2211 nz = baij->maxnz/baij->bs2; 2212 mbs = baij->mbs; 2213 for (i=0; i<nz; i++) { 2214 baij->j[i] = indices[i]; 2215 } 2216 baij->nz = nz; 2217 for (i=0; i<mbs; i++) { 2218 baij->ilen[i] = baij->imax[i]; 2219 } 2220 PetscFunctionReturn(0); 2221 } 2222 EXTERN_C_END 2223 2224 #undef __FUNCT__ 2225 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2226 /*@ 2227 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2228 in the matrix. 2229 2230 Input Parameters: 2231 + mat - the SeqBAIJ matrix 2232 - indices - the column indices 2233 2234 Level: advanced 2235 2236 Notes: 2237 This can be called if you have precomputed the nonzero structure of the 2238 matrix and want to provide it to the matrix object to improve the performance 2239 of the MatSetValues() operation. 2240 2241 You MUST have set the correct numbers of nonzeros per row in the call to 2242 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2243 2244 MUST be called before any calls to MatSetValues(); 2245 2246 @*/ 2247 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2248 { 2249 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 2250 2251 PetscFunctionBegin; 2252 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 2253 PetscValidPointer(indices,2); 2254 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 2255 if (f) { 2256 ierr = (*f)(mat,indices);CHKERRQ(ierr); 2257 } else { 2258 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 2259 } 2260 PetscFunctionReturn(0); 2261 } 2262 2263 #undef __FUNCT__ 2264 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2265 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2266 { 2267 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2268 PetscErrorCode ierr; 2269 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2270 PetscReal atmp; 2271 PetscScalar *x,zero = 0.0; 2272 MatScalar *aa; 2273 PetscInt ncols,brow,krow,kcol; 2274 2275 PetscFunctionBegin; 2276 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2277 bs = A->rmap->bs; 2278 aa = a->a; 2279 ai = a->i; 2280 aj = a->j; 2281 mbs = a->mbs; 2282 2283 ierr = VecSet(v,zero);CHKERRQ(ierr); 2284 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2285 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2286 if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2287 for (i=0; i<mbs; i++) { 2288 ncols = ai[1] - ai[0]; ai++; 2289 brow = bs*i; 2290 for (j=0; j<ncols; j++){ 2291 for (kcol=0; kcol<bs; kcol++){ 2292 for (krow=0; krow<bs; krow++){ 2293 atmp = PetscAbsScalar(*aa);aa++; 2294 row = brow + krow; /* row index */ 2295 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2296 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2297 } 2298 } 2299 aj++; 2300 } 2301 } 2302 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2303 PetscFunctionReturn(0); 2304 } 2305 2306 #undef __FUNCT__ 2307 #define __FUNCT__ "MatCopy_SeqBAIJ" 2308 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2309 { 2310 PetscErrorCode ierr; 2311 2312 PetscFunctionBegin; 2313 /* If the two matrices have the same copy implementation, use fast copy. */ 2314 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2315 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2316 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2317 2318 if (a->i[A->rmap->N] != b->i[B->rmap->N]) { 2319 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 2320 } 2321 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr); 2322 } else { 2323 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2324 } 2325 PetscFunctionReturn(0); 2326 } 2327 2328 #undef __FUNCT__ 2329 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2330 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2331 { 2332 PetscErrorCode ierr; 2333 2334 PetscFunctionBegin; 2335 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2336 PetscFunctionReturn(0); 2337 } 2338 2339 #undef __FUNCT__ 2340 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2341 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2342 { 2343 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2344 PetscFunctionBegin; 2345 *array = a->a; 2346 PetscFunctionReturn(0); 2347 } 2348 2349 #undef __FUNCT__ 2350 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2351 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2352 { 2353 PetscFunctionBegin; 2354 PetscFunctionReturn(0); 2355 } 2356 2357 #include "petscblaslapack.h" 2358 #undef __FUNCT__ 2359 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2360 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2361 { 2362 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2363 PetscErrorCode ierr; 2364 PetscInt i,bs=Y->rmap->bs,j,bs2; 2365 PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz); 2366 2367 PetscFunctionBegin; 2368 if (str == SAME_NONZERO_PATTERN) { 2369 PetscScalar alpha = a; 2370 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2371 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2372 if (y->xtoy && y->XtoY != X) { 2373 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2374 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2375 } 2376 if (!y->xtoy) { /* get xtoy */ 2377 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2378 y->XtoY = X; 2379 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2380 } 2381 bs2 = bs*bs; 2382 for (i=0; i<x->nz; i++) { 2383 j = 0; 2384 while (j < bs2){ 2385 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2386 j++; 2387 } 2388 } 2389 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2390 } else { 2391 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2392 } 2393 PetscFunctionReturn(0); 2394 } 2395 2396 #undef __FUNCT__ 2397 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ" 2398 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs) 2399 { 2400 PetscInt rbs,cbs; 2401 PetscErrorCode ierr; 2402 2403 PetscFunctionBegin; 2404 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 2405 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 2406 if (rbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 2407 if (cbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 2408 PetscFunctionReturn(0); 2409 } 2410 2411 #undef __FUNCT__ 2412 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2413 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2414 { 2415 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2416 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2417 MatScalar *aa = a->a; 2418 2419 PetscFunctionBegin; 2420 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2421 PetscFunctionReturn(0); 2422 } 2423 2424 #undef __FUNCT__ 2425 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2426 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2427 { 2428 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2429 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2430 MatScalar *aa = a->a; 2431 2432 PetscFunctionBegin; 2433 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2434 PetscFunctionReturn(0); 2435 } 2436 2437 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2438 2439 #undef __FUNCT__ 2440 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2441 /* 2442 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2443 */ 2444 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 2445 { 2446 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2447 PetscErrorCode ierr; 2448 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2449 PetscInt nz = a->i[m],row,*jj,mr,col; 2450 2451 PetscFunctionBegin; 2452 *nn = n; 2453 if (!ia) PetscFunctionReturn(0); 2454 if (symmetric) { 2455 SETERRQ(PETSC_ERR_SUP,"Not for BAIJ matrices"); 2456 } else { 2457 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2458 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2459 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2460 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2461 jj = a->j; 2462 for (i=0; i<nz; i++) { 2463 collengths[jj[i]]++; 2464 } 2465 cia[0] = oshift; 2466 for (i=0; i<n; i++) { 2467 cia[i+1] = cia[i] + collengths[i]; 2468 } 2469 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2470 jj = a->j; 2471 for (row=0; row<m; row++) { 2472 mr = a->i[row+1] - a->i[row]; 2473 for (i=0; i<mr; i++) { 2474 col = *jj++; 2475 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2476 } 2477 } 2478 ierr = PetscFree(collengths);CHKERRQ(ierr); 2479 *ia = cia; *ja = cja; 2480 } 2481 PetscFunctionReturn(0); 2482 } 2483 2484 #undef __FUNCT__ 2485 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2486 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 2487 { 2488 PetscErrorCode ierr; 2489 2490 PetscFunctionBegin; 2491 if (!ia) PetscFunctionReturn(0); 2492 ierr = PetscFree(*ia);CHKERRQ(ierr); 2493 ierr = PetscFree(*ja);CHKERRQ(ierr); 2494 PetscFunctionReturn(0); 2495 } 2496 2497 #undef __FUNCT__ 2498 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2499 PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2500 { 2501 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f; 2502 PetscErrorCode ierr; 2503 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2; 2504 PetscScalar dx,*y,*xx,*w3_array; 2505 PetscScalar *vscale_array; 2506 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2507 Vec w1=coloring->w1,w2=coloring->w2,w3; 2508 void *fctx = coloring->fctx; 2509 PetscTruth flg = PETSC_FALSE; 2510 PetscInt ctype=coloring->ctype,N,col_start=0,col_end=0; 2511 Vec x1_tmp; 2512 2513 PetscFunctionBegin; 2514 PetscValidHeaderSpecific(J,MAT_COOKIE,1); 2515 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_COOKIE,2); 2516 PetscValidHeaderSpecific(x1,VEC_COOKIE,3); 2517 if (!f) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2518 2519 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2520 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2521 ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr); 2522 if (flg) { 2523 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2524 } else { 2525 PetscTruth assembled; 2526 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2527 if (assembled) { 2528 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2529 } 2530 } 2531 2532 x1_tmp = x1; 2533 if (!coloring->vscale){ 2534 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2535 } 2536 2537 /* 2538 This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets 2539 coloring->F for the coarser grids from the finest 2540 */ 2541 if (coloring->F) { 2542 ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr); 2543 ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr); 2544 if (m1 != m2) { 2545 coloring->F = 0; 2546 } 2547 } 2548 2549 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2550 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2551 } 2552 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2553 2554 /* Set w1 = F(x1) */ 2555 if (coloring->F) { 2556 w1 = coloring->F; /* use already computed value of function */ 2557 coloring->F = 0; 2558 } else { 2559 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2560 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2561 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2562 } 2563 2564 if (!coloring->w3) { 2565 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2566 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2567 } 2568 w3 = coloring->w3; 2569 2570 CHKMEMQ; 2571 /* Compute all the local scale factors, including ghost points */ 2572 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2573 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2574 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2575 if (ctype == IS_COLORING_GHOSTED){ 2576 col_start = 0; col_end = N; 2577 } else if (ctype == IS_COLORING_GLOBAL){ 2578 xx = xx - start; 2579 vscale_array = vscale_array - start; 2580 col_start = start; col_end = N + start; 2581 } CHKMEMQ; 2582 for (col=col_start; col<col_end; col++){ 2583 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2584 if (coloring->htype[0] == 'w') { 2585 dx = 1.0 + unorm; 2586 } else { 2587 dx = xx[col]; 2588 } 2589 if (dx == 0.0) dx = 1.0; 2590 #if !defined(PETSC_USE_COMPLEX) 2591 if (dx < umin && dx >= 0.0) dx = umin; 2592 else if (dx < 0.0 && dx > -umin) dx = -umin; 2593 #else 2594 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2595 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2596 #endif 2597 dx *= epsilon; 2598 vscale_array[col] = 1.0/dx; 2599 } CHKMEMQ; 2600 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2601 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2602 if (ctype == IS_COLORING_GLOBAL){ 2603 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2604 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2605 } 2606 CHKMEMQ; 2607 if (coloring->vscaleforrow) { 2608 vscaleforrow = coloring->vscaleforrow; 2609 } else { 2610 SETERRQ(PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2611 } 2612 2613 2614 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2615 /* 2616 Loop over each color 2617 */ 2618 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2619 for (k=0; k<coloring->ncolors; k++) { 2620 coloring->currentcolor = k; 2621 for (i=0; i<bs; i++) { 2622 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2623 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2624 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2625 /* 2626 Loop over each column associated with color 2627 adding the perturbation to the vector w3. 2628 */ 2629 for (l=0; l<coloring->ncolumns[k]; l++) { 2630 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2631 if (coloring->htype[0] == 'w') { 2632 dx = 1.0 + unorm; 2633 } else { 2634 dx = xx[col]; 2635 } 2636 if (dx == 0.0) dx = 1.0; 2637 #if !defined(PETSC_USE_COMPLEX) 2638 if (dx < umin && dx >= 0.0) dx = umin; 2639 else if (dx < 0.0 && dx > -umin) dx = -umin; 2640 #else 2641 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2642 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2643 #endif 2644 dx *= epsilon; 2645 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2646 w3_array[col] += dx; 2647 } 2648 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2649 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2650 2651 /* 2652 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2653 w2 = F(x1 + dx) - F(x1) 2654 */ 2655 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2656 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2657 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2658 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2659 2660 /* 2661 Loop over rows of vector, putting results into Jacobian matrix 2662 */ 2663 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2664 for (l=0; l<coloring->nrows[k]; l++) { 2665 row = bs*coloring->rows[k][l]; /* local row index */ 2666 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2667 for (j=0; j<bs; j++) { 2668 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2669 srows[j] = row + start + j; 2670 } 2671 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2672 } 2673 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2674 } 2675 } /* endof for each color */ 2676 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2677 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2678 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2679 ierr = PetscFree(srows);CHKERRQ(ierr); 2680 2681 coloring->currentcolor = -1; 2682 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2683 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2684 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2685 PetscFunctionReturn(0); 2686 } 2687 2688 /* -------------------------------------------------------------------*/ 2689 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2690 MatGetRow_SeqBAIJ, 2691 MatRestoreRow_SeqBAIJ, 2692 MatMult_SeqBAIJ_N, 2693 /* 4*/ MatMultAdd_SeqBAIJ_N, 2694 MatMultTranspose_SeqBAIJ, 2695 MatMultTransposeAdd_SeqBAIJ, 2696 0, 2697 0, 2698 0, 2699 /*10*/ 0, 2700 MatLUFactor_SeqBAIJ, 2701 0, 2702 0, 2703 MatTranspose_SeqBAIJ, 2704 /*15*/ MatGetInfo_SeqBAIJ, 2705 MatEqual_SeqBAIJ, 2706 MatGetDiagonal_SeqBAIJ, 2707 MatDiagonalScale_SeqBAIJ, 2708 MatNorm_SeqBAIJ, 2709 /*20*/ 0, 2710 MatAssemblyEnd_SeqBAIJ, 2711 MatSetOption_SeqBAIJ, 2712 MatZeroEntries_SeqBAIJ, 2713 /*24*/ MatZeroRows_SeqBAIJ, 2714 0, 2715 0, 2716 0, 2717 0, 2718 /*29*/ MatSetUpPreallocation_SeqBAIJ, 2719 0, 2720 0, 2721 MatGetArray_SeqBAIJ, 2722 MatRestoreArray_SeqBAIJ, 2723 /*34*/ MatDuplicate_SeqBAIJ, 2724 0, 2725 0, 2726 MatILUFactor_SeqBAIJ, 2727 0, 2728 /*39*/ MatAXPY_SeqBAIJ, 2729 MatGetSubMatrices_SeqBAIJ, 2730 MatIncreaseOverlap_SeqBAIJ, 2731 MatGetValues_SeqBAIJ, 2732 MatCopy_SeqBAIJ, 2733 /*44*/ 0, 2734 MatScale_SeqBAIJ, 2735 0, 2736 0, 2737 MatILUDTFactor_SeqBAIJ, 2738 /*49*/ MatSetBlockSize_SeqBAIJ, 2739 MatGetRowIJ_SeqBAIJ, 2740 MatRestoreRowIJ_SeqBAIJ, 2741 MatGetColumnIJ_SeqBAIJ, 2742 MatRestoreColumnIJ_SeqBAIJ, 2743 /*54*/ MatFDColoringCreate_SeqAIJ, 2744 0, 2745 0, 2746 0, 2747 MatSetValuesBlocked_SeqBAIJ, 2748 /*59*/ MatGetSubMatrix_SeqBAIJ, 2749 MatDestroy_SeqBAIJ, 2750 MatView_SeqBAIJ, 2751 0, 2752 0, 2753 /*64*/ 0, 2754 0, 2755 0, 2756 0, 2757 0, 2758 /*69*/ MatGetRowMaxAbs_SeqBAIJ, 2759 0, 2760 MatConvert_Basic, 2761 0, 2762 0, 2763 /*74*/ 0, 2764 MatFDColoringApply_BAIJ, 2765 0, 2766 0, 2767 0, 2768 /*79*/ 0, 2769 0, 2770 0, 2771 0, 2772 MatLoad_SeqBAIJ, 2773 /*84*/ 0, 2774 0, 2775 0, 2776 0, 2777 0, 2778 /*89*/ 0, 2779 0, 2780 0, 2781 0, 2782 0, 2783 /*94*/ 0, 2784 0, 2785 0, 2786 0, 2787 0, 2788 /*99*/0, 2789 0, 2790 0, 2791 0, 2792 0, 2793 /*104*/0, 2794 MatRealPart_SeqBAIJ, 2795 MatImaginaryPart_SeqBAIJ, 2796 0, 2797 0, 2798 /*109*/0, 2799 0, 2800 0, 2801 0, 2802 MatMissingDiagonal_SeqBAIJ, 2803 /*114*/0, 2804 0, 2805 0, 2806 0, 2807 0, 2808 /*119*/0, 2809 0, 2810 MatMultHermitianTranspose_SeqBAIJ, 2811 MatMultHermitianTransposeAdd_SeqBAIJ 2812 }; 2813 2814 EXTERN_C_BEGIN 2815 #undef __FUNCT__ 2816 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2817 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2818 { 2819 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2820 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2821 PetscErrorCode ierr; 2822 2823 PetscFunctionBegin; 2824 if (aij->nonew != 1) { 2825 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2826 } 2827 2828 /* allocate space for values if not already there */ 2829 if (!aij->saved_values) { 2830 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2831 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2832 } 2833 2834 /* copy values over */ 2835 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2836 PetscFunctionReturn(0); 2837 } 2838 EXTERN_C_END 2839 2840 EXTERN_C_BEGIN 2841 #undef __FUNCT__ 2842 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2843 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2844 { 2845 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2846 PetscErrorCode ierr; 2847 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2848 2849 PetscFunctionBegin; 2850 if (aij->nonew != 1) { 2851 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2852 } 2853 if (!aij->saved_values) { 2854 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2855 } 2856 2857 /* copy values over */ 2858 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2859 PetscFunctionReturn(0); 2860 } 2861 EXTERN_C_END 2862 2863 EXTERN_C_BEGIN 2864 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2865 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2866 EXTERN_C_END 2867 2868 EXTERN_C_BEGIN 2869 #undef __FUNCT__ 2870 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2871 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2872 { 2873 Mat_SeqBAIJ *b; 2874 PetscErrorCode ierr; 2875 PetscInt i,mbs,nbs,bs2,newbs = PetscAbs(bs); 2876 PetscTruth flg,skipallocation = PETSC_FALSE; 2877 2878 PetscFunctionBegin; 2879 2880 if (nz == MAT_SKIP_ALLOCATION) { 2881 skipallocation = PETSC_TRUE; 2882 nz = 0; 2883 } 2884 2885 if (bs < 0) { 2886 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2887 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2888 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2889 bs = PetscAbs(bs); 2890 } 2891 if (nnz && newbs != bs) { 2892 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2893 } 2894 bs = newbs; 2895 2896 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2897 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2898 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2899 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2900 2901 B->preallocated = PETSC_TRUE; 2902 2903 mbs = B->rmap->n/bs; 2904 nbs = B->cmap->n/bs; 2905 bs2 = bs*bs; 2906 2907 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) { 2908 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2909 } 2910 2911 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2912 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2913 if (nnz) { 2914 for (i=0; i<mbs; i++) { 2915 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2916 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2917 } 2918 } 2919 2920 b = (Mat_SeqBAIJ*)B->data; 2921 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2922 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2923 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2924 2925 if (!flg) { 2926 switch (bs) { 2927 case 1: 2928 B->ops->mult = MatMult_SeqBAIJ_1; 2929 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2930 B->ops->sor = MatSOR_SeqBAIJ_1; 2931 break; 2932 case 2: 2933 B->ops->mult = MatMult_SeqBAIJ_2; 2934 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2935 B->ops->sor = MatSOR_SeqBAIJ_2; 2936 break; 2937 case 3: 2938 B->ops->mult = MatMult_SeqBAIJ_3; 2939 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2940 B->ops->sor = MatSOR_SeqBAIJ_3; 2941 break; 2942 case 4: 2943 B->ops->mult = MatMult_SeqBAIJ_4; 2944 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2945 B->ops->sor = MatSOR_SeqBAIJ_4; 2946 break; 2947 case 5: 2948 B->ops->mult = MatMult_SeqBAIJ_5; 2949 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2950 B->ops->sor = MatSOR_SeqBAIJ_5; 2951 break; 2952 case 6: 2953 B->ops->mult = MatMult_SeqBAIJ_6; 2954 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2955 B->ops->sor = MatSOR_SeqBAIJ_6; 2956 break; 2957 case 7: 2958 B->ops->mult = MatMult_SeqBAIJ_7; 2959 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2960 B->ops->sor = MatSOR_SeqBAIJ_7; 2961 break; 2962 default: 2963 B->ops->mult = MatMult_SeqBAIJ_N; 2964 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2965 break; 2966 } 2967 } 2968 B->rmap->bs = bs; 2969 b->mbs = mbs; 2970 b->nbs = nbs; 2971 if (!skipallocation) { 2972 if (!b->imax) { 2973 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2974 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt)); 2975 b->free_imax_ilen = PETSC_TRUE; 2976 } 2977 /* b->ilen will count nonzeros in each block row so far. */ 2978 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2979 if (!nnz) { 2980 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2981 else if (nz <= 0) nz = 1; 2982 for (i=0; i<mbs; i++) b->imax[i] = nz; 2983 nz = nz*mbs; 2984 } else { 2985 nz = 0; 2986 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2987 } 2988 2989 /* allocate the matrix space */ 2990 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2991 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 2992 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2993 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2994 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2995 b->singlemalloc = PETSC_TRUE; 2996 b->i[0] = 0; 2997 for (i=1; i<mbs+1; i++) { 2998 b->i[i] = b->i[i-1] + b->imax[i-1]; 2999 } 3000 b->free_a = PETSC_TRUE; 3001 b->free_ij = PETSC_TRUE; 3002 } else { 3003 b->free_a = PETSC_FALSE; 3004 b->free_ij = PETSC_FALSE; 3005 } 3006 3007 B->rmap->bs = bs; 3008 b->bs2 = bs2; 3009 b->mbs = mbs; 3010 b->nz = 0; 3011 b->maxnz = nz*bs2; 3012 B->info.nz_unneeded = (PetscReal)b->maxnz; 3013 PetscFunctionReturn(0); 3014 } 3015 EXTERN_C_END 3016 3017 EXTERN_C_BEGIN 3018 #undef __FUNCT__ 3019 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3020 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3021 { 3022 PetscInt i,m,nz,nz_max=0,*nnz; 3023 PetscScalar *values=0; 3024 PetscErrorCode ierr; 3025 3026 PetscFunctionBegin; 3027 3028 if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3029 3030 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3031 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3032 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3033 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3034 m = B->rmap->n/bs; 3035 3036 if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 3037 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3038 for(i=0; i<m; i++) { 3039 nz = ii[i+1]- ii[i]; 3040 if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 3041 nz_max = PetscMax(nz_max, nz); 3042 nnz[i] = nz; 3043 } 3044 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3045 ierr = PetscFree(nnz);CHKERRQ(ierr); 3046 3047 values = (PetscScalar*)V; 3048 if (!values) { 3049 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3050 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3051 } 3052 for (i=0; i<m; i++) { 3053 PetscInt ncols = ii[i+1] - ii[i]; 3054 const PetscInt *icols = jj + ii[i]; 3055 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3056 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3057 } 3058 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3059 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3060 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3061 3062 PetscFunctionReturn(0); 3063 } 3064 EXTERN_C_END 3065 3066 3067 EXTERN_C_BEGIN 3068 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3069 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 3070 EXTERN_C_END 3071 3072 /*MC 3073 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3074 block sparse compressed row format. 3075 3076 Options Database Keys: 3077 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3078 3079 Level: beginner 3080 3081 .seealso: MatCreateSeqBAIJ() 3082 M*/ 3083 3084 3085 EXTERN_C_BEGIN 3086 #undef __FUNCT__ 3087 #define __FUNCT__ "MatCreate_SeqBAIJ" 3088 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 3089 { 3090 PetscErrorCode ierr; 3091 PetscMPIInt size; 3092 Mat_SeqBAIJ *b; 3093 3094 PetscFunctionBegin; 3095 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 3096 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3097 3098 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3099 B->data = (void*)b; 3100 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3101 B->mapping = 0; 3102 b->row = 0; 3103 b->col = 0; 3104 b->icol = 0; 3105 b->reallocs = 0; 3106 b->saved_values = 0; 3107 3108 b->roworiented = PETSC_TRUE; 3109 b->nonew = 0; 3110 b->diag = 0; 3111 b->solve_work = 0; 3112 b->mult_work = 0; 3113 B->spptr = 0; 3114 B->info.nz_unneeded = (PetscReal)b->maxnz; 3115 b->keepnonzeropattern = PETSC_FALSE; 3116 b->xtoy = 0; 3117 b->XtoY = 0; 3118 b->compressedrow.use = PETSC_FALSE; 3119 b->compressedrow.nrows = 0; 3120 b->compressedrow.i = PETSC_NULL; 3121 b->compressedrow.rindex = PETSC_NULL; 3122 b->compressedrow.checked = PETSC_FALSE; 3123 B->same_nonzero = PETSC_FALSE; 3124 3125 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C", 3126 "MatGetFactorAvailable_seqbaij_petsc", 3127 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3128 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C", 3129 "MatGetFactor_seqbaij_petsc", 3130 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3131 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 3132 "MatInvertBlockDiagonal_SeqBAIJ", 3133 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3134 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3135 "MatStoreValues_SeqBAIJ", 3136 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3137 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3138 "MatRetrieveValues_SeqBAIJ", 3139 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3140 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 3141 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 3142 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3143 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 3144 "MatConvert_SeqBAIJ_SeqAIJ", 3145 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3146 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 3147 "MatConvert_SeqBAIJ_SeqSBAIJ", 3148 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3149 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 3150 "MatSeqBAIJSetPreallocation_SeqBAIJ", 3151 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3152 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 3153 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 3154 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3155 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3156 PetscFunctionReturn(0); 3157 } 3158 EXTERN_C_END 3159 3160 #undef __FUNCT__ 3161 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3162 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscTruth mallocmatspace) 3163 { 3164 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3165 PetscErrorCode ierr; 3166 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3167 3168 PetscFunctionBegin; 3169 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 3170 3171 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3172 c->imax = a->imax; 3173 c->ilen = a->ilen; 3174 c->free_imax_ilen = PETSC_FALSE; 3175 } else { 3176 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3177 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3178 for (i=0; i<mbs; i++) { 3179 c->imax[i] = a->imax[i]; 3180 c->ilen[i] = a->ilen[i]; 3181 } 3182 c->free_imax_ilen = PETSC_TRUE; 3183 } 3184 3185 /* allocate the matrix space */ 3186 if (mallocmatspace){ 3187 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3188 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3189 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3190 c->singlemalloc = PETSC_FALSE; 3191 c->free_ij = PETSC_FALSE; 3192 c->i = a->i; 3193 c->j = a->j; 3194 c->parent = A; 3195 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3196 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3197 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3198 } else { 3199 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3200 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3201 c->singlemalloc = PETSC_TRUE; 3202 c->free_ij = PETSC_TRUE; 3203 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3204 if (mbs > 0) { 3205 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3206 if (cpvalues == MAT_COPY_VALUES) { 3207 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3208 } else { 3209 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3210 } 3211 } 3212 } 3213 } 3214 3215 c->roworiented = a->roworiented; 3216 c->nonew = a->nonew; 3217 ierr = PetscLayoutCopy(A->rmap,&C->rmap);CHKERRQ(ierr); 3218 ierr = PetscLayoutCopy(A->cmap,&C->cmap);CHKERRQ(ierr); 3219 c->bs2 = a->bs2; 3220 c->mbs = a->mbs; 3221 c->nbs = a->nbs; 3222 3223 if (a->diag) { 3224 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3225 c->diag = a->diag; 3226 c->free_diag = PETSC_FALSE; 3227 } else { 3228 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3229 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3230 for (i=0; i<mbs; i++) { 3231 c->diag[i] = a->diag[i]; 3232 } 3233 c->free_diag = PETSC_TRUE; 3234 } 3235 } else c->diag = 0; 3236 c->nz = a->nz; 3237 c->maxnz = a->maxnz; 3238 c->solve_work = 0; 3239 c->mult_work = 0; 3240 c->free_a = PETSC_TRUE; 3241 c->free_ij = PETSC_TRUE; 3242 C->preallocated = PETSC_TRUE; 3243 C->assembled = PETSC_TRUE; 3244 3245 c->compressedrow.use = a->compressedrow.use; 3246 c->compressedrow.nrows = a->compressedrow.nrows; 3247 c->compressedrow.checked = a->compressedrow.checked; 3248 if (a->compressedrow.checked && a->compressedrow.use){ 3249 i = a->compressedrow.nrows; 3250 ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 3251 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3252 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3253 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3254 } else { 3255 c->compressedrow.use = PETSC_FALSE; 3256 c->compressedrow.i = PETSC_NULL; 3257 c->compressedrow.rindex = PETSC_NULL; 3258 } 3259 C->same_nonzero = A->same_nonzero; 3260 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3261 PetscFunctionReturn(0); 3262 } 3263 3264 #undef __FUNCT__ 3265 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3266 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3267 { 3268 PetscErrorCode ierr; 3269 3270 PetscFunctionBegin; 3271 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 3272 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3273 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3274 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE); 3275 PetscFunctionReturn(0); 3276 } 3277 3278 #undef __FUNCT__ 3279 #define __FUNCT__ "MatLoad_SeqBAIJ" 3280 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A) 3281 { 3282 Mat_SeqBAIJ *a; 3283 Mat B; 3284 PetscErrorCode ierr; 3285 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3286 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3287 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 3288 PetscInt *masked,nmask,tmp,bs2,ishift; 3289 PetscMPIInt size; 3290 int fd; 3291 PetscScalar *aa; 3292 MPI_Comm comm = ((PetscObject)viewer)->comm; 3293 3294 PetscFunctionBegin; 3295 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3296 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3297 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3298 bs2 = bs*bs; 3299 3300 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3301 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 3302 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3303 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3304 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3305 M = header[1]; N = header[2]; nz = header[3]; 3306 3307 if (header[3] < 0) { 3308 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3309 } 3310 3311 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 3312 3313 /* 3314 This code adds extra rows to make sure the number of rows is 3315 divisible by the blocksize 3316 */ 3317 mbs = M/bs; 3318 extra_rows = bs - M + bs*(mbs); 3319 if (extra_rows == bs) extra_rows = 0; 3320 else mbs++; 3321 if (extra_rows) { 3322 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3323 } 3324 3325 /* read in row lengths */ 3326 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3327 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3328 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3329 3330 /* read in column indices */ 3331 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3332 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3333 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3334 3335 /* loop over row lengths determining block row lengths */ 3336 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3337 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3338 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3339 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3340 rowcount = 0; 3341 nzcount = 0; 3342 for (i=0; i<mbs; i++) { 3343 nmask = 0; 3344 for (j=0; j<bs; j++) { 3345 kmax = rowlengths[rowcount]; 3346 for (k=0; k<kmax; k++) { 3347 tmp = jj[nzcount++]/bs; 3348 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3349 } 3350 rowcount++; 3351 } 3352 browlengths[i] += nmask; 3353 /* zero out the mask elements we set */ 3354 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3355 } 3356 3357 /* create our matrix */ 3358 ierr = MatCreate(comm,&B); 3359 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 3360 ierr = MatSetType(B,type);CHKERRQ(ierr); 3361 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 3362 a = (Mat_SeqBAIJ*)B->data; 3363 3364 /* set matrix "i" values */ 3365 a->i[0] = 0; 3366 for (i=1; i<= mbs; i++) { 3367 a->i[i] = a->i[i-1] + browlengths[i-1]; 3368 a->ilen[i-1] = browlengths[i-1]; 3369 } 3370 a->nz = 0; 3371 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3372 3373 /* read in nonzero values */ 3374 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3375 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3376 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3377 3378 /* set "a" and "j" values into matrix */ 3379 nzcount = 0; jcount = 0; 3380 for (i=0; i<mbs; i++) { 3381 nzcountb = nzcount; 3382 nmask = 0; 3383 for (j=0; j<bs; j++) { 3384 kmax = rowlengths[i*bs+j]; 3385 for (k=0; k<kmax; k++) { 3386 tmp = jj[nzcount++]/bs; 3387 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3388 } 3389 } 3390 /* sort the masked values */ 3391 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3392 3393 /* set "j" values into matrix */ 3394 maskcount = 1; 3395 for (j=0; j<nmask; j++) { 3396 a->j[jcount++] = masked[j]; 3397 mask[masked[j]] = maskcount++; 3398 } 3399 /* set "a" values into matrix */ 3400 ishift = bs2*a->i[i]; 3401 for (j=0; j<bs; j++) { 3402 kmax = rowlengths[i*bs+j]; 3403 for (k=0; k<kmax; k++) { 3404 tmp = jj[nzcountb]/bs ; 3405 block = mask[tmp] - 1; 3406 point = jj[nzcountb] - bs*tmp; 3407 idx = ishift + bs2*block + j + bs*point; 3408 a->a[idx] = (MatScalar)aa[nzcountb++]; 3409 } 3410 } 3411 /* zero out the mask elements we set */ 3412 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3413 } 3414 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3415 3416 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3417 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3418 ierr = PetscFree(aa);CHKERRQ(ierr); 3419 ierr = PetscFree(jj);CHKERRQ(ierr); 3420 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3421 3422 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3423 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3424 ierr = MatView_Private(B);CHKERRQ(ierr); 3425 3426 *A = B; 3427 PetscFunctionReturn(0); 3428 } 3429 3430 #undef __FUNCT__ 3431 #define __FUNCT__ "MatCreateSeqBAIJ" 3432 /*@C 3433 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3434 compressed row) format. For good matrix assembly performance the 3435 user should preallocate the matrix storage by setting the parameter nz 3436 (or the array nnz). By setting these parameters accurately, performance 3437 during matrix assembly can be increased by more than a factor of 50. 3438 3439 Collective on MPI_Comm 3440 3441 Input Parameters: 3442 + comm - MPI communicator, set to PETSC_COMM_SELF 3443 . bs - size of block 3444 . m - number of rows 3445 . n - number of columns 3446 . nz - number of nonzero blocks per block row (same for all rows) 3447 - nnz - array containing the number of nonzero blocks in the various block rows 3448 (possibly different for each block row) or PETSC_NULL 3449 3450 Output Parameter: 3451 . A - the matrix 3452 3453 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3454 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3455 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3456 3457 Options Database Keys: 3458 . -mat_no_unroll - uses code that does not unroll the loops in the 3459 block calculations (much slower) 3460 . -mat_block_size - size of the blocks to use 3461 3462 Level: intermediate 3463 3464 Notes: 3465 The number of rows and columns must be divisible by blocksize. 3466 3467 If the nnz parameter is given then the nz parameter is ignored 3468 3469 A nonzero block is any block that as 1 or more nonzeros in it 3470 3471 The block AIJ format is fully compatible with standard Fortran 77 3472 storage. That is, the stored row and column indices can begin at 3473 either one (as in Fortran) or zero. See the users' manual for details. 3474 3475 Specify the preallocated storage with either nz or nnz (not both). 3476 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3477 allocation. For additional details, see the users manual chapter on 3478 matrices. 3479 3480 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3481 @*/ 3482 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3483 { 3484 PetscErrorCode ierr; 3485 3486 PetscFunctionBegin; 3487 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3488 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3489 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3490 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3491 PetscFunctionReturn(0); 3492 } 3493 3494 #undef __FUNCT__ 3495 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3496 /*@C 3497 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3498 per row in the matrix. For good matrix assembly performance the 3499 user should preallocate the matrix storage by setting the parameter nz 3500 (or the array nnz). By setting these parameters accurately, performance 3501 during matrix assembly can be increased by more than a factor of 50. 3502 3503 Collective on MPI_Comm 3504 3505 Input Parameters: 3506 + A - the matrix 3507 . bs - size of block 3508 . nz - number of block nonzeros per block row (same for all rows) 3509 - nnz - array containing the number of block nonzeros in the various block rows 3510 (possibly different for each block row) or PETSC_NULL 3511 3512 Options Database Keys: 3513 . -mat_no_unroll - uses code that does not unroll the loops in the 3514 block calculations (much slower) 3515 . -mat_block_size - size of the blocks to use 3516 3517 Level: intermediate 3518 3519 Notes: 3520 If the nnz parameter is given then the nz parameter is ignored 3521 3522 You can call MatGetInfo() to get information on how effective the preallocation was; 3523 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3524 You can also run with the option -info and look for messages with the string 3525 malloc in them to see if additional memory allocation was needed. 3526 3527 The block AIJ format is fully compatible with standard Fortran 77 3528 storage. That is, the stored row and column indices can begin at 3529 either one (as in Fortran) or zero. See the users' manual for details. 3530 3531 Specify the preallocated storage with either nz or nnz (not both). 3532 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3533 allocation. For additional details, see the users manual chapter on 3534 matrices. 3535 3536 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3537 @*/ 3538 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3539 { 3540 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 3541 3542 PetscFunctionBegin; 3543 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3544 if (f) { 3545 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 3546 } 3547 PetscFunctionReturn(0); 3548 } 3549 3550 #undef __FUNCT__ 3551 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3552 /*@C 3553 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3554 (the default sequential PETSc format). 3555 3556 Collective on MPI_Comm 3557 3558 Input Parameters: 3559 + A - the matrix 3560 . i - the indices into j for the start of each local row (starts with zero) 3561 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3562 - v - optional values in the matrix 3563 3564 Level: developer 3565 3566 .keywords: matrix, aij, compressed row, sparse 3567 3568 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3569 @*/ 3570 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3571 { 3572 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 3573 3574 PetscFunctionBegin; 3575 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3576 if (f) { 3577 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 3578 } 3579 PetscFunctionReturn(0); 3580 } 3581 3582 3583 #undef __FUNCT__ 3584 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3585 /*@ 3586 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 3587 (upper triangular entries in CSR format) provided by the user. 3588 3589 Collective on MPI_Comm 3590 3591 Input Parameters: 3592 + comm - must be an MPI communicator of size 1 3593 . bs - size of block 3594 . m - number of rows 3595 . n - number of columns 3596 . i - row indices 3597 . j - column indices 3598 - a - matrix values 3599 3600 Output Parameter: 3601 . mat - the matrix 3602 3603 Level: intermediate 3604 3605 Notes: 3606 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3607 once the matrix is destroyed 3608 3609 You cannot set new nonzero locations into this matrix, that will generate an error. 3610 3611 The i and j indices are 0 based 3612 3613 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3614 3615 @*/ 3616 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3617 { 3618 PetscErrorCode ierr; 3619 PetscInt ii; 3620 Mat_SeqBAIJ *baij; 3621 3622 PetscFunctionBegin; 3623 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3624 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3625 3626 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3627 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3628 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3629 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3630 baij = (Mat_SeqBAIJ*)(*mat)->data; 3631 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3632 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3633 3634 baij->i = i; 3635 baij->j = j; 3636 baij->a = a; 3637 baij->singlemalloc = PETSC_FALSE; 3638 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3639 baij->free_a = PETSC_FALSE; 3640 baij->free_ij = PETSC_FALSE; 3641 3642 for (ii=0; ii<m; ii++) { 3643 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3644 #if defined(PETSC_USE_DEBUG) 3645 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3646 #endif 3647 } 3648 #if defined(PETSC_USE_DEBUG) 3649 for (ii=0; ii<baij->i[m]; ii++) { 3650 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3651 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3652 } 3653 #endif 3654 3655 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3656 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3657 PetscFunctionReturn(0); 3658 } 3659