1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "../src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "../src/mat/blockinvert.h" 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 12 /*@ 13 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 14 15 Collective on Mat 16 17 Input Parameters: 18 . mat - the matrix 19 20 Level: advanced 21 @*/ 22 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 23 { 24 PetscErrorCode ierr,(*f)(Mat); 25 26 PetscFunctionBegin; 27 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 28 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 29 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 30 31 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 32 if (f) { 33 ierr = (*f)(mat);CHKERRQ(ierr); 34 } else { 35 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 36 } 37 PetscFunctionReturn(0); 38 } 39 40 EXTERN_C_BEGIN 41 #undef __FUNCT__ 42 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 43 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 44 { 45 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 46 PetscErrorCode ierr; 47 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs; 48 MatScalar *v = a->a,*odiag,*diag,*mdiag; 49 PetscReal shift = 0.0; 50 51 PetscFunctionBegin; 52 if (a->idiagvalid) PetscFunctionReturn(0); 53 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 54 diag_offset = a->diag; 55 if (!a->idiag) { 56 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 57 ierr = PetscLogObjectMemory(A,2*bs*bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 58 } 59 diag = a->idiag; 60 mdiag = a->idiag+bs*bs*mbs; 61 /* factor and invert each block */ 62 switch (bs){ 63 case 1: 64 for (i=0; i<mbs; i++) { 65 odiag = v + 1*diag_offset[i]; 66 diag[0] = odiag[0]; 67 mdiag[0] = odiag[0]; 68 diag[0] = 1.0 / (diag[0] + shift); 69 diag += 1; 70 mdiag += 1; 71 } 72 break; 73 case 2: 74 for (i=0; i<mbs; i++) { 75 odiag = v + 4*diag_offset[i]; 76 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 77 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 78 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 79 diag += 4; 80 mdiag += 4; 81 } 82 break; 83 case 3: 84 for (i=0; i<mbs; i++) { 85 odiag = v + 9*diag_offset[i]; 86 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 87 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 88 diag[8] = odiag[8]; 89 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 90 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 91 mdiag[8] = odiag[8]; 92 ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 93 diag += 9; 94 mdiag += 9; 95 } 96 break; 97 case 4: 98 for (i=0; i<mbs; i++) { 99 odiag = v + 16*diag_offset[i]; 100 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 101 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 103 diag += 16; 104 mdiag += 16; 105 } 106 break; 107 case 5: 108 for (i=0; i<mbs; i++) { 109 odiag = v + 25*diag_offset[i]; 110 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 111 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 112 ierr = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr); 113 diag += 25; 114 mdiag += 25; 115 } 116 break; 117 case 6: 118 for (i=0; i<mbs; i++) { 119 odiag = v + 36*diag_offset[i]; 120 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 121 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 122 ierr = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 123 diag += 36; 124 mdiag += 36; 125 } 126 break; 127 default: 128 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 129 } 130 a->idiagvalid = PETSC_TRUE; 131 PetscFunctionReturn(0); 132 } 133 EXTERN_C_END 134 135 #undef __FUNCT__ 136 #define __FUNCT__ "MatSOR_SeqBAIJ_1" 137 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 138 { 139 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 140 PetscScalar *x,x1,s1; 141 const PetscScalar *b; 142 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 143 PetscErrorCode ierr; 144 PetscInt m = a->mbs,i,i2,nz,j; 145 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 146 147 PetscFunctionBegin; 148 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 149 its = its*lits; 150 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 151 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 152 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 153 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 154 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 155 156 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 157 158 diag = a->diag; 159 idiag = a->idiag; 160 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 161 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 162 163 if (flag & SOR_ZERO_INITIAL_GUESS) { 164 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 165 x[0] = b[0]*idiag[0]; 166 i2 = 1; 167 idiag += 1; 168 for (i=1; i<m; i++) { 169 v = aa + ai[i]; 170 vi = aj + ai[i]; 171 nz = diag[i] - ai[i]; 172 s1 = b[i2]; 173 for (j=0; j<nz; j++) { 174 s1 -= v[j]*x[vi[j]]; 175 } 176 x[i2] = idiag[0]*s1; 177 idiag += 1; 178 i2 += 1; 179 } 180 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 181 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 182 } 183 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 184 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 185 i2 = 0; 186 mdiag = a->idiag+a->mbs; 187 for (i=0; i<m; i++) { 188 x1 = x[i2]; 189 x[i2] = mdiag[0]*x1; 190 mdiag += 1; 191 i2 += 1; 192 } 193 ierr = PetscLogFlops(m);CHKERRQ(ierr); 194 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 195 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 196 } 197 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 198 idiag = a->idiag+a->mbs - 1; 199 i2 = m - 1; 200 x1 = x[i2]; 201 x[i2] = idiag[0]*x1; 202 idiag -= 1; 203 i2 -= 1; 204 for (i=m-2; i>=0; i--) { 205 v = aa + (diag[i]+1); 206 vi = aj + diag[i] + 1; 207 nz = ai[i+1] - diag[i] - 1; 208 s1 = x[i2]; 209 for (j=0; j<nz; j++) { 210 s1 -= v[j]*x[vi[j]]; 211 } 212 x[i2] = idiag[0]*s1; 213 idiag -= 1; 214 i2 -= 1; 215 } 216 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 217 } 218 } else { 219 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 220 } 221 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 222 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 223 PetscFunctionReturn(0); 224 } 225 226 #undef __FUNCT__ 227 #define __FUNCT__ "MatSOR_SeqBAIJ_2" 228 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 229 { 230 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 231 PetscScalar *x,x1,x2,s1,s2; 232 const PetscScalar *b; 233 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 234 PetscErrorCode ierr; 235 PetscInt m = a->mbs,i,i2,nz,idx,j,it; 236 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 237 238 PetscFunctionBegin; 239 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 240 its = its*lits; 241 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 242 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 243 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 244 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 245 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 246 247 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 248 249 diag = a->diag; 250 idiag = a->idiag; 251 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 252 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 253 254 if (flag & SOR_ZERO_INITIAL_GUESS) { 255 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 256 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 257 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 258 i2 = 2; 259 idiag += 4; 260 for (i=1; i<m; i++) { 261 v = aa + 4*ai[i]; 262 vi = aj + ai[i]; 263 nz = diag[i] - ai[i]; 264 s1 = b[i2]; s2 = b[i2+1]; 265 for (j=0; j<nz; j++) { 266 idx = 2*vi[j]; 267 it = 4*j; 268 x1 = x[idx]; x2 = x[1+idx]; 269 s1 -= v[it]*x1 + v[it+2]*x2; 270 s2 -= v[it+1]*x1 + v[it+3]*x2; 271 } 272 x[i2] = idiag[0]*s1 + idiag[2]*s2; 273 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 274 idiag += 4; 275 i2 += 2; 276 } 277 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 278 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 279 } 280 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 281 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 282 i2 = 0; 283 mdiag = a->idiag+4*a->mbs; 284 for (i=0; i<m; i++) { 285 x1 = x[i2]; x2 = x[i2+1]; 286 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 287 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 288 mdiag += 4; 289 i2 += 2; 290 } 291 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 292 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 293 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 294 } 295 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 296 idiag = a->idiag+4*a->mbs - 4; 297 i2 = 2*m - 2; 298 x1 = x[i2]; x2 = x[i2+1]; 299 x[i2] = idiag[0]*x1 + idiag[2]*x2; 300 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 301 idiag -= 4; 302 i2 -= 2; 303 for (i=m-2; i>=0; i--) { 304 v = aa + 4*(diag[i]+1); 305 vi = aj + diag[i] + 1; 306 nz = ai[i+1] - diag[i] - 1; 307 s1 = x[i2]; s2 = x[i2+1]; 308 for (j=0; j<nz; j++) { 309 idx = 2*vi[j]; 310 it = 4*j; 311 x1 = x[idx]; x2 = x[1+idx]; 312 s1 -= v[it]*x1 + v[it+2]*x2; 313 s2 -= v[it+1]*x1 + v[it+3]*x2; 314 } 315 x[i2] = idiag[0]*s1 + idiag[2]*s2; 316 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 317 idiag -= 4; 318 i2 -= 2; 319 } 320 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 321 } 322 } else { 323 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 324 } 325 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 326 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 327 PetscFunctionReturn(0); 328 } 329 330 #undef __FUNCT__ 331 #define __FUNCT__ "MatSOR_SeqBAIJ_3" 332 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 333 { 334 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 335 PetscScalar *x,x1,x2,x3,s1,s2,s3; 336 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 337 const PetscScalar *b; 338 PetscErrorCode ierr; 339 PetscInt m = a->mbs,i,i2,nz,idx; 340 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 341 342 PetscFunctionBegin; 343 its = its*lits; 344 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 345 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 346 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 347 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 348 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 349 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 350 351 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 352 353 diag = a->diag; 354 idiag = a->idiag; 355 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 356 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 357 358 if (flag & SOR_ZERO_INITIAL_GUESS) { 359 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 360 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 361 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 362 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 363 i2 = 3; 364 idiag += 9; 365 for (i=1; i<m; i++) { 366 v = aa + 9*ai[i]; 367 vi = aj + ai[i]; 368 nz = diag[i] - ai[i]; 369 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 370 while (nz--) { 371 idx = 3*(*vi++); 372 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 373 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 374 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 375 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 376 v += 9; 377 } 378 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 379 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 380 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 381 idiag += 9; 382 i2 += 3; 383 } 384 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 385 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 386 } 387 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 388 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 389 i2 = 0; 390 mdiag = a->idiag+9*a->mbs; 391 for (i=0; i<m; i++) { 392 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 393 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 394 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 395 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 396 mdiag += 9; 397 i2 += 3; 398 } 399 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 400 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 401 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 402 } 403 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 404 idiag = a->idiag+9*a->mbs - 9; 405 i2 = 3*m - 3; 406 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 407 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 408 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 409 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 410 idiag -= 9; 411 i2 -= 3; 412 for (i=m-2; i>=0; i--) { 413 v = aa + 9*(diag[i]+1); 414 vi = aj + diag[i] + 1; 415 nz = ai[i+1] - diag[i] - 1; 416 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 417 while (nz--) { 418 idx = 3*(*vi++); 419 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 420 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 421 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 422 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 423 v += 9; 424 } 425 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 426 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 427 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 428 idiag -= 9; 429 i2 -= 3; 430 } 431 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 432 } 433 } else { 434 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 435 } 436 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 437 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 438 PetscFunctionReturn(0); 439 } 440 441 #undef __FUNCT__ 442 #define __FUNCT__ "MatSOR_SeqBAIJ_4" 443 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 444 { 445 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 446 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 447 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 448 const PetscScalar *b; 449 PetscErrorCode ierr; 450 PetscInt m = a->mbs,i,i2,nz,idx; 451 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 452 453 PetscFunctionBegin; 454 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 455 its = its*lits; 456 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 457 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 458 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 459 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 460 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 461 462 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 463 464 diag = a->diag; 465 idiag = a->idiag; 466 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 467 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 468 469 if (flag & SOR_ZERO_INITIAL_GUESS) { 470 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 471 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 472 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 473 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 474 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 475 i2 = 4; 476 idiag += 16; 477 for (i=1; i<m; i++) { 478 v = aa + 16*ai[i]; 479 vi = aj + ai[i]; 480 nz = diag[i] - ai[i]; 481 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 482 while (nz--) { 483 idx = 4*(*vi++); 484 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 485 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 486 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 487 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 488 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 489 v += 16; 490 } 491 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 492 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 493 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 494 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 495 idiag += 16; 496 i2 += 4; 497 } 498 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 499 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 500 } 501 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 502 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 503 i2 = 0; 504 mdiag = a->idiag+16*a->mbs; 505 for (i=0; i<m; i++) { 506 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 507 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 508 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 509 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 510 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 511 mdiag += 16; 512 i2 += 4; 513 } 514 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 515 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 516 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 517 } 518 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 519 idiag = a->idiag+16*a->mbs - 16; 520 i2 = 4*m - 4; 521 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 522 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 523 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 524 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 525 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 526 idiag -= 16; 527 i2 -= 4; 528 for (i=m-2; i>=0; i--) { 529 v = aa + 16*(diag[i]+1); 530 vi = aj + diag[i] + 1; 531 nz = ai[i+1] - diag[i] - 1; 532 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 533 while (nz--) { 534 idx = 4*(*vi++); 535 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 536 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 537 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 538 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 539 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 540 v += 16; 541 } 542 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 543 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 544 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 545 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 546 idiag -= 16; 547 i2 -= 4; 548 } 549 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 550 } 551 } else { 552 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 553 } 554 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 555 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 556 PetscFunctionReturn(0); 557 } 558 559 #undef __FUNCT__ 560 #define __FUNCT__ "MatSOR_SeqBAIJ_5" 561 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 562 { 563 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 564 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 565 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 566 const PetscScalar *b; 567 PetscErrorCode ierr; 568 PetscInt m = a->mbs,i,i2,nz,idx; 569 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 570 571 PetscFunctionBegin; 572 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 573 its = its*lits; 574 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 575 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 576 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 577 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 578 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 579 580 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 581 582 diag = a->diag; 583 idiag = a->idiag; 584 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 585 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 586 587 if (flag & SOR_ZERO_INITIAL_GUESS) { 588 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 589 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 590 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 591 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 592 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 593 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 594 i2 = 5; 595 idiag += 25; 596 for (i=1; i<m; i++) { 597 v = aa + 25*ai[i]; 598 vi = aj + ai[i]; 599 nz = diag[i] - ai[i]; 600 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 601 while (nz--) { 602 idx = 5*(*vi++); 603 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 604 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 605 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 606 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 607 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 608 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 609 v += 25; 610 } 611 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 612 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 613 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 614 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 615 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 616 idiag += 25; 617 i2 += 5; 618 } 619 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 620 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 621 } 622 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 623 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 624 i2 = 0; 625 mdiag = a->idiag+25*a->mbs; 626 for (i=0; i<m; i++) { 627 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 628 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 629 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 630 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 631 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 632 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 633 mdiag += 25; 634 i2 += 5; 635 } 636 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 637 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 638 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 639 } 640 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 641 idiag = a->idiag+25*a->mbs - 25; 642 i2 = 5*m - 5; 643 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 644 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 645 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 646 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 647 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 648 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 649 idiag -= 25; 650 i2 -= 5; 651 for (i=m-2; i>=0; i--) { 652 v = aa + 25*(diag[i]+1); 653 vi = aj + diag[i] + 1; 654 nz = ai[i+1] - diag[i] - 1; 655 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 656 while (nz--) { 657 idx = 5*(*vi++); 658 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 659 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 660 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 661 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 662 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 663 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 664 v += 25; 665 } 666 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 667 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 668 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 669 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 670 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 671 idiag -= 25; 672 i2 -= 5; 673 } 674 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 675 } 676 } else { 677 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 678 } 679 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 680 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 681 PetscFunctionReturn(0); 682 } 683 684 #undef __FUNCT__ 685 #define __FUNCT__ "MatSOR_SeqBAIJ_6" 686 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 687 { 688 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 689 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 690 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 691 const PetscScalar *b; 692 PetscErrorCode ierr; 693 PetscInt m = a->mbs,i,i2,nz,idx; 694 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 695 696 PetscFunctionBegin; 697 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 698 its = its*lits; 699 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 700 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 701 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 702 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 703 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 704 705 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 706 707 diag = a->diag; 708 idiag = a->idiag; 709 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 710 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 711 712 if (flag & SOR_ZERO_INITIAL_GUESS) { 713 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 714 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 715 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 716 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 717 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 718 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 719 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 720 i2 = 6; 721 idiag += 36; 722 for (i=1; i<m; i++) { 723 v = aa + 36*ai[i]; 724 vi = aj + ai[i]; 725 nz = diag[i] - ai[i]; 726 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 727 while (nz--) { 728 idx = 6*(*vi++); 729 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 730 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 731 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 732 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 733 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 734 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 735 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 736 v += 36; 737 } 738 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 739 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 740 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 741 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 742 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 743 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 744 idiag += 36; 745 i2 += 6; 746 } 747 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 748 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 749 } 750 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 751 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 752 i2 = 0; 753 mdiag = a->idiag+36*a->mbs; 754 for (i=0; i<m; i++) { 755 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 756 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 757 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 758 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 759 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 760 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 761 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 762 mdiag += 36; 763 i2 += 6; 764 } 765 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 766 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 767 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 768 } 769 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 770 idiag = a->idiag+36*a->mbs - 36; 771 i2 = 6*m - 6; 772 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 773 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 774 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 775 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 776 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 777 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 778 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 779 idiag -= 36; 780 i2 -= 6; 781 for (i=m-2; i>=0; i--) { 782 v = aa + 36*(diag[i]+1); 783 vi = aj + diag[i] + 1; 784 nz = ai[i+1] - diag[i] - 1; 785 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 786 while (nz--) { 787 idx = 6*(*vi++); 788 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 789 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 790 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 791 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 792 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 793 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 794 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 795 v += 36; 796 } 797 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 798 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 799 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 800 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 801 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 802 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 803 idiag -= 36; 804 i2 -= 6; 805 } 806 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 807 } 808 } else { 809 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 810 } 811 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 812 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 813 PetscFunctionReturn(0); 814 } 815 816 #undef __FUNCT__ 817 #define __FUNCT__ "MatSOR_SeqBAIJ_7" 818 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 819 { 820 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 821 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 822 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 823 const PetscScalar *b; 824 PetscErrorCode ierr; 825 PetscInt m = a->mbs,i,i2,nz,idx; 826 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 827 828 PetscFunctionBegin; 829 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 830 its = its*lits; 831 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 832 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 833 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 834 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 835 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 836 837 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 838 839 diag = a->diag; 840 idiag = a->idiag; 841 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 842 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 843 844 if (flag & SOR_ZERO_INITIAL_GUESS) { 845 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 846 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 847 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 848 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 849 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 850 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 851 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 852 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 853 i2 = 7; 854 idiag += 49; 855 for (i=1; i<m; i++) { 856 v = aa + 49*ai[i]; 857 vi = aj + ai[i]; 858 nz = diag[i] - ai[i]; 859 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 860 while (nz--) { 861 idx = 7*(*vi++); 862 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 863 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 864 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 865 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 866 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 867 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 868 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 869 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 870 v += 49; 871 } 872 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 873 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 874 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 875 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 876 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 877 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 878 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 879 idiag += 49; 880 i2 += 7; 881 } 882 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 883 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 884 } 885 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 886 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 887 i2 = 0; 888 mdiag = a->idiag+49*a->mbs; 889 for (i=0; i<m; i++) { 890 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 891 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7; 892 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7; 893 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7; 894 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7; 895 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7; 896 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7; 897 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7; 898 mdiag += 36; 899 i2 += 6; 900 } 901 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 902 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 903 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 904 } 905 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 906 idiag = a->idiag+49*a->mbs - 49; 907 i2 = 7*m - 7; 908 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 909 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 910 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 911 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 912 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 913 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 914 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 915 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 916 idiag -= 49; 917 i2 -= 7; 918 for (i=m-2; i>=0; i--) { 919 v = aa + 49*(diag[i]+1); 920 vi = aj + diag[i] + 1; 921 nz = ai[i+1] - diag[i] - 1; 922 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 923 while (nz--) { 924 idx = 7*(*vi++); 925 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 926 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 927 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 928 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 929 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 930 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 931 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 932 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 933 v += 49; 934 } 935 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 936 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 937 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 938 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 939 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 940 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 941 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 942 idiag -= 49; 943 i2 -= 7; 944 } 945 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 946 } 947 } else { 948 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 949 } 950 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 951 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 952 PetscFunctionReturn(0); 953 } 954 955 /* 956 Special version for direct calls from Fortran (Used in PETSc-fun3d) 957 */ 958 #if defined(PETSC_HAVE_FORTRAN_CAPS) 959 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 960 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 961 #define matsetvaluesblocked4_ matsetvaluesblocked4 962 #endif 963 964 EXTERN_C_BEGIN 965 #undef __FUNCT__ 966 #define __FUNCT__ "matsetvaluesblocked4_" 967 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 968 { 969 Mat A = *AA; 970 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 971 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 972 PetscInt *ai=a->i,*ailen=a->ilen; 973 PetscInt *aj=a->j,stepval,lastcol = -1; 974 const PetscScalar *value = v; 975 MatScalar *ap,*aa = a->a,*bap; 976 977 PetscFunctionBegin; 978 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 979 stepval = (n-1)*4; 980 for (k=0; k<m; k++) { /* loop over added rows */ 981 row = im[k]; 982 rp = aj + ai[row]; 983 ap = aa + 16*ai[row]; 984 nrow = ailen[row]; 985 low = 0; 986 high = nrow; 987 for (l=0; l<n; l++) { /* loop over added columns */ 988 col = in[l]; 989 if (col <= lastcol) low = 0; else high = nrow; 990 lastcol = col; 991 value = v + k*(stepval+4 + l)*4; 992 while (high-low > 7) { 993 t = (low+high)/2; 994 if (rp[t] > col) high = t; 995 else low = t; 996 } 997 for (i=low; i<high; i++) { 998 if (rp[i] > col) break; 999 if (rp[i] == col) { 1000 bap = ap + 16*i; 1001 for (ii=0; ii<4; ii++,value+=stepval) { 1002 for (jj=ii; jj<16; jj+=4) { 1003 bap[jj] += *value++; 1004 } 1005 } 1006 goto noinsert2; 1007 } 1008 } 1009 N = nrow++ - 1; 1010 high++; /* added new column index thus must search to one higher than before */ 1011 /* shift up all the later entries in this row */ 1012 for (ii=N; ii>=i; ii--) { 1013 rp[ii+1] = rp[ii]; 1014 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1015 } 1016 if (N >= i) { 1017 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1018 } 1019 rp[i] = col; 1020 bap = ap + 16*i; 1021 for (ii=0; ii<4; ii++,value+=stepval) { 1022 for (jj=ii; jj<16; jj+=4) { 1023 bap[jj] = *value++; 1024 } 1025 } 1026 noinsert2:; 1027 low = i; 1028 } 1029 ailen[row] = nrow; 1030 } 1031 PetscFunctionReturnVoid(); 1032 } 1033 EXTERN_C_END 1034 1035 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1036 #define matsetvalues4_ MATSETVALUES4 1037 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1038 #define matsetvalues4_ matsetvalues4 1039 #endif 1040 1041 EXTERN_C_BEGIN 1042 #undef __FUNCT__ 1043 #define __FUNCT__ "MatSetValues4_" 1044 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1045 { 1046 Mat A = *AA; 1047 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1048 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1049 PetscInt *ai=a->i,*ailen=a->ilen; 1050 PetscInt *aj=a->j,brow,bcol; 1051 PetscInt ridx,cidx,lastcol = -1; 1052 MatScalar *ap,value,*aa=a->a,*bap; 1053 1054 PetscFunctionBegin; 1055 for (k=0; k<m; k++) { /* loop over added rows */ 1056 row = im[k]; brow = row/4; 1057 rp = aj + ai[brow]; 1058 ap = aa + 16*ai[brow]; 1059 nrow = ailen[brow]; 1060 low = 0; 1061 high = nrow; 1062 for (l=0; l<n; l++) { /* loop over added columns */ 1063 col = in[l]; bcol = col/4; 1064 ridx = row % 4; cidx = col % 4; 1065 value = v[l + k*n]; 1066 if (col <= lastcol) low = 0; else high = nrow; 1067 lastcol = col; 1068 while (high-low > 7) { 1069 t = (low+high)/2; 1070 if (rp[t] > bcol) high = t; 1071 else low = t; 1072 } 1073 for (i=low; i<high; i++) { 1074 if (rp[i] > bcol) break; 1075 if (rp[i] == bcol) { 1076 bap = ap + 16*i + 4*cidx + ridx; 1077 *bap += value; 1078 goto noinsert1; 1079 } 1080 } 1081 N = nrow++ - 1; 1082 high++; /* added new column thus must search to one higher than before */ 1083 /* shift up all the later entries in this row */ 1084 for (ii=N; ii>=i; ii--) { 1085 rp[ii+1] = rp[ii]; 1086 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1087 } 1088 if (N>=i) { 1089 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1090 } 1091 rp[i] = bcol; 1092 ap[16*i + 4*cidx + ridx] = value; 1093 noinsert1:; 1094 low = i; 1095 } 1096 ailen[brow] = nrow; 1097 } 1098 PetscFunctionReturnVoid(); 1099 } 1100 EXTERN_C_END 1101 1102 /* 1103 Checks for missing diagonals 1104 */ 1105 #undef __FUNCT__ 1106 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1107 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d) 1108 { 1109 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1110 PetscErrorCode ierr; 1111 PetscInt *diag,*jj = a->j,i; 1112 1113 PetscFunctionBegin; 1114 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1115 *missing = PETSC_FALSE; 1116 if (A->rmap->n > 0 && !jj) { 1117 *missing = PETSC_TRUE; 1118 if (d) *d = 0; 1119 PetscInfo(A,"Matrix has no entries therefor is missing diagonal"); 1120 } else { 1121 diag = a->diag; 1122 for (i=0; i<a->mbs; i++) { 1123 if (jj[diag[i]] != i) { 1124 *missing = PETSC_TRUE; 1125 if (d) *d = i; 1126 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1127 } 1128 } 1129 } 1130 PetscFunctionReturn(0); 1131 } 1132 1133 #undef __FUNCT__ 1134 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1135 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1136 { 1137 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1138 PetscErrorCode ierr; 1139 PetscInt i,j,m = a->mbs; 1140 1141 PetscFunctionBegin; 1142 if (!a->diag) { 1143 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1144 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1145 a->free_diag = PETSC_TRUE; 1146 } 1147 for (i=0; i<m; i++) { 1148 a->diag[i] = a->i[i+1]; 1149 for (j=a->i[i]; j<a->i[i+1]; j++) { 1150 if (a->j[j] == i) { 1151 a->diag[i] = j; 1152 break; 1153 } 1154 } 1155 } 1156 PetscFunctionReturn(0); 1157 } 1158 1159 1160 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1161 1162 #undef __FUNCT__ 1163 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1164 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1165 { 1166 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1167 PetscErrorCode ierr; 1168 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt; 1169 PetscInt *tia, *tja; 1170 1171 PetscFunctionBegin; 1172 *nn = n; 1173 if (!ia) PetscFunctionReturn(0); 1174 if (symmetric) { 1175 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1176 nz = tia[n]; 1177 } else { 1178 tia = a->i; tja = a->j; 1179 } 1180 1181 if (!blockcompressed && bs > 1) { 1182 (*nn) *= bs; 1183 nbs = bs; 1184 /* malloc & create the natural set of indices */ 1185 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1186 if (n) { 1187 (*ia)[0] = 0; 1188 for (j=1; j<bs; j++) { 1189 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1190 } 1191 } 1192 1193 for (i=1; i<n; i++) { 1194 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1195 for (j=1; j<bs; j++) { 1196 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1197 } 1198 } 1199 if (n) { 1200 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1201 } 1202 1203 if (ja) { 1204 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1205 cnt = 0; 1206 for (i=0; i<n; i++) { 1207 for (j=0; j<bs; j++) { 1208 for (k=tia[i]; k<tia[i+1]; k++) { 1209 for (l=0; l<bs; l++) { 1210 (*ja)[cnt++] = bs*tja[k] + l; 1211 } 1212 } 1213 } 1214 } 1215 } 1216 1217 n *= bs; 1218 nz *= bs*bs; 1219 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1220 ierr = PetscFree(tia);CHKERRQ(ierr); 1221 ierr = PetscFree(tja);CHKERRQ(ierr); 1222 } 1223 } else if (oshift == 1) { 1224 if (symmetric) { 1225 PetscInt nz = tia[A->rmap->n/bs]; 1226 /* add 1 to i and j indices */ 1227 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1228 *ia = tia; 1229 if (ja) { 1230 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1231 *ja = tja; 1232 } 1233 } else { 1234 PetscInt nz = a->i[A->rmap->n/bs]; 1235 /* malloc space and add 1 to i and j indices */ 1236 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1237 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1238 if (ja) { 1239 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1240 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1241 } 1242 } 1243 } else { 1244 *ia = tia; 1245 if (ja) *ja = tja; 1246 } 1247 1248 PetscFunctionReturn(0); 1249 } 1250 1251 #undef __FUNCT__ 1252 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1253 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1254 { 1255 PetscErrorCode ierr; 1256 1257 PetscFunctionBegin; 1258 if (!ia) PetscFunctionReturn(0); 1259 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1260 ierr = PetscFree(*ia);CHKERRQ(ierr); 1261 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1262 } 1263 PetscFunctionReturn(0); 1264 } 1265 1266 #undef __FUNCT__ 1267 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1268 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1269 { 1270 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1271 PetscErrorCode ierr; 1272 1273 PetscFunctionBegin; 1274 #if defined(PETSC_USE_LOG) 1275 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1276 #endif 1277 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1278 if (a->row) { 1279 ierr = ISDestroy(a->row);CHKERRQ(ierr); 1280 } 1281 if (a->col) { 1282 ierr = ISDestroy(a->col);CHKERRQ(ierr); 1283 } 1284 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1285 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1286 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1287 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1288 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1289 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 1290 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1291 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1292 if (a->compressedrow.use){ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);} 1293 1294 if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);} 1295 if (a->parent) {ierr = MatDestroy(a->parent);CHKERRQ(ierr);} 1296 ierr = PetscFree(a);CHKERRQ(ierr); 1297 1298 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1304 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1305 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1306 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1307 PetscFunctionReturn(0); 1308 } 1309 1310 #undef __FUNCT__ 1311 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1312 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg) 1313 { 1314 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1315 PetscErrorCode ierr; 1316 1317 PetscFunctionBegin; 1318 switch (op) { 1319 case MAT_ROW_ORIENTED: 1320 a->roworiented = flg; 1321 break; 1322 case MAT_KEEP_NONZERO_PATTERN: 1323 a->keepnonzeropattern = flg; 1324 break; 1325 case MAT_NEW_NONZERO_LOCATIONS: 1326 a->nonew = (flg ? 0 : 1); 1327 break; 1328 case MAT_NEW_NONZERO_LOCATION_ERR: 1329 a->nonew = (flg ? -1 : 0); 1330 break; 1331 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1332 a->nonew = (flg ? -2 : 0); 1333 break; 1334 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1335 a->nounused = (flg ? -1 : 0); 1336 break; 1337 case MAT_NEW_DIAGONALS: 1338 case MAT_IGNORE_OFF_PROC_ENTRIES: 1339 case MAT_USE_HASH_TABLE: 1340 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1341 break; 1342 case MAT_SYMMETRIC: 1343 case MAT_STRUCTURALLY_SYMMETRIC: 1344 case MAT_HERMITIAN: 1345 case MAT_SYMMETRY_ETERNAL: 1346 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1347 break; 1348 default: 1349 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1350 } 1351 PetscFunctionReturn(0); 1352 } 1353 1354 #undef __FUNCT__ 1355 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1356 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1357 { 1358 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1359 PetscErrorCode ierr; 1360 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1361 MatScalar *aa,*aa_i; 1362 PetscScalar *v_i; 1363 1364 PetscFunctionBegin; 1365 bs = A->rmap->bs; 1366 ai = a->i; 1367 aj = a->j; 1368 aa = a->a; 1369 bs2 = a->bs2; 1370 1371 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1372 1373 bn = row/bs; /* Block number */ 1374 bp = row % bs; /* Block Position */ 1375 M = ai[bn+1] - ai[bn]; 1376 *nz = bs*M; 1377 1378 if (v) { 1379 *v = 0; 1380 if (*nz) { 1381 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1382 for (i=0; i<M; i++) { /* for each block in the block row */ 1383 v_i = *v + i*bs; 1384 aa_i = aa + bs2*(ai[bn] + i); 1385 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1386 } 1387 } 1388 } 1389 1390 if (idx) { 1391 *idx = 0; 1392 if (*nz) { 1393 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1394 for (i=0; i<M; i++) { /* for each block in the block row */ 1395 idx_i = *idx + i*bs; 1396 itmp = bs*aj[ai[bn] + i]; 1397 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1398 } 1399 } 1400 } 1401 PetscFunctionReturn(0); 1402 } 1403 1404 #undef __FUNCT__ 1405 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1406 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1407 { 1408 PetscErrorCode ierr; 1409 1410 PetscFunctionBegin; 1411 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1412 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1413 PetscFunctionReturn(0); 1414 } 1415 1416 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1417 1418 #undef __FUNCT__ 1419 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1420 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1421 { 1422 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1423 Mat C; 1424 PetscErrorCode ierr; 1425 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1426 PetscInt *rows,*cols,bs2=a->bs2; 1427 MatScalar *array; 1428 1429 PetscFunctionBegin; 1430 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1431 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1432 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1433 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1434 1435 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1436 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1437 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1438 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1439 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1440 ierr = PetscFree(col);CHKERRQ(ierr); 1441 } else { 1442 C = *B; 1443 } 1444 1445 array = a->a; 1446 ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 1447 for (i=0; i<mbs; i++) { 1448 cols[0] = i*bs; 1449 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1450 len = ai[i+1] - ai[i]; 1451 for (j=0; j<len; j++) { 1452 rows[0] = (*aj++)*bs; 1453 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1454 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1455 array += bs2; 1456 } 1457 } 1458 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1459 1460 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1461 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1462 1463 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1464 *B = C; 1465 } else { 1466 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1467 } 1468 PetscFunctionReturn(0); 1469 } 1470 1471 #undef __FUNCT__ 1472 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1473 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1474 { 1475 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1476 PetscErrorCode ierr; 1477 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1478 int fd; 1479 PetscScalar *aa; 1480 FILE *file; 1481 1482 PetscFunctionBegin; 1483 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1484 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1485 col_lens[0] = MAT_FILE_COOKIE; 1486 1487 col_lens[1] = A->rmap->N; 1488 col_lens[2] = A->cmap->n; 1489 col_lens[3] = a->nz*bs2; 1490 1491 /* store lengths of each row and write (including header) to file */ 1492 count = 0; 1493 for (i=0; i<a->mbs; i++) { 1494 for (j=0; j<bs; j++) { 1495 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1496 } 1497 } 1498 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1499 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1500 1501 /* store column indices (zero start index) */ 1502 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1503 count = 0; 1504 for (i=0; i<a->mbs; i++) { 1505 for (j=0; j<bs; j++) { 1506 for (k=a->i[i]; k<a->i[i+1]; k++) { 1507 for (l=0; l<bs; l++) { 1508 jj[count++] = bs*a->j[k] + l; 1509 } 1510 } 1511 } 1512 } 1513 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1514 ierr = PetscFree(jj);CHKERRQ(ierr); 1515 1516 /* store nonzero values */ 1517 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1518 count = 0; 1519 for (i=0; i<a->mbs; i++) { 1520 for (j=0; j<bs; j++) { 1521 for (k=a->i[i]; k<a->i[i+1]; k++) { 1522 for (l=0; l<bs; l++) { 1523 aa[count++] = a->a[bs2*k + l*bs + j]; 1524 } 1525 } 1526 } 1527 } 1528 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1529 ierr = PetscFree(aa);CHKERRQ(ierr); 1530 1531 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1532 if (file) { 1533 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1534 } 1535 PetscFunctionReturn(0); 1536 } 1537 1538 #undef __FUNCT__ 1539 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1540 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1541 { 1542 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1543 PetscErrorCode ierr; 1544 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1545 PetscViewerFormat format; 1546 1547 PetscFunctionBegin; 1548 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1549 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1550 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1551 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1552 Mat aij; 1553 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1554 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1555 ierr = MatDestroy(aij);CHKERRQ(ierr); 1556 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1557 PetscFunctionReturn(0); 1558 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1559 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1560 for (i=0; i<a->mbs; i++) { 1561 for (j=0; j<bs; j++) { 1562 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1563 for (k=a->i[i]; k<a->i[i+1]; k++) { 1564 for (l=0; l<bs; l++) { 1565 #if defined(PETSC_USE_COMPLEX) 1566 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1567 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1568 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1569 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1570 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1571 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1572 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1573 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1574 } 1575 #else 1576 if (a->a[bs2*k + l*bs + j] != 0.0) { 1577 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1578 } 1579 #endif 1580 } 1581 } 1582 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1583 } 1584 } 1585 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1586 } else { 1587 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1588 for (i=0; i<a->mbs; i++) { 1589 for (j=0; j<bs; j++) { 1590 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1591 for (k=a->i[i]; k<a->i[i+1]; k++) { 1592 for (l=0; l<bs; l++) { 1593 #if defined(PETSC_USE_COMPLEX) 1594 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1595 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1596 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1597 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1598 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1599 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1600 } else { 1601 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1602 } 1603 #else 1604 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1605 #endif 1606 } 1607 } 1608 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1609 } 1610 } 1611 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1612 } 1613 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 #undef __FUNCT__ 1618 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1619 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1620 { 1621 Mat A = (Mat) Aa; 1622 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1623 PetscErrorCode ierr; 1624 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1625 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1626 MatScalar *aa; 1627 PetscViewer viewer; 1628 1629 PetscFunctionBegin; 1630 1631 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1632 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1633 1634 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1635 1636 /* loop over matrix elements drawing boxes */ 1637 color = PETSC_DRAW_BLUE; 1638 for (i=0,row=0; i<mbs; i++,row+=bs) { 1639 for (j=a->i[i]; j<a->i[i+1]; j++) { 1640 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1641 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1642 aa = a->a + j*bs2; 1643 for (k=0; k<bs; k++) { 1644 for (l=0; l<bs; l++) { 1645 if (PetscRealPart(*aa++) >= 0.) continue; 1646 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1647 } 1648 } 1649 } 1650 } 1651 color = PETSC_DRAW_CYAN; 1652 for (i=0,row=0; i<mbs; i++,row+=bs) { 1653 for (j=a->i[i]; j<a->i[i+1]; j++) { 1654 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1655 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1656 aa = a->a + j*bs2; 1657 for (k=0; k<bs; k++) { 1658 for (l=0; l<bs; l++) { 1659 if (PetscRealPart(*aa++) != 0.) continue; 1660 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1661 } 1662 } 1663 } 1664 } 1665 1666 color = PETSC_DRAW_RED; 1667 for (i=0,row=0; i<mbs; i++,row+=bs) { 1668 for (j=a->i[i]; j<a->i[i+1]; j++) { 1669 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1670 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1671 aa = a->a + j*bs2; 1672 for (k=0; k<bs; k++) { 1673 for (l=0; l<bs; l++) { 1674 if (PetscRealPart(*aa++) <= 0.) continue; 1675 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1676 } 1677 } 1678 } 1679 } 1680 PetscFunctionReturn(0); 1681 } 1682 1683 #undef __FUNCT__ 1684 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1685 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1686 { 1687 PetscErrorCode ierr; 1688 PetscReal xl,yl,xr,yr,w,h; 1689 PetscDraw draw; 1690 PetscTruth isnull; 1691 1692 PetscFunctionBegin; 1693 1694 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1695 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1696 1697 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1698 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1699 xr += w; yr += h; xl = -w; yl = -h; 1700 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1701 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1702 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1703 PetscFunctionReturn(0); 1704 } 1705 1706 #undef __FUNCT__ 1707 #define __FUNCT__ "MatView_SeqBAIJ" 1708 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1709 { 1710 PetscErrorCode ierr; 1711 PetscTruth iascii,isbinary,isdraw; 1712 1713 PetscFunctionBegin; 1714 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1715 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1716 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1717 if (iascii){ 1718 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1719 } else if (isbinary) { 1720 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1721 } else if (isdraw) { 1722 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1723 } else { 1724 Mat B; 1725 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1726 ierr = MatView(B,viewer);CHKERRQ(ierr); 1727 ierr = MatDestroy(B);CHKERRQ(ierr); 1728 } 1729 PetscFunctionReturn(0); 1730 } 1731 1732 1733 #undef __FUNCT__ 1734 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1735 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1736 { 1737 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1738 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1739 PetscInt *ai = a->i,*ailen = a->ilen; 1740 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1741 MatScalar *ap,*aa = a->a; 1742 1743 PetscFunctionBegin; 1744 for (k=0; k<m; k++) { /* loop over rows */ 1745 row = im[k]; brow = row/bs; 1746 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1747 if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1748 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1749 nrow = ailen[brow]; 1750 for (l=0; l<n; l++) { /* loop over columns */ 1751 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1752 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1753 col = in[l] ; 1754 bcol = col/bs; 1755 cidx = col%bs; 1756 ridx = row%bs; 1757 high = nrow; 1758 low = 0; /* assume unsorted */ 1759 while (high-low > 5) { 1760 t = (low+high)/2; 1761 if (rp[t] > bcol) high = t; 1762 else low = t; 1763 } 1764 for (i=low; i<high; i++) { 1765 if (rp[i] > bcol) break; 1766 if (rp[i] == bcol) { 1767 *v++ = ap[bs2*i+bs*cidx+ridx]; 1768 goto finished; 1769 } 1770 } 1771 *v++ = 0.0; 1772 finished:; 1773 } 1774 } 1775 PetscFunctionReturn(0); 1776 } 1777 1778 #define CHUNKSIZE 10 1779 #undef __FUNCT__ 1780 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1781 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1782 { 1783 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1784 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1785 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1786 PetscErrorCode ierr; 1787 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1788 PetscTruth roworiented=a->roworiented; 1789 const PetscScalar *value = v; 1790 MatScalar *ap,*aa = a->a,*bap; 1791 1792 PetscFunctionBegin; 1793 if (roworiented) { 1794 stepval = (n-1)*bs; 1795 } else { 1796 stepval = (m-1)*bs; 1797 } 1798 for (k=0; k<m; k++) { /* loop over added rows */ 1799 row = im[k]; 1800 if (row < 0) continue; 1801 #if defined(PETSC_USE_DEBUG) 1802 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1803 #endif 1804 rp = aj + ai[row]; 1805 ap = aa + bs2*ai[row]; 1806 rmax = imax[row]; 1807 nrow = ailen[row]; 1808 low = 0; 1809 high = nrow; 1810 for (l=0; l<n; l++) { /* loop over added columns */ 1811 if (in[l] < 0) continue; 1812 #if defined(PETSC_USE_DEBUG) 1813 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1814 #endif 1815 col = in[l]; 1816 if (roworiented) { 1817 value = v + k*(stepval+bs)*bs + l*bs; 1818 } else { 1819 value = v + l*(stepval+bs)*bs + k*bs; 1820 } 1821 if (col <= lastcol) low = 0; else high = nrow; 1822 lastcol = col; 1823 while (high-low > 7) { 1824 t = (low+high)/2; 1825 if (rp[t] > col) high = t; 1826 else low = t; 1827 } 1828 for (i=low; i<high; i++) { 1829 if (rp[i] > col) break; 1830 if (rp[i] == col) { 1831 bap = ap + bs2*i; 1832 if (roworiented) { 1833 if (is == ADD_VALUES) { 1834 for (ii=0; ii<bs; ii++,value+=stepval) { 1835 for (jj=ii; jj<bs2; jj+=bs) { 1836 bap[jj] += *value++; 1837 } 1838 } 1839 } else { 1840 for (ii=0; ii<bs; ii++,value+=stepval) { 1841 for (jj=ii; jj<bs2; jj+=bs) { 1842 bap[jj] = *value++; 1843 } 1844 } 1845 } 1846 } else { 1847 if (is == ADD_VALUES) { 1848 for (ii=0; ii<bs; ii++,value+=stepval) { 1849 for (jj=0; jj<bs; jj++) { 1850 *bap++ += *value++; 1851 } 1852 } 1853 } else { 1854 for (ii=0; ii<bs; ii++,value+=stepval) { 1855 for (jj=0; jj<bs; jj++) { 1856 *bap++ = *value++; 1857 } 1858 } 1859 } 1860 } 1861 goto noinsert2; 1862 } 1863 } 1864 if (nonew == 1) goto noinsert2; 1865 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1866 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1867 N = nrow++ - 1; high++; 1868 /* shift up all the later entries in this row */ 1869 for (ii=N; ii>=i; ii--) { 1870 rp[ii+1] = rp[ii]; 1871 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1872 } 1873 if (N >= i) { 1874 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1875 } 1876 rp[i] = col; 1877 bap = ap + bs2*i; 1878 if (roworiented) { 1879 for (ii=0; ii<bs; ii++,value+=stepval) { 1880 for (jj=ii; jj<bs2; jj+=bs) { 1881 bap[jj] = *value++; 1882 } 1883 } 1884 } else { 1885 for (ii=0; ii<bs; ii++,value+=stepval) { 1886 for (jj=0; jj<bs; jj++) { 1887 *bap++ = *value++; 1888 } 1889 } 1890 } 1891 noinsert2:; 1892 low = i; 1893 } 1894 ailen[row] = nrow; 1895 } 1896 PetscFunctionReturn(0); 1897 } 1898 1899 #undef __FUNCT__ 1900 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1901 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1902 { 1903 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1904 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1905 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1906 PetscErrorCode ierr; 1907 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1908 MatScalar *aa = a->a,*ap; 1909 PetscReal ratio=0.6; 1910 1911 PetscFunctionBegin; 1912 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1913 1914 if (m) rmax = ailen[0]; 1915 for (i=1; i<mbs; i++) { 1916 /* move each row back by the amount of empty slots (fshift) before it*/ 1917 fshift += imax[i-1] - ailen[i-1]; 1918 rmax = PetscMax(rmax,ailen[i]); 1919 if (fshift) { 1920 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1921 N = ailen[i]; 1922 for (j=0; j<N; j++) { 1923 ip[j-fshift] = ip[j]; 1924 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1925 } 1926 } 1927 ai[i] = ai[i-1] + ailen[i-1]; 1928 } 1929 if (mbs) { 1930 fshift += imax[mbs-1] - ailen[mbs-1]; 1931 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1932 } 1933 /* reset ilen and imax for each row */ 1934 for (i=0; i<mbs; i++) { 1935 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1936 } 1937 a->nz = ai[mbs]; 1938 1939 /* diagonals may have moved, so kill the diagonal pointers */ 1940 a->idiagvalid = PETSC_FALSE; 1941 if (fshift && a->diag) { 1942 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1943 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1944 a->diag = 0; 1945 } 1946 if (fshift && a->nounused == -1) { 1947 SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1948 } 1949 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1950 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1951 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1952 a->reallocs = 0; 1953 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1954 1955 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1956 if (a->compressedrow.use){ 1957 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1958 } 1959 1960 A->same_nonzero = PETSC_TRUE; 1961 PetscFunctionReturn(0); 1962 } 1963 1964 /* 1965 This function returns an array of flags which indicate the locations of contiguous 1966 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1967 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1968 Assume: sizes should be long enough to hold all the values. 1969 */ 1970 #undef __FUNCT__ 1971 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1972 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1973 { 1974 PetscInt i,j,k,row; 1975 PetscTruth flg; 1976 1977 PetscFunctionBegin; 1978 for (i=0,j=0; i<n; j++) { 1979 row = idx[i]; 1980 if (row%bs!=0) { /* Not the begining of a block */ 1981 sizes[j] = 1; 1982 i++; 1983 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1984 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1985 i++; 1986 } else { /* Begining of the block, so check if the complete block exists */ 1987 flg = PETSC_TRUE; 1988 for (k=1; k<bs; k++) { 1989 if (row+k != idx[i+k]) { /* break in the block */ 1990 flg = PETSC_FALSE; 1991 break; 1992 } 1993 } 1994 if (flg) { /* No break in the bs */ 1995 sizes[j] = bs; 1996 i+= bs; 1997 } else { 1998 sizes[j] = 1; 1999 i++; 2000 } 2001 } 2002 } 2003 *bs_max = j; 2004 PetscFunctionReturn(0); 2005 } 2006 2007 #undef __FUNCT__ 2008 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2009 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 2010 { 2011 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2012 PetscErrorCode ierr; 2013 PetscInt i,j,k,count,*rows; 2014 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2015 PetscScalar zero = 0.0; 2016 MatScalar *aa; 2017 2018 PetscFunctionBegin; 2019 /* Make a copy of the IS and sort it */ 2020 /* allocate memory for rows,sizes */ 2021 ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2022 2023 /* copy IS values to rows, and sort them */ 2024 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2025 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2026 if (baij->keepnonzeropattern) { 2027 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2028 bs_max = is_n; 2029 A->same_nonzero = PETSC_TRUE; 2030 } else { 2031 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2032 A->same_nonzero = PETSC_FALSE; 2033 } 2034 2035 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2036 row = rows[j]; 2037 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2038 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2039 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2040 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2041 if (diag != 0.0) { 2042 if (baij->ilen[row/bs] > 0) { 2043 baij->ilen[row/bs] = 1; 2044 baij->j[baij->i[row/bs]] = row/bs; 2045 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2046 } 2047 /* Now insert all the diagonal values for this bs */ 2048 for (k=0; k<bs; k++) { 2049 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2050 } 2051 } else { /* (diag == 0.0) */ 2052 baij->ilen[row/bs] = 0; 2053 } /* end (diag == 0.0) */ 2054 } else { /* (sizes[i] != bs) */ 2055 #if defined (PETSC_USE_DEBUG) 2056 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2057 #endif 2058 for (k=0; k<count; k++) { 2059 aa[0] = zero; 2060 aa += bs; 2061 } 2062 if (diag != 0.0) { 2063 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2064 } 2065 } 2066 } 2067 2068 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2069 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2070 PetscFunctionReturn(0); 2071 } 2072 2073 #undef __FUNCT__ 2074 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2075 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2076 { 2077 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2078 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2079 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2080 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2081 PetscErrorCode ierr; 2082 PetscInt ridx,cidx,bs2=a->bs2; 2083 PetscTruth roworiented=a->roworiented; 2084 MatScalar *ap,value,*aa=a->a,*bap; 2085 2086 PetscFunctionBegin; 2087 if (v) PetscValidScalarPointer(v,6); 2088 for (k=0; k<m; k++) { /* loop over added rows */ 2089 row = im[k]; 2090 brow = row/bs; 2091 if (row < 0) continue; 2092 #if defined(PETSC_USE_DEBUG) 2093 if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2094 #endif 2095 rp = aj + ai[brow]; 2096 ap = aa + bs2*ai[brow]; 2097 rmax = imax[brow]; 2098 nrow = ailen[brow]; 2099 low = 0; 2100 high = nrow; 2101 for (l=0; l<n; l++) { /* loop over added columns */ 2102 if (in[l] < 0) continue; 2103 #if defined(PETSC_USE_DEBUG) 2104 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2105 #endif 2106 col = in[l]; bcol = col/bs; 2107 ridx = row % bs; cidx = col % bs; 2108 if (roworiented) { 2109 value = v[l + k*n]; 2110 } else { 2111 value = v[k + l*m]; 2112 } 2113 if (col <= lastcol) low = 0; else high = nrow; 2114 lastcol = col; 2115 while (high-low > 7) { 2116 t = (low+high)/2; 2117 if (rp[t] > bcol) high = t; 2118 else low = t; 2119 } 2120 for (i=low; i<high; i++) { 2121 if (rp[i] > bcol) break; 2122 if (rp[i] == bcol) { 2123 bap = ap + bs2*i + bs*cidx + ridx; 2124 if (is == ADD_VALUES) *bap += value; 2125 else *bap = value; 2126 goto noinsert1; 2127 } 2128 } 2129 if (nonew == 1) goto noinsert1; 2130 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2131 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2132 N = nrow++ - 1; high++; 2133 /* shift up all the later entries in this row */ 2134 for (ii=N; ii>=i; ii--) { 2135 rp[ii+1] = rp[ii]; 2136 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2137 } 2138 if (N>=i) { 2139 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2140 } 2141 rp[i] = bcol; 2142 ap[bs2*i + bs*cidx + ridx] = value; 2143 a->nz++; 2144 noinsert1:; 2145 low = i; 2146 } 2147 ailen[brow] = nrow; 2148 } 2149 A->same_nonzero = PETSC_FALSE; 2150 PetscFunctionReturn(0); 2151 } 2152 2153 #undef __FUNCT__ 2154 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2155 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2156 { 2157 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2158 Mat outA; 2159 PetscErrorCode ierr; 2160 PetscTruth row_identity,col_identity; 2161 2162 PetscFunctionBegin; 2163 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2164 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2165 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2166 if (!row_identity || !col_identity) { 2167 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2168 } 2169 2170 outA = inA; 2171 inA->factor = MAT_FACTOR_LU; 2172 2173 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2174 2175 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2176 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 2177 a->row = row; 2178 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2179 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 2180 a->col = col; 2181 2182 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2183 if (a->icol) { 2184 ierr = ISDestroy(a->icol);CHKERRQ(ierr); 2185 } 2186 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2187 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2188 2189 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr); 2190 if (!a->solve_work) { 2191 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2192 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2193 } 2194 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2195 2196 PetscFunctionReturn(0); 2197 } 2198 2199 EXTERN_C_BEGIN 2200 #undef __FUNCT__ 2201 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2202 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2203 { 2204 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2205 PetscInt i,nz,mbs; 2206 2207 PetscFunctionBegin; 2208 nz = baij->maxnz/baij->bs2; 2209 mbs = baij->mbs; 2210 for (i=0; i<nz; i++) { 2211 baij->j[i] = indices[i]; 2212 } 2213 baij->nz = nz; 2214 for (i=0; i<mbs; i++) { 2215 baij->ilen[i] = baij->imax[i]; 2216 } 2217 PetscFunctionReturn(0); 2218 } 2219 EXTERN_C_END 2220 2221 #undef __FUNCT__ 2222 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2223 /*@ 2224 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2225 in the matrix. 2226 2227 Input Parameters: 2228 + mat - the SeqBAIJ matrix 2229 - indices - the column indices 2230 2231 Level: advanced 2232 2233 Notes: 2234 This can be called if you have precomputed the nonzero structure of the 2235 matrix and want to provide it to the matrix object to improve the performance 2236 of the MatSetValues() operation. 2237 2238 You MUST have set the correct numbers of nonzeros per row in the call to 2239 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2240 2241 MUST be called before any calls to MatSetValues(); 2242 2243 @*/ 2244 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2245 { 2246 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 2247 2248 PetscFunctionBegin; 2249 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 2250 PetscValidPointer(indices,2); 2251 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 2252 if (f) { 2253 ierr = (*f)(mat,indices);CHKERRQ(ierr); 2254 } else { 2255 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 2256 } 2257 PetscFunctionReturn(0); 2258 } 2259 2260 #undef __FUNCT__ 2261 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2262 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2263 { 2264 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2265 PetscErrorCode ierr; 2266 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2267 PetscReal atmp; 2268 PetscScalar *x,zero = 0.0; 2269 MatScalar *aa; 2270 PetscInt ncols,brow,krow,kcol; 2271 2272 PetscFunctionBegin; 2273 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2274 bs = A->rmap->bs; 2275 aa = a->a; 2276 ai = a->i; 2277 aj = a->j; 2278 mbs = a->mbs; 2279 2280 ierr = VecSet(v,zero);CHKERRQ(ierr); 2281 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2282 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2283 if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2284 for (i=0; i<mbs; i++) { 2285 ncols = ai[1] - ai[0]; ai++; 2286 brow = bs*i; 2287 for (j=0; j<ncols; j++){ 2288 for (kcol=0; kcol<bs; kcol++){ 2289 for (krow=0; krow<bs; krow++){ 2290 atmp = PetscAbsScalar(*aa);aa++; 2291 row = brow + krow; /* row index */ 2292 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2293 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2294 } 2295 } 2296 aj++; 2297 } 2298 } 2299 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2300 PetscFunctionReturn(0); 2301 } 2302 2303 #undef __FUNCT__ 2304 #define __FUNCT__ "MatCopy_SeqBAIJ" 2305 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2306 { 2307 PetscErrorCode ierr; 2308 2309 PetscFunctionBegin; 2310 /* If the two matrices have the same copy implementation, use fast copy. */ 2311 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2312 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2313 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2314 2315 if (a->i[A->rmap->N] != b->i[B->rmap->N]) { 2316 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 2317 } 2318 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr); 2319 } else { 2320 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2321 } 2322 PetscFunctionReturn(0); 2323 } 2324 2325 #undef __FUNCT__ 2326 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2327 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2328 { 2329 PetscErrorCode ierr; 2330 2331 PetscFunctionBegin; 2332 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2333 PetscFunctionReturn(0); 2334 } 2335 2336 #undef __FUNCT__ 2337 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2338 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2339 { 2340 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2341 PetscFunctionBegin; 2342 *array = a->a; 2343 PetscFunctionReturn(0); 2344 } 2345 2346 #undef __FUNCT__ 2347 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2348 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2349 { 2350 PetscFunctionBegin; 2351 PetscFunctionReturn(0); 2352 } 2353 2354 #include "petscblaslapack.h" 2355 #undef __FUNCT__ 2356 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2357 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2358 { 2359 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2360 PetscErrorCode ierr; 2361 PetscInt i,bs=Y->rmap->bs,j,bs2; 2362 PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz); 2363 2364 PetscFunctionBegin; 2365 if (str == SAME_NONZERO_PATTERN) { 2366 PetscScalar alpha = a; 2367 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2368 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2369 if (y->xtoy && y->XtoY != X) { 2370 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2371 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2372 } 2373 if (!y->xtoy) { /* get xtoy */ 2374 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2375 y->XtoY = X; 2376 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2377 } 2378 bs2 = bs*bs; 2379 for (i=0; i<x->nz; i++) { 2380 j = 0; 2381 while (j < bs2){ 2382 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2383 j++; 2384 } 2385 } 2386 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2387 } else { 2388 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2389 } 2390 PetscFunctionReturn(0); 2391 } 2392 2393 #undef __FUNCT__ 2394 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ" 2395 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs) 2396 { 2397 PetscInt rbs,cbs; 2398 PetscErrorCode ierr; 2399 2400 PetscFunctionBegin; 2401 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 2402 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 2403 if (rbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 2404 if (cbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 2405 PetscFunctionReturn(0); 2406 } 2407 2408 #undef __FUNCT__ 2409 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2410 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2411 { 2412 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2413 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2414 MatScalar *aa = a->a; 2415 2416 PetscFunctionBegin; 2417 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2418 PetscFunctionReturn(0); 2419 } 2420 2421 #undef __FUNCT__ 2422 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2423 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2424 { 2425 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2426 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2427 MatScalar *aa = a->a; 2428 2429 PetscFunctionBegin; 2430 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2431 PetscFunctionReturn(0); 2432 } 2433 2434 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2435 2436 #undef __FUNCT__ 2437 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2438 /* 2439 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2440 */ 2441 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 2442 { 2443 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2444 PetscErrorCode ierr; 2445 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2446 PetscInt nz = a->i[m],row,*jj,mr,col; 2447 2448 PetscFunctionBegin; 2449 *nn = n; 2450 if (!ia) PetscFunctionReturn(0); 2451 if (symmetric) { 2452 SETERRQ(PETSC_ERR_SUP,"Not for BAIJ matrices"); 2453 } else { 2454 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2455 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2456 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2457 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2458 jj = a->j; 2459 for (i=0; i<nz; i++) { 2460 collengths[jj[i]]++; 2461 } 2462 cia[0] = oshift; 2463 for (i=0; i<n; i++) { 2464 cia[i+1] = cia[i] + collengths[i]; 2465 } 2466 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2467 jj = a->j; 2468 for (row=0; row<m; row++) { 2469 mr = a->i[row+1] - a->i[row]; 2470 for (i=0; i<mr; i++) { 2471 col = *jj++; 2472 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2473 } 2474 } 2475 ierr = PetscFree(collengths);CHKERRQ(ierr); 2476 *ia = cia; *ja = cja; 2477 } 2478 PetscFunctionReturn(0); 2479 } 2480 2481 #undef __FUNCT__ 2482 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2483 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 2484 { 2485 PetscErrorCode ierr; 2486 2487 PetscFunctionBegin; 2488 if (!ia) PetscFunctionReturn(0); 2489 ierr = PetscFree(*ia);CHKERRQ(ierr); 2490 ierr = PetscFree(*ja);CHKERRQ(ierr); 2491 PetscFunctionReturn(0); 2492 } 2493 2494 #undef __FUNCT__ 2495 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2496 PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2497 { 2498 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f; 2499 PetscErrorCode ierr; 2500 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2; 2501 PetscScalar dx,*y,*xx,*w3_array; 2502 PetscScalar *vscale_array; 2503 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2504 Vec w1=coloring->w1,w2=coloring->w2,w3; 2505 void *fctx = coloring->fctx; 2506 PetscTruth flg = PETSC_FALSE; 2507 PetscInt ctype=coloring->ctype,N,col_start=0,col_end=0; 2508 Vec x1_tmp; 2509 2510 PetscFunctionBegin; 2511 PetscValidHeaderSpecific(J,MAT_COOKIE,1); 2512 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_COOKIE,2); 2513 PetscValidHeaderSpecific(x1,VEC_COOKIE,3); 2514 if (!f) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2515 2516 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2517 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2518 ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr); 2519 if (flg) { 2520 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2521 } else { 2522 PetscTruth assembled; 2523 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2524 if (assembled) { 2525 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2526 } 2527 } 2528 2529 x1_tmp = x1; 2530 if (!coloring->vscale){ 2531 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2532 } 2533 2534 /* 2535 This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets 2536 coloring->F for the coarser grids from the finest 2537 */ 2538 if (coloring->F) { 2539 ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr); 2540 ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr); 2541 if (m1 != m2) { 2542 coloring->F = 0; 2543 } 2544 } 2545 2546 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2547 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2548 } 2549 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2550 2551 /* Set w1 = F(x1) */ 2552 if (coloring->F) { 2553 w1 = coloring->F; /* use already computed value of function */ 2554 coloring->F = 0; 2555 } else { 2556 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2557 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2558 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2559 } 2560 2561 if (!coloring->w3) { 2562 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2563 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2564 } 2565 w3 = coloring->w3; 2566 2567 CHKMEMQ; 2568 /* Compute all the local scale factors, including ghost points */ 2569 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2570 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2571 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2572 if (ctype == IS_COLORING_GHOSTED){ 2573 col_start = 0; col_end = N; 2574 } else if (ctype == IS_COLORING_GLOBAL){ 2575 xx = xx - start; 2576 vscale_array = vscale_array - start; 2577 col_start = start; col_end = N + start; 2578 } CHKMEMQ; 2579 for (col=col_start; col<col_end; col++){ 2580 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2581 if (coloring->htype[0] == 'w') { 2582 dx = 1.0 + unorm; 2583 } else { 2584 dx = xx[col]; 2585 } 2586 if (dx == 0.0) dx = 1.0; 2587 #if !defined(PETSC_USE_COMPLEX) 2588 if (dx < umin && dx >= 0.0) dx = umin; 2589 else if (dx < 0.0 && dx > -umin) dx = -umin; 2590 #else 2591 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2592 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2593 #endif 2594 dx *= epsilon; 2595 vscale_array[col] = 1.0/dx; 2596 } CHKMEMQ; 2597 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2598 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2599 if (ctype == IS_COLORING_GLOBAL){ 2600 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2601 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2602 } 2603 CHKMEMQ; 2604 if (coloring->vscaleforrow) { 2605 vscaleforrow = coloring->vscaleforrow; 2606 } else { 2607 SETERRQ(PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2608 } 2609 2610 2611 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2612 /* 2613 Loop over each color 2614 */ 2615 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2616 for (k=0; k<coloring->ncolors; k++) { 2617 coloring->currentcolor = k; 2618 for (i=0; i<bs; i++) { 2619 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2620 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2621 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2622 /* 2623 Loop over each column associated with color 2624 adding the perturbation to the vector w3. 2625 */ 2626 for (l=0; l<coloring->ncolumns[k]; l++) { 2627 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2628 if (coloring->htype[0] == 'w') { 2629 dx = 1.0 + unorm; 2630 } else { 2631 dx = xx[col]; 2632 } 2633 if (dx == 0.0) dx = 1.0; 2634 #if !defined(PETSC_USE_COMPLEX) 2635 if (dx < umin && dx >= 0.0) dx = umin; 2636 else if (dx < 0.0 && dx > -umin) dx = -umin; 2637 #else 2638 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2639 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2640 #endif 2641 dx *= epsilon; 2642 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2643 w3_array[col] += dx; 2644 } 2645 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2646 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2647 2648 /* 2649 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2650 w2 = F(x1 + dx) - F(x1) 2651 */ 2652 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2653 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2654 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2655 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2656 2657 /* 2658 Loop over rows of vector, putting results into Jacobian matrix 2659 */ 2660 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2661 for (l=0; l<coloring->nrows[k]; l++) { 2662 row = bs*coloring->rows[k][l]; /* local row index */ 2663 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2664 for (j=0; j<bs; j++) { 2665 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2666 srows[j] = row + start + j; 2667 } 2668 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2669 } 2670 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2671 } 2672 } /* endof for each color */ 2673 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2674 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2675 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2676 ierr = PetscFree(srows);CHKERRQ(ierr); 2677 2678 coloring->currentcolor = -1; 2679 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2680 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2681 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2682 PetscFunctionReturn(0); 2683 } 2684 2685 /* -------------------------------------------------------------------*/ 2686 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2687 MatGetRow_SeqBAIJ, 2688 MatRestoreRow_SeqBAIJ, 2689 MatMult_SeqBAIJ_N, 2690 /* 4*/ MatMultAdd_SeqBAIJ_N, 2691 MatMultTranspose_SeqBAIJ, 2692 MatMultTransposeAdd_SeqBAIJ, 2693 0, 2694 0, 2695 0, 2696 /*10*/ 0, 2697 MatLUFactor_SeqBAIJ, 2698 0, 2699 0, 2700 MatTranspose_SeqBAIJ, 2701 /*15*/ MatGetInfo_SeqBAIJ, 2702 MatEqual_SeqBAIJ, 2703 MatGetDiagonal_SeqBAIJ, 2704 MatDiagonalScale_SeqBAIJ, 2705 MatNorm_SeqBAIJ, 2706 /*20*/ 0, 2707 MatAssemblyEnd_SeqBAIJ, 2708 MatSetOption_SeqBAIJ, 2709 MatZeroEntries_SeqBAIJ, 2710 /*24*/ MatZeroRows_SeqBAIJ, 2711 0, 2712 0, 2713 0, 2714 0, 2715 /*29*/ MatSetUpPreallocation_SeqBAIJ, 2716 0, 2717 0, 2718 MatGetArray_SeqBAIJ, 2719 MatRestoreArray_SeqBAIJ, 2720 /*34*/ MatDuplicate_SeqBAIJ, 2721 0, 2722 0, 2723 MatILUFactor_SeqBAIJ, 2724 0, 2725 /*39*/ MatAXPY_SeqBAIJ, 2726 MatGetSubMatrices_SeqBAIJ, 2727 MatIncreaseOverlap_SeqBAIJ, 2728 MatGetValues_SeqBAIJ, 2729 MatCopy_SeqBAIJ, 2730 /*44*/ 0, 2731 MatScale_SeqBAIJ, 2732 0, 2733 0, 2734 0, 2735 /*49*/ MatSetBlockSize_SeqBAIJ, 2736 MatGetRowIJ_SeqBAIJ, 2737 MatRestoreRowIJ_SeqBAIJ, 2738 MatGetColumnIJ_SeqBAIJ, 2739 MatRestoreColumnIJ_SeqBAIJ, 2740 /*54*/ MatFDColoringCreate_SeqAIJ, 2741 0, 2742 0, 2743 0, 2744 MatSetValuesBlocked_SeqBAIJ, 2745 /*59*/ MatGetSubMatrix_SeqBAIJ, 2746 MatDestroy_SeqBAIJ, 2747 MatView_SeqBAIJ, 2748 0, 2749 0, 2750 /*64*/ 0, 2751 0, 2752 0, 2753 0, 2754 0, 2755 /*69*/ MatGetRowMaxAbs_SeqBAIJ, 2756 0, 2757 MatConvert_Basic, 2758 0, 2759 0, 2760 /*74*/ 0, 2761 MatFDColoringApply_BAIJ, 2762 0, 2763 0, 2764 0, 2765 /*79*/ 0, 2766 0, 2767 0, 2768 0, 2769 MatLoad_SeqBAIJ, 2770 /*84*/ 0, 2771 0, 2772 0, 2773 0, 2774 0, 2775 /*89*/ 0, 2776 0, 2777 0, 2778 0, 2779 0, 2780 /*94*/ 0, 2781 0, 2782 0, 2783 0, 2784 0, 2785 /*99*/0, 2786 0, 2787 0, 2788 0, 2789 0, 2790 /*104*/0, 2791 MatRealPart_SeqBAIJ, 2792 MatImaginaryPart_SeqBAIJ, 2793 0, 2794 0, 2795 /*109*/0, 2796 0, 2797 0, 2798 0, 2799 MatMissingDiagonal_SeqBAIJ, 2800 /*114*/0, 2801 0, 2802 0, 2803 0, 2804 0, 2805 /*119*/0, 2806 0, 2807 MatMultHermitianTranspose_SeqBAIJ, 2808 MatMultHermitianTransposeAdd_SeqBAIJ 2809 }; 2810 2811 EXTERN_C_BEGIN 2812 #undef __FUNCT__ 2813 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2814 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2815 { 2816 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2817 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2818 PetscErrorCode ierr; 2819 2820 PetscFunctionBegin; 2821 if (aij->nonew != 1) { 2822 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2823 } 2824 2825 /* allocate space for values if not already there */ 2826 if (!aij->saved_values) { 2827 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2828 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2829 } 2830 2831 /* copy values over */ 2832 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2833 PetscFunctionReturn(0); 2834 } 2835 EXTERN_C_END 2836 2837 EXTERN_C_BEGIN 2838 #undef __FUNCT__ 2839 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2840 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2841 { 2842 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2843 PetscErrorCode ierr; 2844 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2845 2846 PetscFunctionBegin; 2847 if (aij->nonew != 1) { 2848 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2849 } 2850 if (!aij->saved_values) { 2851 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2852 } 2853 2854 /* copy values over */ 2855 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2856 PetscFunctionReturn(0); 2857 } 2858 EXTERN_C_END 2859 2860 EXTERN_C_BEGIN 2861 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2862 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2863 EXTERN_C_END 2864 2865 EXTERN_C_BEGIN 2866 #undef __FUNCT__ 2867 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2868 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2869 { 2870 Mat_SeqBAIJ *b; 2871 PetscErrorCode ierr; 2872 PetscInt i,mbs,nbs,bs2,newbs = PetscAbs(bs); 2873 PetscTruth flg,skipallocation = PETSC_FALSE; 2874 2875 PetscFunctionBegin; 2876 2877 if (nz == MAT_SKIP_ALLOCATION) { 2878 skipallocation = PETSC_TRUE; 2879 nz = 0; 2880 } 2881 2882 if (bs < 0) { 2883 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2884 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2885 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2886 bs = PetscAbs(bs); 2887 } 2888 if (nnz && newbs != bs) { 2889 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2890 } 2891 bs = newbs; 2892 2893 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2894 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2895 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2896 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2897 2898 B->preallocated = PETSC_TRUE; 2899 2900 mbs = B->rmap->n/bs; 2901 nbs = B->cmap->n/bs; 2902 bs2 = bs*bs; 2903 2904 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) { 2905 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2906 } 2907 2908 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2909 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2910 if (nnz) { 2911 for (i=0; i<mbs; i++) { 2912 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2913 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2914 } 2915 } 2916 2917 b = (Mat_SeqBAIJ*)B->data; 2918 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2919 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2920 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2921 2922 if (!flg) { 2923 switch (bs) { 2924 case 1: 2925 B->ops->mult = MatMult_SeqBAIJ_1; 2926 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2927 B->ops->sor = MatSOR_SeqBAIJ_1; 2928 break; 2929 case 2: 2930 B->ops->mult = MatMult_SeqBAIJ_2; 2931 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2932 B->ops->sor = MatSOR_SeqBAIJ_2; 2933 break; 2934 case 3: 2935 B->ops->mult = MatMult_SeqBAIJ_3; 2936 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2937 B->ops->sor = MatSOR_SeqBAIJ_3; 2938 break; 2939 case 4: 2940 B->ops->mult = MatMult_SeqBAIJ_4; 2941 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2942 B->ops->sor = MatSOR_SeqBAIJ_4; 2943 break; 2944 case 5: 2945 B->ops->mult = MatMult_SeqBAIJ_5; 2946 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2947 B->ops->sor = MatSOR_SeqBAIJ_5; 2948 break; 2949 case 6: 2950 B->ops->mult = MatMult_SeqBAIJ_6; 2951 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2952 B->ops->sor = MatSOR_SeqBAIJ_6; 2953 break; 2954 case 7: 2955 B->ops->mult = MatMult_SeqBAIJ_7; 2956 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2957 B->ops->sor = MatSOR_SeqBAIJ_7; 2958 break; 2959 default: 2960 B->ops->mult = MatMult_SeqBAIJ_N; 2961 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2962 break; 2963 } 2964 } 2965 B->rmap->bs = bs; 2966 b->mbs = mbs; 2967 b->nbs = nbs; 2968 if (!skipallocation) { 2969 if (!b->imax) { 2970 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2971 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt)); 2972 b->free_imax_ilen = PETSC_TRUE; 2973 } 2974 /* b->ilen will count nonzeros in each block row so far. */ 2975 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2976 if (!nnz) { 2977 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2978 else if (nz <= 0) nz = 1; 2979 for (i=0; i<mbs; i++) b->imax[i] = nz; 2980 nz = nz*mbs; 2981 } else { 2982 nz = 0; 2983 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2984 } 2985 2986 /* allocate the matrix space */ 2987 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2988 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 2989 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2990 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2991 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2992 b->singlemalloc = PETSC_TRUE; 2993 b->i[0] = 0; 2994 for (i=1; i<mbs+1; i++) { 2995 b->i[i] = b->i[i-1] + b->imax[i-1]; 2996 } 2997 b->free_a = PETSC_TRUE; 2998 b->free_ij = PETSC_TRUE; 2999 } else { 3000 b->free_a = PETSC_FALSE; 3001 b->free_ij = PETSC_FALSE; 3002 } 3003 3004 B->rmap->bs = bs; 3005 b->bs2 = bs2; 3006 b->mbs = mbs; 3007 b->nz = 0; 3008 b->maxnz = nz*bs2; 3009 B->info.nz_unneeded = (PetscReal)b->maxnz; 3010 PetscFunctionReturn(0); 3011 } 3012 EXTERN_C_END 3013 3014 EXTERN_C_BEGIN 3015 #undef __FUNCT__ 3016 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3017 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3018 { 3019 PetscInt i,m,nz,nz_max=0,*nnz; 3020 PetscScalar *values=0; 3021 PetscErrorCode ierr; 3022 3023 PetscFunctionBegin; 3024 3025 if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3026 3027 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3028 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3029 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3030 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3031 m = B->rmap->n/bs; 3032 3033 if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 3034 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3035 for(i=0; i<m; i++) { 3036 nz = ii[i+1]- ii[i]; 3037 if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 3038 nz_max = PetscMax(nz_max, nz); 3039 nnz[i] = nz; 3040 } 3041 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3042 ierr = PetscFree(nnz);CHKERRQ(ierr); 3043 3044 values = (PetscScalar*)V; 3045 if (!values) { 3046 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3047 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3048 } 3049 for (i=0; i<m; i++) { 3050 PetscInt ncols = ii[i+1] - ii[i]; 3051 const PetscInt *icols = jj + ii[i]; 3052 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3053 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3054 } 3055 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3056 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3057 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3058 3059 PetscFunctionReturn(0); 3060 } 3061 EXTERN_C_END 3062 3063 3064 EXTERN_C_BEGIN 3065 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3066 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 3067 EXTERN_C_END 3068 3069 /*MC 3070 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3071 block sparse compressed row format. 3072 3073 Options Database Keys: 3074 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3075 3076 Level: beginner 3077 3078 .seealso: MatCreateSeqBAIJ() 3079 M*/ 3080 3081 3082 EXTERN_C_BEGIN 3083 #undef __FUNCT__ 3084 #define __FUNCT__ "MatCreate_SeqBAIJ" 3085 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 3086 { 3087 PetscErrorCode ierr; 3088 PetscMPIInt size; 3089 Mat_SeqBAIJ *b; 3090 3091 PetscFunctionBegin; 3092 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 3093 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3094 3095 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3096 B->data = (void*)b; 3097 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3098 B->mapping = 0; 3099 b->row = 0; 3100 b->col = 0; 3101 b->icol = 0; 3102 b->reallocs = 0; 3103 b->saved_values = 0; 3104 3105 b->roworiented = PETSC_TRUE; 3106 b->nonew = 0; 3107 b->diag = 0; 3108 b->solve_work = 0; 3109 b->mult_work = 0; 3110 B->spptr = 0; 3111 B->info.nz_unneeded = (PetscReal)b->maxnz; 3112 b->keepnonzeropattern = PETSC_FALSE; 3113 b->xtoy = 0; 3114 b->XtoY = 0; 3115 b->compressedrow.use = PETSC_FALSE; 3116 b->compressedrow.nrows = 0; 3117 b->compressedrow.i = PETSC_NULL; 3118 b->compressedrow.rindex = PETSC_NULL; 3119 b->compressedrow.checked = PETSC_FALSE; 3120 B->same_nonzero = PETSC_FALSE; 3121 3122 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C", 3123 "MatGetFactorAvailable_seqbaij_petsc", 3124 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3125 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C", 3126 "MatGetFactor_seqbaij_petsc", 3127 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3128 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 3129 "MatInvertBlockDiagonal_SeqBAIJ", 3130 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3131 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3132 "MatStoreValues_SeqBAIJ", 3133 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3134 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3135 "MatRetrieveValues_SeqBAIJ", 3136 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3137 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 3138 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 3139 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3140 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 3141 "MatConvert_SeqBAIJ_SeqAIJ", 3142 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3143 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 3144 "MatConvert_SeqBAIJ_SeqSBAIJ", 3145 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3146 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 3147 "MatSeqBAIJSetPreallocation_SeqBAIJ", 3148 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3149 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 3150 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 3151 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3152 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3153 PetscFunctionReturn(0); 3154 } 3155 EXTERN_C_END 3156 3157 #undef __FUNCT__ 3158 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3159 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscTruth mallocmatspace) 3160 { 3161 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3162 PetscErrorCode ierr; 3163 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3164 3165 PetscFunctionBegin; 3166 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 3167 3168 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3169 c->imax = a->imax; 3170 c->ilen = a->ilen; 3171 c->free_imax_ilen = PETSC_FALSE; 3172 } else { 3173 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3174 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3175 for (i=0; i<mbs; i++) { 3176 c->imax[i] = a->imax[i]; 3177 c->ilen[i] = a->ilen[i]; 3178 } 3179 c->free_imax_ilen = PETSC_TRUE; 3180 } 3181 3182 /* allocate the matrix space */ 3183 if (mallocmatspace){ 3184 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3185 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3186 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3187 c->singlemalloc = PETSC_FALSE; 3188 c->free_ij = PETSC_FALSE; 3189 c->i = a->i; 3190 c->j = a->j; 3191 c->parent = A; 3192 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3193 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3194 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3195 } else { 3196 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3197 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3198 c->singlemalloc = PETSC_TRUE; 3199 c->free_ij = PETSC_TRUE; 3200 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3201 if (mbs > 0) { 3202 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3203 if (cpvalues == MAT_COPY_VALUES) { 3204 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3205 } else { 3206 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3207 } 3208 } 3209 } 3210 } 3211 3212 c->roworiented = a->roworiented; 3213 c->nonew = a->nonew; 3214 ierr = PetscLayoutCopy(A->rmap,&C->rmap);CHKERRQ(ierr); 3215 ierr = PetscLayoutCopy(A->cmap,&C->cmap);CHKERRQ(ierr); 3216 c->bs2 = a->bs2; 3217 c->mbs = a->mbs; 3218 c->nbs = a->nbs; 3219 3220 if (a->diag) { 3221 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3222 c->diag = a->diag; 3223 c->free_diag = PETSC_FALSE; 3224 } else { 3225 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3226 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3227 for (i=0; i<mbs; i++) { 3228 c->diag[i] = a->diag[i]; 3229 } 3230 c->free_diag = PETSC_TRUE; 3231 } 3232 } else c->diag = 0; 3233 c->nz = a->nz; 3234 c->maxnz = a->maxnz; 3235 c->solve_work = 0; 3236 c->mult_work = 0; 3237 c->free_a = PETSC_TRUE; 3238 c->free_ij = PETSC_TRUE; 3239 C->preallocated = PETSC_TRUE; 3240 C->assembled = PETSC_TRUE; 3241 3242 c->compressedrow.use = a->compressedrow.use; 3243 c->compressedrow.nrows = a->compressedrow.nrows; 3244 c->compressedrow.checked = a->compressedrow.checked; 3245 if (a->compressedrow.checked && a->compressedrow.use){ 3246 i = a->compressedrow.nrows; 3247 ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 3248 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3249 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3250 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3251 } else { 3252 c->compressedrow.use = PETSC_FALSE; 3253 c->compressedrow.i = PETSC_NULL; 3254 c->compressedrow.rindex = PETSC_NULL; 3255 } 3256 C->same_nonzero = A->same_nonzero; 3257 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3258 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3259 PetscFunctionReturn(0); 3260 } 3261 3262 #undef __FUNCT__ 3263 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3264 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3265 { 3266 PetscErrorCode ierr; 3267 3268 PetscFunctionBegin; 3269 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 3270 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3271 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3272 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE); 3273 PetscFunctionReturn(0); 3274 } 3275 3276 #undef __FUNCT__ 3277 #define __FUNCT__ "MatLoad_SeqBAIJ" 3278 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A) 3279 { 3280 Mat_SeqBAIJ *a; 3281 Mat B; 3282 PetscErrorCode ierr; 3283 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3284 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3285 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 3286 PetscInt *masked,nmask,tmp,bs2,ishift; 3287 PetscMPIInt size; 3288 int fd; 3289 PetscScalar *aa; 3290 MPI_Comm comm = ((PetscObject)viewer)->comm; 3291 3292 PetscFunctionBegin; 3293 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3294 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3295 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3296 bs2 = bs*bs; 3297 3298 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3299 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 3300 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3301 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3302 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3303 M = header[1]; N = header[2]; nz = header[3]; 3304 3305 if (header[3] < 0) { 3306 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3307 } 3308 3309 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 3310 3311 /* 3312 This code adds extra rows to make sure the number of rows is 3313 divisible by the blocksize 3314 */ 3315 mbs = M/bs; 3316 extra_rows = bs - M + bs*(mbs); 3317 if (extra_rows == bs) extra_rows = 0; 3318 else mbs++; 3319 if (extra_rows) { 3320 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3321 } 3322 3323 /* read in row lengths */ 3324 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3325 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3326 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3327 3328 /* read in column indices */ 3329 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3330 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3331 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3332 3333 /* loop over row lengths determining block row lengths */ 3334 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3335 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3336 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3337 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3338 rowcount = 0; 3339 nzcount = 0; 3340 for (i=0; i<mbs; i++) { 3341 nmask = 0; 3342 for (j=0; j<bs; j++) { 3343 kmax = rowlengths[rowcount]; 3344 for (k=0; k<kmax; k++) { 3345 tmp = jj[nzcount++]/bs; 3346 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3347 } 3348 rowcount++; 3349 } 3350 browlengths[i] += nmask; 3351 /* zero out the mask elements we set */ 3352 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3353 } 3354 3355 /* create our matrix */ 3356 ierr = MatCreate(comm,&B); 3357 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 3358 ierr = MatSetType(B,type);CHKERRQ(ierr); 3359 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 3360 a = (Mat_SeqBAIJ*)B->data; 3361 3362 /* set matrix "i" values */ 3363 a->i[0] = 0; 3364 for (i=1; i<= mbs; i++) { 3365 a->i[i] = a->i[i-1] + browlengths[i-1]; 3366 a->ilen[i-1] = browlengths[i-1]; 3367 } 3368 a->nz = 0; 3369 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3370 3371 /* read in nonzero values */ 3372 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3373 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3374 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3375 3376 /* set "a" and "j" values into matrix */ 3377 nzcount = 0; jcount = 0; 3378 for (i=0; i<mbs; i++) { 3379 nzcountb = nzcount; 3380 nmask = 0; 3381 for (j=0; j<bs; j++) { 3382 kmax = rowlengths[i*bs+j]; 3383 for (k=0; k<kmax; k++) { 3384 tmp = jj[nzcount++]/bs; 3385 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3386 } 3387 } 3388 /* sort the masked values */ 3389 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3390 3391 /* set "j" values into matrix */ 3392 maskcount = 1; 3393 for (j=0; j<nmask; j++) { 3394 a->j[jcount++] = masked[j]; 3395 mask[masked[j]] = maskcount++; 3396 } 3397 /* set "a" values into matrix */ 3398 ishift = bs2*a->i[i]; 3399 for (j=0; j<bs; j++) { 3400 kmax = rowlengths[i*bs+j]; 3401 for (k=0; k<kmax; k++) { 3402 tmp = jj[nzcountb]/bs ; 3403 block = mask[tmp] - 1; 3404 point = jj[nzcountb] - bs*tmp; 3405 idx = ishift + bs2*block + j + bs*point; 3406 a->a[idx] = (MatScalar)aa[nzcountb++]; 3407 } 3408 } 3409 /* zero out the mask elements we set */ 3410 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3411 } 3412 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3413 3414 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3415 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3416 ierr = PetscFree(aa);CHKERRQ(ierr); 3417 ierr = PetscFree(jj);CHKERRQ(ierr); 3418 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3419 3420 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3421 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3422 ierr = MatView_Private(B);CHKERRQ(ierr); 3423 3424 *A = B; 3425 PetscFunctionReturn(0); 3426 } 3427 3428 #undef __FUNCT__ 3429 #define __FUNCT__ "MatCreateSeqBAIJ" 3430 /*@C 3431 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3432 compressed row) format. For good matrix assembly performance the 3433 user should preallocate the matrix storage by setting the parameter nz 3434 (or the array nnz). By setting these parameters accurately, performance 3435 during matrix assembly can be increased by more than a factor of 50. 3436 3437 Collective on MPI_Comm 3438 3439 Input Parameters: 3440 + comm - MPI communicator, set to PETSC_COMM_SELF 3441 . bs - size of block 3442 . m - number of rows 3443 . n - number of columns 3444 . nz - number of nonzero blocks per block row (same for all rows) 3445 - nnz - array containing the number of nonzero blocks in the various block rows 3446 (possibly different for each block row) or PETSC_NULL 3447 3448 Output Parameter: 3449 . A - the matrix 3450 3451 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3452 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3453 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3454 3455 Options Database Keys: 3456 . -mat_no_unroll - uses code that does not unroll the loops in the 3457 block calculations (much slower) 3458 . -mat_block_size - size of the blocks to use 3459 3460 Level: intermediate 3461 3462 Notes: 3463 The number of rows and columns must be divisible by blocksize. 3464 3465 If the nnz parameter is given then the nz parameter is ignored 3466 3467 A nonzero block is any block that as 1 or more nonzeros in it 3468 3469 The block AIJ format is fully compatible with standard Fortran 77 3470 storage. That is, the stored row and column indices can begin at 3471 either one (as in Fortran) or zero. See the users' manual for details. 3472 3473 Specify the preallocated storage with either nz or nnz (not both). 3474 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3475 allocation. For additional details, see the users manual chapter on 3476 matrices. 3477 3478 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3479 @*/ 3480 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3481 { 3482 PetscErrorCode ierr; 3483 3484 PetscFunctionBegin; 3485 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3486 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3487 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3488 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3489 PetscFunctionReturn(0); 3490 } 3491 3492 #undef __FUNCT__ 3493 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3494 /*@C 3495 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3496 per row in the matrix. For good matrix assembly performance the 3497 user should preallocate the matrix storage by setting the parameter nz 3498 (or the array nnz). By setting these parameters accurately, performance 3499 during matrix assembly can be increased by more than a factor of 50. 3500 3501 Collective on MPI_Comm 3502 3503 Input Parameters: 3504 + A - the matrix 3505 . bs - size of block 3506 . nz - number of block nonzeros per block row (same for all rows) 3507 - nnz - array containing the number of block nonzeros in the various block rows 3508 (possibly different for each block row) or PETSC_NULL 3509 3510 Options Database Keys: 3511 . -mat_no_unroll - uses code that does not unroll the loops in the 3512 block calculations (much slower) 3513 . -mat_block_size - size of the blocks to use 3514 3515 Level: intermediate 3516 3517 Notes: 3518 If the nnz parameter is given then the nz parameter is ignored 3519 3520 You can call MatGetInfo() to get information on how effective the preallocation was; 3521 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3522 You can also run with the option -info and look for messages with the string 3523 malloc in them to see if additional memory allocation was needed. 3524 3525 The block AIJ format is fully compatible with standard Fortran 77 3526 storage. That is, the stored row and column indices can begin at 3527 either one (as in Fortran) or zero. See the users' manual for details. 3528 3529 Specify the preallocated storage with either nz or nnz (not both). 3530 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3531 allocation. For additional details, see the users manual chapter on 3532 matrices. 3533 3534 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3535 @*/ 3536 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3537 { 3538 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 3539 3540 PetscFunctionBegin; 3541 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3542 if (f) { 3543 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 3544 } 3545 PetscFunctionReturn(0); 3546 } 3547 3548 #undef __FUNCT__ 3549 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3550 /*@C 3551 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3552 (the default sequential PETSc format). 3553 3554 Collective on MPI_Comm 3555 3556 Input Parameters: 3557 + A - the matrix 3558 . i - the indices into j for the start of each local row (starts with zero) 3559 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3560 - v - optional values in the matrix 3561 3562 Level: developer 3563 3564 .keywords: matrix, aij, compressed row, sparse 3565 3566 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3567 @*/ 3568 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3569 { 3570 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 3571 3572 PetscFunctionBegin; 3573 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3574 if (f) { 3575 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 3576 } 3577 PetscFunctionReturn(0); 3578 } 3579 3580 3581 #undef __FUNCT__ 3582 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3583 /*@ 3584 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 3585 (upper triangular entries in CSR format) provided by the user. 3586 3587 Collective on MPI_Comm 3588 3589 Input Parameters: 3590 + comm - must be an MPI communicator of size 1 3591 . bs - size of block 3592 . m - number of rows 3593 . n - number of columns 3594 . i - row indices 3595 . j - column indices 3596 - a - matrix values 3597 3598 Output Parameter: 3599 . mat - the matrix 3600 3601 Level: intermediate 3602 3603 Notes: 3604 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3605 once the matrix is destroyed 3606 3607 You cannot set new nonzero locations into this matrix, that will generate an error. 3608 3609 The i and j indices are 0 based 3610 3611 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3612 3613 @*/ 3614 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3615 { 3616 PetscErrorCode ierr; 3617 PetscInt ii; 3618 Mat_SeqBAIJ *baij; 3619 3620 PetscFunctionBegin; 3621 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3622 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3623 3624 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3625 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3626 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3627 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3628 baij = (Mat_SeqBAIJ*)(*mat)->data; 3629 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3630 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3631 3632 baij->i = i; 3633 baij->j = j; 3634 baij->a = a; 3635 baij->singlemalloc = PETSC_FALSE; 3636 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3637 baij->free_a = PETSC_FALSE; 3638 baij->free_ij = PETSC_FALSE; 3639 3640 for (ii=0; ii<m; ii++) { 3641 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3642 #if defined(PETSC_USE_DEBUG) 3643 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3644 #endif 3645 } 3646 #if defined(PETSC_USE_DEBUG) 3647 for (ii=0; ii<baij->i[m]; ii++) { 3648 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3649 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3650 } 3651 #endif 3652 3653 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3654 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3655 PetscFunctionReturn(0); 3656 } 3657