1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "../src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "petscblaslapack.h" 9 #include "../src/mat/blockinvert.h" 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 13 /*@ 14 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 15 16 Collective on Mat 17 18 Input Parameters: 19 . mat - the matrix 20 21 Level: advanced 22 @*/ 23 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 24 { 25 PetscErrorCode ierr,(*f)(Mat); 26 27 PetscFunctionBegin; 28 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 29 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 30 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 31 32 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 33 if (f) { 34 ierr = (*f)(mat);CHKERRQ(ierr); 35 } else { 36 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 37 } 38 PetscFunctionReturn(0); 39 } 40 41 EXTERN_C_BEGIN 42 #undef __FUNCT__ 43 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 44 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 45 { 46 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 47 PetscErrorCode ierr; 48 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5]; 49 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25]; 50 PetscReal shift = 0.0; 51 52 PetscFunctionBegin; 53 if (a->idiagvalid) PetscFunctionReturn(0); 54 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 55 diag_offset = a->diag; 56 if (!a->idiag) { 57 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 58 ierr = PetscLogObjectMemory(A,2*bs*bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 59 } 60 diag = a->idiag; 61 mdiag = a->idiag+bs*bs*mbs; 62 /* factor and invert each block */ 63 switch (bs){ 64 case 1: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 1*diag_offset[i]; 67 diag[0] = odiag[0]; 68 mdiag[0] = odiag[0]; 69 diag[0] = 1.0 / (diag[0] + shift); 70 diag += 1; 71 mdiag += 1; 72 } 73 break; 74 case 2: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 4*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 79 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 80 diag += 4; 81 mdiag += 4; 82 } 83 break; 84 case 3: 85 for (i=0; i<mbs; i++) { 86 odiag = v + 9*diag_offset[i]; 87 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 88 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 89 diag[8] = odiag[8]; 90 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 91 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 92 mdiag[8] = odiag[8]; 93 ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 94 diag += 9; 95 mdiag += 9; 96 } 97 break; 98 case 4: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 16*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 104 diag += 16; 105 mdiag += 16; 106 } 107 break; 108 case 5: 109 for (i=0; i<mbs; i++) { 110 odiag = v + 25*diag_offset[i]; 111 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 112 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 113 ierr = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 114 diag += 25; 115 mdiag += 25; 116 } 117 break; 118 case 6: 119 for (i=0; i<mbs; i++) { 120 odiag = v + 36*diag_offset[i]; 121 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 122 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 123 ierr = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 124 diag += 36; 125 mdiag += 36; 126 } 127 break; 128 default: 129 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 130 } 131 a->idiagvalid = PETSC_TRUE; 132 PetscFunctionReturn(0); 133 } 134 EXTERN_C_END 135 136 #undef __FUNCT__ 137 #define __FUNCT__ "MatSOR_SeqBAIJ_1" 138 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 139 { 140 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 141 PetscScalar *x,x1,s1; 142 const PetscScalar *b; 143 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 144 PetscErrorCode ierr; 145 PetscInt m = a->mbs,i,i2,nz,j; 146 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 147 148 PetscFunctionBegin; 149 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 150 its = its*lits; 151 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 152 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 153 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 154 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 155 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 156 157 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 158 159 diag = a->diag; 160 idiag = a->idiag; 161 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 162 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 163 164 if (flag & SOR_ZERO_INITIAL_GUESS) { 165 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 166 x[0] = b[0]*idiag[0]; 167 i2 = 1; 168 idiag += 1; 169 for (i=1; i<m; i++) { 170 v = aa + ai[i]; 171 vi = aj + ai[i]; 172 nz = diag[i] - ai[i]; 173 s1 = b[i2]; 174 for (j=0; j<nz; j++) { 175 s1 -= v[j]*x[vi[j]]; 176 } 177 x[i2] = idiag[0]*s1; 178 idiag += 1; 179 i2 += 1; 180 } 181 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 182 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 183 } 184 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 185 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 186 i2 = 0; 187 mdiag = a->idiag+a->mbs; 188 for (i=0; i<m; i++) { 189 x1 = x[i2]; 190 x[i2] = mdiag[0]*x1; 191 mdiag += 1; 192 i2 += 1; 193 } 194 ierr = PetscLogFlops(m);CHKERRQ(ierr); 195 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 196 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 197 } 198 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 199 idiag = a->idiag+a->mbs - 1; 200 i2 = m - 1; 201 x1 = x[i2]; 202 x[i2] = idiag[0]*x1; 203 idiag -= 1; 204 i2 -= 1; 205 for (i=m-2; i>=0; i--) { 206 v = aa + (diag[i]+1); 207 vi = aj + diag[i] + 1; 208 nz = ai[i+1] - diag[i] - 1; 209 s1 = x[i2]; 210 for (j=0; j<nz; j++) { 211 s1 -= v[j]*x[vi[j]]; 212 } 213 x[i2] = idiag[0]*s1; 214 idiag -= 1; 215 i2 -= 1; 216 } 217 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 218 } 219 } else { 220 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 221 } 222 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 223 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 224 PetscFunctionReturn(0); 225 } 226 227 #undef __FUNCT__ 228 #define __FUNCT__ "MatSOR_SeqBAIJ_2" 229 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 230 { 231 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 232 PetscScalar *x,x1,x2,s1,s2; 233 const PetscScalar *b; 234 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 235 PetscErrorCode ierr; 236 PetscInt m = a->mbs,i,i2,nz,idx,j,it; 237 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 238 239 PetscFunctionBegin; 240 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 241 its = its*lits; 242 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 243 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 244 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 245 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 246 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 247 248 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 249 250 diag = a->diag; 251 idiag = a->idiag; 252 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 253 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 254 255 if (flag & SOR_ZERO_INITIAL_GUESS) { 256 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 257 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 258 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 259 i2 = 2; 260 idiag += 4; 261 for (i=1; i<m; i++) { 262 v = aa + 4*ai[i]; 263 vi = aj + ai[i]; 264 nz = diag[i] - ai[i]; 265 s1 = b[i2]; s2 = b[i2+1]; 266 for (j=0; j<nz; j++) { 267 idx = 2*vi[j]; 268 it = 4*j; 269 x1 = x[idx]; x2 = x[1+idx]; 270 s1 -= v[it]*x1 + v[it+2]*x2; 271 s2 -= v[it+1]*x1 + v[it+3]*x2; 272 } 273 x[i2] = idiag[0]*s1 + idiag[2]*s2; 274 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 275 idiag += 4; 276 i2 += 2; 277 } 278 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 279 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 280 } 281 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 282 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 283 i2 = 0; 284 mdiag = a->idiag+4*a->mbs; 285 for (i=0; i<m; i++) { 286 x1 = x[i2]; x2 = x[i2+1]; 287 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 288 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 289 mdiag += 4; 290 i2 += 2; 291 } 292 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 293 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 294 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 295 } 296 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 297 idiag = a->idiag+4*a->mbs - 4; 298 i2 = 2*m - 2; 299 x1 = x[i2]; x2 = x[i2+1]; 300 x[i2] = idiag[0]*x1 + idiag[2]*x2; 301 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 302 idiag -= 4; 303 i2 -= 2; 304 for (i=m-2; i>=0; i--) { 305 v = aa + 4*(diag[i]+1); 306 vi = aj + diag[i] + 1; 307 nz = ai[i+1] - diag[i] - 1; 308 s1 = x[i2]; s2 = x[i2+1]; 309 for (j=0; j<nz; j++) { 310 idx = 2*vi[j]; 311 it = 4*j; 312 x1 = x[idx]; x2 = x[1+idx]; 313 s1 -= v[it]*x1 + v[it+2]*x2; 314 s2 -= v[it+1]*x1 + v[it+3]*x2; 315 } 316 x[i2] = idiag[0]*s1 + idiag[2]*s2; 317 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 318 idiag -= 4; 319 i2 -= 2; 320 } 321 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 322 } 323 } else { 324 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 325 } 326 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 327 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 328 PetscFunctionReturn(0); 329 } 330 331 #undef __FUNCT__ 332 #define __FUNCT__ "MatSOR_SeqBAIJ_3" 333 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 334 { 335 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 336 PetscScalar *x,x1,x2,x3,s1,s2,s3; 337 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 338 const PetscScalar *b; 339 PetscErrorCode ierr; 340 PetscInt m = a->mbs,i,i2,nz,idx; 341 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 342 343 PetscFunctionBegin; 344 its = its*lits; 345 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 346 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 347 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 348 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 349 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 350 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 351 352 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 353 354 diag = a->diag; 355 idiag = a->idiag; 356 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 357 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 358 359 if (flag & SOR_ZERO_INITIAL_GUESS) { 360 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 361 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 362 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 363 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 364 i2 = 3; 365 idiag += 9; 366 for (i=1; i<m; i++) { 367 v = aa + 9*ai[i]; 368 vi = aj + ai[i]; 369 nz = diag[i] - ai[i]; 370 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 371 while (nz--) { 372 idx = 3*(*vi++); 373 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 374 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 375 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 376 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 377 v += 9; 378 } 379 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 380 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 381 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 382 idiag += 9; 383 i2 += 3; 384 } 385 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 386 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 387 } 388 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 389 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 390 i2 = 0; 391 mdiag = a->idiag+9*a->mbs; 392 for (i=0; i<m; i++) { 393 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 394 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 395 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 396 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 397 mdiag += 9; 398 i2 += 3; 399 } 400 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 401 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 402 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 403 } 404 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 405 idiag = a->idiag+9*a->mbs - 9; 406 i2 = 3*m - 3; 407 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 408 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 409 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 410 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 411 idiag -= 9; 412 i2 -= 3; 413 for (i=m-2; i>=0; i--) { 414 v = aa + 9*(diag[i]+1); 415 vi = aj + diag[i] + 1; 416 nz = ai[i+1] - diag[i] - 1; 417 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 418 while (nz--) { 419 idx = 3*(*vi++); 420 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 421 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 422 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 423 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 424 v += 9; 425 } 426 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 427 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 428 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 429 idiag -= 9; 430 i2 -= 3; 431 } 432 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 433 } 434 } else { 435 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 436 } 437 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 438 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 439 PetscFunctionReturn(0); 440 } 441 442 #undef __FUNCT__ 443 #define __FUNCT__ "MatSOR_SeqBAIJ_4" 444 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 445 { 446 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 447 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 448 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 449 const PetscScalar *b; 450 PetscErrorCode ierr; 451 PetscInt m = a->mbs,i,i2,nz,idx; 452 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 453 454 PetscFunctionBegin; 455 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 456 its = its*lits; 457 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 458 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 459 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 460 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 461 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 462 463 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 464 465 diag = a->diag; 466 idiag = a->idiag; 467 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 468 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 469 470 if (flag & SOR_ZERO_INITIAL_GUESS) { 471 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 472 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 473 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 474 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 475 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 476 i2 = 4; 477 idiag += 16; 478 for (i=1; i<m; i++) { 479 v = aa + 16*ai[i]; 480 vi = aj + ai[i]; 481 nz = diag[i] - ai[i]; 482 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 483 while (nz--) { 484 idx = 4*(*vi++); 485 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 486 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 487 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 488 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 489 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 490 v += 16; 491 } 492 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 493 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 494 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 495 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 496 idiag += 16; 497 i2 += 4; 498 } 499 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 500 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 501 } 502 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 503 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 504 i2 = 0; 505 mdiag = a->idiag+16*a->mbs; 506 for (i=0; i<m; i++) { 507 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 508 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 509 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 510 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 511 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 512 mdiag += 16; 513 i2 += 4; 514 } 515 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 516 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 517 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 518 } 519 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 520 idiag = a->idiag+16*a->mbs - 16; 521 i2 = 4*m - 4; 522 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 523 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 524 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 525 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 526 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 527 idiag -= 16; 528 i2 -= 4; 529 for (i=m-2; i>=0; i--) { 530 v = aa + 16*(diag[i]+1); 531 vi = aj + diag[i] + 1; 532 nz = ai[i+1] - diag[i] - 1; 533 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 534 while (nz--) { 535 idx = 4*(*vi++); 536 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 537 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 538 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 539 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 540 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 541 v += 16; 542 } 543 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 544 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 545 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 546 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 547 idiag -= 16; 548 i2 -= 4; 549 } 550 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 551 } 552 } else { 553 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 554 } 555 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 556 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 557 PetscFunctionReturn(0); 558 } 559 560 #undef __FUNCT__ 561 #define __FUNCT__ "MatSOR_SeqBAIJ_5" 562 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 563 { 564 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 565 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 566 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 567 const PetscScalar *b; 568 PetscErrorCode ierr; 569 PetscInt m = a->mbs,i,i2,nz,idx; 570 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 571 572 PetscFunctionBegin; 573 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 574 its = its*lits; 575 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 576 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 577 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 578 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 579 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 580 581 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 582 583 diag = a->diag; 584 idiag = a->idiag; 585 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 586 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 587 588 if (flag & SOR_ZERO_INITIAL_GUESS) { 589 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 590 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 591 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 592 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 593 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 594 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 595 i2 = 5; 596 idiag += 25; 597 for (i=1; i<m; i++) { 598 v = aa + 25*ai[i]; 599 vi = aj + ai[i]; 600 nz = diag[i] - ai[i]; 601 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 602 while (nz--) { 603 idx = 5*(*vi++); 604 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 605 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 606 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 607 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 608 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 609 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 610 v += 25; 611 } 612 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 613 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 614 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 615 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 616 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 617 idiag += 25; 618 i2 += 5; 619 } 620 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 621 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 622 } 623 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 624 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 625 i2 = 0; 626 mdiag = a->idiag+25*a->mbs; 627 for (i=0; i<m; i++) { 628 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 629 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 630 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 631 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 632 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 633 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 634 mdiag += 25; 635 i2 += 5; 636 } 637 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 638 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 639 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 640 } 641 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 642 idiag = a->idiag+25*a->mbs - 25; 643 i2 = 5*m - 5; 644 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 645 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 646 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 647 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 648 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 649 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 650 idiag -= 25; 651 i2 -= 5; 652 for (i=m-2; i>=0; i--) { 653 v = aa + 25*(diag[i]+1); 654 vi = aj + diag[i] + 1; 655 nz = ai[i+1] - diag[i] - 1; 656 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 657 while (nz--) { 658 idx = 5*(*vi++); 659 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 660 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 661 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 662 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 663 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 664 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 665 v += 25; 666 } 667 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 668 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 669 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 670 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 671 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 672 idiag -= 25; 673 i2 -= 5; 674 } 675 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 676 } 677 } else { 678 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 679 } 680 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 681 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 682 PetscFunctionReturn(0); 683 } 684 685 #undef __FUNCT__ 686 #define __FUNCT__ "MatSOR_SeqBAIJ_6" 687 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 688 { 689 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 690 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 691 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 692 const PetscScalar *b; 693 PetscErrorCode ierr; 694 PetscInt m = a->mbs,i,i2,nz,idx; 695 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 696 697 PetscFunctionBegin; 698 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 699 its = its*lits; 700 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 701 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 702 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 703 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 704 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 705 706 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 707 708 diag = a->diag; 709 idiag = a->idiag; 710 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 711 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 712 713 if (flag & SOR_ZERO_INITIAL_GUESS) { 714 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 715 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 716 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 717 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 718 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 719 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 720 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 721 i2 = 6; 722 idiag += 36; 723 for (i=1; i<m; i++) { 724 v = aa + 36*ai[i]; 725 vi = aj + ai[i]; 726 nz = diag[i] - ai[i]; 727 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 728 while (nz--) { 729 idx = 6*(*vi++); 730 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 731 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 732 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 733 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 734 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 735 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 736 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 737 v += 36; 738 } 739 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 740 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 741 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 742 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 743 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 744 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 745 idiag += 36; 746 i2 += 6; 747 } 748 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 749 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 750 } 751 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 752 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 753 i2 = 0; 754 mdiag = a->idiag+36*a->mbs; 755 for (i=0; i<m; i++) { 756 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 757 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 758 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 759 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 760 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 761 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 762 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 763 mdiag += 36; 764 i2 += 6; 765 } 766 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 767 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 768 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 769 } 770 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 771 idiag = a->idiag+36*a->mbs - 36; 772 i2 = 6*m - 6; 773 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 774 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 775 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 776 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 777 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 778 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 779 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 780 idiag -= 36; 781 i2 -= 6; 782 for (i=m-2; i>=0; i--) { 783 v = aa + 36*(diag[i]+1); 784 vi = aj + diag[i] + 1; 785 nz = ai[i+1] - diag[i] - 1; 786 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 787 while (nz--) { 788 idx = 6*(*vi++); 789 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 790 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 791 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 792 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 793 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 794 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 795 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 796 v += 36; 797 } 798 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 799 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 800 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 801 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 802 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 803 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 804 idiag -= 36; 805 i2 -= 6; 806 } 807 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 808 } 809 } else { 810 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 811 } 812 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 813 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 814 PetscFunctionReturn(0); 815 } 816 817 #undef __FUNCT__ 818 #define __FUNCT__ "MatSOR_SeqBAIJ_7" 819 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 820 { 821 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 822 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 823 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 824 const PetscScalar *b; 825 PetscErrorCode ierr; 826 PetscInt m = a->mbs,i,i2,nz,idx; 827 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 828 829 PetscFunctionBegin; 830 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 831 its = its*lits; 832 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 833 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 834 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 835 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 836 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 837 838 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 839 840 diag = a->diag; 841 idiag = a->idiag; 842 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 843 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 844 845 if (flag & SOR_ZERO_INITIAL_GUESS) { 846 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 847 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 848 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 849 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 850 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 851 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 852 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 853 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 854 i2 = 7; 855 idiag += 49; 856 for (i=1; i<m; i++) { 857 v = aa + 49*ai[i]; 858 vi = aj + ai[i]; 859 nz = diag[i] - ai[i]; 860 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 861 while (nz--) { 862 idx = 7*(*vi++); 863 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 864 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 865 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 866 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 867 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 868 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 869 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 870 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 871 v += 49; 872 } 873 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 874 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 875 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 876 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 877 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 878 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 879 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 880 idiag += 49; 881 i2 += 7; 882 } 883 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 884 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 885 } 886 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 887 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 888 i2 = 0; 889 mdiag = a->idiag+49*a->mbs; 890 for (i=0; i<m; i++) { 891 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 892 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7; 893 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7; 894 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7; 895 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7; 896 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7; 897 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7; 898 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7; 899 mdiag += 36; 900 i2 += 6; 901 } 902 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 903 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 904 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 905 } 906 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 907 idiag = a->idiag+49*a->mbs - 49; 908 i2 = 7*m - 7; 909 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 910 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 911 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 912 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 913 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 914 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 915 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 916 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 917 idiag -= 49; 918 i2 -= 7; 919 for (i=m-2; i>=0; i--) { 920 v = aa + 49*(diag[i]+1); 921 vi = aj + diag[i] + 1; 922 nz = ai[i+1] - diag[i] - 1; 923 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 924 while (nz--) { 925 idx = 7*(*vi++); 926 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 927 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 928 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 929 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 930 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 931 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 932 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 933 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 934 v += 49; 935 } 936 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 937 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 938 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 939 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 940 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 941 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 942 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 943 idiag -= 49; 944 i2 -= 7; 945 } 946 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 947 } 948 } else { 949 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 950 } 951 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 952 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 953 PetscFunctionReturn(0); 954 } 955 956 /* 957 Special version for direct calls from Fortran (Used in PETSc-fun3d) 958 */ 959 #if defined(PETSC_HAVE_FORTRAN_CAPS) 960 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 961 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 962 #define matsetvaluesblocked4_ matsetvaluesblocked4 963 #endif 964 965 EXTERN_C_BEGIN 966 #undef __FUNCT__ 967 #define __FUNCT__ "matsetvaluesblocked4_" 968 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 969 { 970 Mat A = *AA; 971 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 972 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 973 PetscInt *ai=a->i,*ailen=a->ilen; 974 PetscInt *aj=a->j,stepval,lastcol = -1; 975 const PetscScalar *value = v; 976 MatScalar *ap,*aa = a->a,*bap; 977 978 PetscFunctionBegin; 979 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 980 stepval = (n-1)*4; 981 for (k=0; k<m; k++) { /* loop over added rows */ 982 row = im[k]; 983 rp = aj + ai[row]; 984 ap = aa + 16*ai[row]; 985 nrow = ailen[row]; 986 low = 0; 987 high = nrow; 988 for (l=0; l<n; l++) { /* loop over added columns */ 989 col = in[l]; 990 if (col <= lastcol) low = 0; else high = nrow; 991 lastcol = col; 992 value = v + k*(stepval+4 + l)*4; 993 while (high-low > 7) { 994 t = (low+high)/2; 995 if (rp[t] > col) high = t; 996 else low = t; 997 } 998 for (i=low; i<high; i++) { 999 if (rp[i] > col) break; 1000 if (rp[i] == col) { 1001 bap = ap + 16*i; 1002 for (ii=0; ii<4; ii++,value+=stepval) { 1003 for (jj=ii; jj<16; jj+=4) { 1004 bap[jj] += *value++; 1005 } 1006 } 1007 goto noinsert2; 1008 } 1009 } 1010 N = nrow++ - 1; 1011 high++; /* added new column index thus must search to one higher than before */ 1012 /* shift up all the later entries in this row */ 1013 for (ii=N; ii>=i; ii--) { 1014 rp[ii+1] = rp[ii]; 1015 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1016 } 1017 if (N >= i) { 1018 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1019 } 1020 rp[i] = col; 1021 bap = ap + 16*i; 1022 for (ii=0; ii<4; ii++,value+=stepval) { 1023 for (jj=ii; jj<16; jj+=4) { 1024 bap[jj] = *value++; 1025 } 1026 } 1027 noinsert2:; 1028 low = i; 1029 } 1030 ailen[row] = nrow; 1031 } 1032 PetscFunctionReturnVoid(); 1033 } 1034 EXTERN_C_END 1035 1036 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1037 #define matsetvalues4_ MATSETVALUES4 1038 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1039 #define matsetvalues4_ matsetvalues4 1040 #endif 1041 1042 EXTERN_C_BEGIN 1043 #undef __FUNCT__ 1044 #define __FUNCT__ "MatSetValues4_" 1045 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1046 { 1047 Mat A = *AA; 1048 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1049 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1050 PetscInt *ai=a->i,*ailen=a->ilen; 1051 PetscInt *aj=a->j,brow,bcol; 1052 PetscInt ridx,cidx,lastcol = -1; 1053 MatScalar *ap,value,*aa=a->a,*bap; 1054 1055 PetscFunctionBegin; 1056 for (k=0; k<m; k++) { /* loop over added rows */ 1057 row = im[k]; brow = row/4; 1058 rp = aj + ai[brow]; 1059 ap = aa + 16*ai[brow]; 1060 nrow = ailen[brow]; 1061 low = 0; 1062 high = nrow; 1063 for (l=0; l<n; l++) { /* loop over added columns */ 1064 col = in[l]; bcol = col/4; 1065 ridx = row % 4; cidx = col % 4; 1066 value = v[l + k*n]; 1067 if (col <= lastcol) low = 0; else high = nrow; 1068 lastcol = col; 1069 while (high-low > 7) { 1070 t = (low+high)/2; 1071 if (rp[t] > bcol) high = t; 1072 else low = t; 1073 } 1074 for (i=low; i<high; i++) { 1075 if (rp[i] > bcol) break; 1076 if (rp[i] == bcol) { 1077 bap = ap + 16*i + 4*cidx + ridx; 1078 *bap += value; 1079 goto noinsert1; 1080 } 1081 } 1082 N = nrow++ - 1; 1083 high++; /* added new column thus must search to one higher than before */ 1084 /* shift up all the later entries in this row */ 1085 for (ii=N; ii>=i; ii--) { 1086 rp[ii+1] = rp[ii]; 1087 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1088 } 1089 if (N>=i) { 1090 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1091 } 1092 rp[i] = bcol; 1093 ap[16*i + 4*cidx + ridx] = value; 1094 noinsert1:; 1095 low = i; 1096 } 1097 ailen[brow] = nrow; 1098 } 1099 PetscFunctionReturnVoid(); 1100 } 1101 EXTERN_C_END 1102 1103 /* 1104 Checks for missing diagonals 1105 */ 1106 #undef __FUNCT__ 1107 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1108 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d) 1109 { 1110 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1111 PetscErrorCode ierr; 1112 PetscInt *diag,*jj = a->j,i; 1113 1114 PetscFunctionBegin; 1115 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1116 *missing = PETSC_FALSE; 1117 if (A->rmap->n > 0 && !jj) { 1118 *missing = PETSC_TRUE; 1119 if (d) *d = 0; 1120 PetscInfo(A,"Matrix has no entries therefor is missing diagonal"); 1121 } else { 1122 diag = a->diag; 1123 for (i=0; i<a->mbs; i++) { 1124 if (jj[diag[i]] != i) { 1125 *missing = PETSC_TRUE; 1126 if (d) *d = i; 1127 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1128 } 1129 } 1130 } 1131 PetscFunctionReturn(0); 1132 } 1133 1134 #undef __FUNCT__ 1135 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1136 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1137 { 1138 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1139 PetscErrorCode ierr; 1140 PetscInt i,j,m = a->mbs; 1141 1142 PetscFunctionBegin; 1143 if (!a->diag) { 1144 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1145 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1146 a->free_diag = PETSC_TRUE; 1147 } 1148 for (i=0; i<m; i++) { 1149 a->diag[i] = a->i[i+1]; 1150 for (j=a->i[i]; j<a->i[i+1]; j++) { 1151 if (a->j[j] == i) { 1152 a->diag[i] = j; 1153 break; 1154 } 1155 } 1156 } 1157 PetscFunctionReturn(0); 1158 } 1159 1160 1161 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1162 1163 #undef __FUNCT__ 1164 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1165 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1166 { 1167 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1168 PetscErrorCode ierr; 1169 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt; 1170 PetscInt *tia, *tja; 1171 1172 PetscFunctionBegin; 1173 *nn = n; 1174 if (!ia) PetscFunctionReturn(0); 1175 if (symmetric) { 1176 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1177 nz = tia[n]; 1178 } else { 1179 tia = a->i; tja = a->j; 1180 } 1181 1182 if (!blockcompressed && bs > 1) { 1183 (*nn) *= bs; 1184 nbs = bs; 1185 /* malloc & create the natural set of indices */ 1186 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1187 if (n) { 1188 (*ia)[0] = 0; 1189 for (j=1; j<bs; j++) { 1190 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1191 } 1192 } 1193 1194 for (i=1; i<n; i++) { 1195 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1196 for (j=1; j<bs; j++) { 1197 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1198 } 1199 } 1200 if (n) { 1201 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1202 } 1203 1204 if (ja) { 1205 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1206 cnt = 0; 1207 for (i=0; i<n; i++) { 1208 for (j=0; j<bs; j++) { 1209 for (k=tia[i]; k<tia[i+1]; k++) { 1210 for (l=0; l<bs; l++) { 1211 (*ja)[cnt++] = bs*tja[k] + l; 1212 } 1213 } 1214 } 1215 } 1216 } 1217 1218 n *= bs; 1219 nz *= bs*bs; 1220 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1221 ierr = PetscFree(tia);CHKERRQ(ierr); 1222 ierr = PetscFree(tja);CHKERRQ(ierr); 1223 } 1224 } else if (oshift == 1) { 1225 if (symmetric) { 1226 PetscInt nz = tia[A->rmap->n/bs]; 1227 /* add 1 to i and j indices */ 1228 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1229 *ia = tia; 1230 if (ja) { 1231 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1232 *ja = tja; 1233 } 1234 } else { 1235 PetscInt nz = a->i[A->rmap->n/bs]; 1236 /* malloc space and add 1 to i and j indices */ 1237 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1238 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1239 if (ja) { 1240 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1241 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1242 } 1243 } 1244 } else { 1245 *ia = tia; 1246 if (ja) *ja = tja; 1247 } 1248 1249 PetscFunctionReturn(0); 1250 } 1251 1252 #undef __FUNCT__ 1253 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1254 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1255 { 1256 PetscErrorCode ierr; 1257 1258 PetscFunctionBegin; 1259 if (!ia) PetscFunctionReturn(0); 1260 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1261 ierr = PetscFree(*ia);CHKERRQ(ierr); 1262 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1263 } 1264 PetscFunctionReturn(0); 1265 } 1266 1267 #undef __FUNCT__ 1268 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1269 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1270 { 1271 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1272 PetscErrorCode ierr; 1273 1274 PetscFunctionBegin; 1275 #if defined(PETSC_USE_LOG) 1276 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1277 #endif 1278 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1279 if (a->row) { 1280 ierr = ISDestroy(a->row);CHKERRQ(ierr); 1281 } 1282 if (a->col) { 1283 ierr = ISDestroy(a->col);CHKERRQ(ierr); 1284 } 1285 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1286 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1287 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1288 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1289 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1290 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 1291 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1292 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1293 if (a->compressedrow.use){ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);} 1294 1295 if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);} 1296 if (a->parent) {ierr = MatDestroy(a->parent);CHKERRQ(ierr);} 1297 ierr = PetscFree(a);CHKERRQ(ierr); 1298 1299 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1304 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1305 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1306 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1307 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1308 PetscFunctionReturn(0); 1309 } 1310 1311 #undef __FUNCT__ 1312 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1313 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg) 1314 { 1315 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1316 PetscErrorCode ierr; 1317 1318 PetscFunctionBegin; 1319 switch (op) { 1320 case MAT_ROW_ORIENTED: 1321 a->roworiented = flg; 1322 break; 1323 case MAT_KEEP_NONZERO_PATTERN: 1324 a->keepnonzeropattern = flg; 1325 break; 1326 case MAT_NEW_NONZERO_LOCATIONS: 1327 a->nonew = (flg ? 0 : 1); 1328 break; 1329 case MAT_NEW_NONZERO_LOCATION_ERR: 1330 a->nonew = (flg ? -1 : 0); 1331 break; 1332 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1333 a->nonew = (flg ? -2 : 0); 1334 break; 1335 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1336 a->nounused = (flg ? -1 : 0); 1337 break; 1338 case MAT_NEW_DIAGONALS: 1339 case MAT_IGNORE_OFF_PROC_ENTRIES: 1340 case MAT_USE_HASH_TABLE: 1341 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1342 break; 1343 case MAT_SYMMETRIC: 1344 case MAT_STRUCTURALLY_SYMMETRIC: 1345 case MAT_HERMITIAN: 1346 case MAT_SYMMETRY_ETERNAL: 1347 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1348 break; 1349 default: 1350 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1351 } 1352 PetscFunctionReturn(0); 1353 } 1354 1355 #undef __FUNCT__ 1356 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1357 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1358 { 1359 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1360 PetscErrorCode ierr; 1361 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1362 MatScalar *aa,*aa_i; 1363 PetscScalar *v_i; 1364 1365 PetscFunctionBegin; 1366 bs = A->rmap->bs; 1367 ai = a->i; 1368 aj = a->j; 1369 aa = a->a; 1370 bs2 = a->bs2; 1371 1372 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1373 1374 bn = row/bs; /* Block number */ 1375 bp = row % bs; /* Block Position */ 1376 M = ai[bn+1] - ai[bn]; 1377 *nz = bs*M; 1378 1379 if (v) { 1380 *v = 0; 1381 if (*nz) { 1382 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1383 for (i=0; i<M; i++) { /* for each block in the block row */ 1384 v_i = *v + i*bs; 1385 aa_i = aa + bs2*(ai[bn] + i); 1386 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1387 } 1388 } 1389 } 1390 1391 if (idx) { 1392 *idx = 0; 1393 if (*nz) { 1394 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1395 for (i=0; i<M; i++) { /* for each block in the block row */ 1396 idx_i = *idx + i*bs; 1397 itmp = bs*aj[ai[bn] + i]; 1398 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1399 } 1400 } 1401 } 1402 PetscFunctionReturn(0); 1403 } 1404 1405 #undef __FUNCT__ 1406 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1407 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1408 { 1409 PetscErrorCode ierr; 1410 1411 PetscFunctionBegin; 1412 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1413 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1414 PetscFunctionReturn(0); 1415 } 1416 1417 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1418 1419 #undef __FUNCT__ 1420 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1421 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1422 { 1423 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1424 Mat C; 1425 PetscErrorCode ierr; 1426 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1427 PetscInt *rows,*cols,bs2=a->bs2; 1428 MatScalar *array; 1429 1430 PetscFunctionBegin; 1431 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1432 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1433 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1434 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1435 1436 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1437 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1438 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1439 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1440 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1441 ierr = PetscFree(col);CHKERRQ(ierr); 1442 } else { 1443 C = *B; 1444 } 1445 1446 array = a->a; 1447 ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 1448 for (i=0; i<mbs; i++) { 1449 cols[0] = i*bs; 1450 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1451 len = ai[i+1] - ai[i]; 1452 for (j=0; j<len; j++) { 1453 rows[0] = (*aj++)*bs; 1454 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1455 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1456 array += bs2; 1457 } 1458 } 1459 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1460 1461 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1462 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1463 1464 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1465 *B = C; 1466 } else { 1467 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1468 } 1469 PetscFunctionReturn(0); 1470 } 1471 1472 #undef __FUNCT__ 1473 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1474 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1475 { 1476 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1477 PetscErrorCode ierr; 1478 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1479 int fd; 1480 PetscScalar *aa; 1481 FILE *file; 1482 1483 PetscFunctionBegin; 1484 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1485 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1486 col_lens[0] = MAT_FILE_COOKIE; 1487 1488 col_lens[1] = A->rmap->N; 1489 col_lens[2] = A->cmap->n; 1490 col_lens[3] = a->nz*bs2; 1491 1492 /* store lengths of each row and write (including header) to file */ 1493 count = 0; 1494 for (i=0; i<a->mbs; i++) { 1495 for (j=0; j<bs; j++) { 1496 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1497 } 1498 } 1499 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1500 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1501 1502 /* store column indices (zero start index) */ 1503 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1504 count = 0; 1505 for (i=0; i<a->mbs; i++) { 1506 for (j=0; j<bs; j++) { 1507 for (k=a->i[i]; k<a->i[i+1]; k++) { 1508 for (l=0; l<bs; l++) { 1509 jj[count++] = bs*a->j[k] + l; 1510 } 1511 } 1512 } 1513 } 1514 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1515 ierr = PetscFree(jj);CHKERRQ(ierr); 1516 1517 /* store nonzero values */ 1518 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1519 count = 0; 1520 for (i=0; i<a->mbs; i++) { 1521 for (j=0; j<bs; j++) { 1522 for (k=a->i[i]; k<a->i[i+1]; k++) { 1523 for (l=0; l<bs; l++) { 1524 aa[count++] = a->a[bs2*k + l*bs + j]; 1525 } 1526 } 1527 } 1528 } 1529 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1530 ierr = PetscFree(aa);CHKERRQ(ierr); 1531 1532 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1533 if (file) { 1534 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1535 } 1536 PetscFunctionReturn(0); 1537 } 1538 1539 #undef __FUNCT__ 1540 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1541 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1542 { 1543 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1544 PetscErrorCode ierr; 1545 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1546 PetscViewerFormat format; 1547 1548 PetscFunctionBegin; 1549 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1550 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1551 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1552 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1553 Mat aij; 1554 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1555 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1556 ierr = MatDestroy(aij);CHKERRQ(ierr); 1557 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1558 PetscFunctionReturn(0); 1559 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1560 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1561 for (i=0; i<a->mbs; i++) { 1562 for (j=0; j<bs; j++) { 1563 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1564 for (k=a->i[i]; k<a->i[i+1]; k++) { 1565 for (l=0; l<bs; l++) { 1566 #if defined(PETSC_USE_COMPLEX) 1567 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1568 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1569 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1570 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1571 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1572 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1573 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1574 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1575 } 1576 #else 1577 if (a->a[bs2*k + l*bs + j] != 0.0) { 1578 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1579 } 1580 #endif 1581 } 1582 } 1583 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1584 } 1585 } 1586 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1587 } else { 1588 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1589 for (i=0; i<a->mbs; i++) { 1590 for (j=0; j<bs; j++) { 1591 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1592 for (k=a->i[i]; k<a->i[i+1]; k++) { 1593 for (l=0; l<bs; l++) { 1594 #if defined(PETSC_USE_COMPLEX) 1595 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1596 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1597 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1598 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1599 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1600 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1601 } else { 1602 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1603 } 1604 #else 1605 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1606 #endif 1607 } 1608 } 1609 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1610 } 1611 } 1612 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1613 } 1614 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1615 PetscFunctionReturn(0); 1616 } 1617 1618 #undef __FUNCT__ 1619 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1620 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1621 { 1622 Mat A = (Mat) Aa; 1623 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1624 PetscErrorCode ierr; 1625 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1626 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1627 MatScalar *aa; 1628 PetscViewer viewer; 1629 1630 PetscFunctionBegin; 1631 1632 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1633 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1634 1635 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1636 1637 /* loop over matrix elements drawing boxes */ 1638 color = PETSC_DRAW_BLUE; 1639 for (i=0,row=0; i<mbs; i++,row+=bs) { 1640 for (j=a->i[i]; j<a->i[i+1]; j++) { 1641 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1642 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1643 aa = a->a + j*bs2; 1644 for (k=0; k<bs; k++) { 1645 for (l=0; l<bs; l++) { 1646 if (PetscRealPart(*aa++) >= 0.) continue; 1647 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1648 } 1649 } 1650 } 1651 } 1652 color = PETSC_DRAW_CYAN; 1653 for (i=0,row=0; i<mbs; i++,row+=bs) { 1654 for (j=a->i[i]; j<a->i[i+1]; j++) { 1655 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1656 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1657 aa = a->a + j*bs2; 1658 for (k=0; k<bs; k++) { 1659 for (l=0; l<bs; l++) { 1660 if (PetscRealPart(*aa++) != 0.) continue; 1661 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1662 } 1663 } 1664 } 1665 } 1666 1667 color = PETSC_DRAW_RED; 1668 for (i=0,row=0; i<mbs; i++,row+=bs) { 1669 for (j=a->i[i]; j<a->i[i+1]; j++) { 1670 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1671 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1672 aa = a->a + j*bs2; 1673 for (k=0; k<bs; k++) { 1674 for (l=0; l<bs; l++) { 1675 if (PetscRealPart(*aa++) <= 0.) continue; 1676 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1677 } 1678 } 1679 } 1680 } 1681 PetscFunctionReturn(0); 1682 } 1683 1684 #undef __FUNCT__ 1685 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1686 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1687 { 1688 PetscErrorCode ierr; 1689 PetscReal xl,yl,xr,yr,w,h; 1690 PetscDraw draw; 1691 PetscTruth isnull; 1692 1693 PetscFunctionBegin; 1694 1695 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1696 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1697 1698 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1699 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1700 xr += w; yr += h; xl = -w; yl = -h; 1701 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1702 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1703 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1704 PetscFunctionReturn(0); 1705 } 1706 1707 #undef __FUNCT__ 1708 #define __FUNCT__ "MatView_SeqBAIJ" 1709 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1710 { 1711 PetscErrorCode ierr; 1712 PetscTruth iascii,isbinary,isdraw; 1713 1714 PetscFunctionBegin; 1715 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1716 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1717 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1718 if (iascii){ 1719 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1720 } else if (isbinary) { 1721 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1722 } else if (isdraw) { 1723 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1724 } else { 1725 Mat B; 1726 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1727 ierr = MatView(B,viewer);CHKERRQ(ierr); 1728 ierr = MatDestroy(B);CHKERRQ(ierr); 1729 } 1730 PetscFunctionReturn(0); 1731 } 1732 1733 1734 #undef __FUNCT__ 1735 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1736 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1737 { 1738 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1739 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1740 PetscInt *ai = a->i,*ailen = a->ilen; 1741 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1742 MatScalar *ap,*aa = a->a; 1743 1744 PetscFunctionBegin; 1745 for (k=0; k<m; k++) { /* loop over rows */ 1746 row = im[k]; brow = row/bs; 1747 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1748 if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1749 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1750 nrow = ailen[brow]; 1751 for (l=0; l<n; l++) { /* loop over columns */ 1752 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1753 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1754 col = in[l] ; 1755 bcol = col/bs; 1756 cidx = col%bs; 1757 ridx = row%bs; 1758 high = nrow; 1759 low = 0; /* assume unsorted */ 1760 while (high-low > 5) { 1761 t = (low+high)/2; 1762 if (rp[t] > bcol) high = t; 1763 else low = t; 1764 } 1765 for (i=low; i<high; i++) { 1766 if (rp[i] > bcol) break; 1767 if (rp[i] == bcol) { 1768 *v++ = ap[bs2*i+bs*cidx+ridx]; 1769 goto finished; 1770 } 1771 } 1772 *v++ = 0.0; 1773 finished:; 1774 } 1775 } 1776 PetscFunctionReturn(0); 1777 } 1778 1779 #define CHUNKSIZE 10 1780 #undef __FUNCT__ 1781 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1782 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1783 { 1784 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1785 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1786 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1787 PetscErrorCode ierr; 1788 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1789 PetscTruth roworiented=a->roworiented; 1790 const PetscScalar *value = v; 1791 MatScalar *ap,*aa = a->a,*bap; 1792 1793 PetscFunctionBegin; 1794 if (roworiented) { 1795 stepval = (n-1)*bs; 1796 } else { 1797 stepval = (m-1)*bs; 1798 } 1799 for (k=0; k<m; k++) { /* loop over added rows */ 1800 row = im[k]; 1801 if (row < 0) continue; 1802 #if defined(PETSC_USE_DEBUG) 1803 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1804 #endif 1805 rp = aj + ai[row]; 1806 ap = aa + bs2*ai[row]; 1807 rmax = imax[row]; 1808 nrow = ailen[row]; 1809 low = 0; 1810 high = nrow; 1811 for (l=0; l<n; l++) { /* loop over added columns */ 1812 if (in[l] < 0) continue; 1813 #if defined(PETSC_USE_DEBUG) 1814 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1815 #endif 1816 col = in[l]; 1817 if (roworiented) { 1818 value = v + k*(stepval+bs)*bs + l*bs; 1819 } else { 1820 value = v + l*(stepval+bs)*bs + k*bs; 1821 } 1822 if (col <= lastcol) low = 0; else high = nrow; 1823 lastcol = col; 1824 while (high-low > 7) { 1825 t = (low+high)/2; 1826 if (rp[t] > col) high = t; 1827 else low = t; 1828 } 1829 for (i=low; i<high; i++) { 1830 if (rp[i] > col) break; 1831 if (rp[i] == col) { 1832 bap = ap + bs2*i; 1833 if (roworiented) { 1834 if (is == ADD_VALUES) { 1835 for (ii=0; ii<bs; ii++,value+=stepval) { 1836 for (jj=ii; jj<bs2; jj+=bs) { 1837 bap[jj] += *value++; 1838 } 1839 } 1840 } else { 1841 for (ii=0; ii<bs; ii++,value+=stepval) { 1842 for (jj=ii; jj<bs2; jj+=bs) { 1843 bap[jj] = *value++; 1844 } 1845 } 1846 } 1847 } else { 1848 if (is == ADD_VALUES) { 1849 for (ii=0; ii<bs; ii++,value+=stepval) { 1850 for (jj=0; jj<bs; jj++) { 1851 *bap++ += *value++; 1852 } 1853 } 1854 } else { 1855 for (ii=0; ii<bs; ii++,value+=stepval) { 1856 for (jj=0; jj<bs; jj++) { 1857 *bap++ = *value++; 1858 } 1859 } 1860 } 1861 } 1862 goto noinsert2; 1863 } 1864 } 1865 if (nonew == 1) goto noinsert2; 1866 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1867 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1868 N = nrow++ - 1; high++; 1869 /* shift up all the later entries in this row */ 1870 for (ii=N; ii>=i; ii--) { 1871 rp[ii+1] = rp[ii]; 1872 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1873 } 1874 if (N >= i) { 1875 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1876 } 1877 rp[i] = col; 1878 bap = ap + bs2*i; 1879 if (roworiented) { 1880 for (ii=0; ii<bs; ii++,value+=stepval) { 1881 for (jj=ii; jj<bs2; jj+=bs) { 1882 bap[jj] = *value++; 1883 } 1884 } 1885 } else { 1886 for (ii=0; ii<bs; ii++,value+=stepval) { 1887 for (jj=0; jj<bs; jj++) { 1888 *bap++ = *value++; 1889 } 1890 } 1891 } 1892 noinsert2:; 1893 low = i; 1894 } 1895 ailen[row] = nrow; 1896 } 1897 PetscFunctionReturn(0); 1898 } 1899 1900 #undef __FUNCT__ 1901 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1902 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1903 { 1904 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1905 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1906 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1907 PetscErrorCode ierr; 1908 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1909 MatScalar *aa = a->a,*ap; 1910 PetscReal ratio=0.6; 1911 1912 PetscFunctionBegin; 1913 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1914 1915 if (m) rmax = ailen[0]; 1916 for (i=1; i<mbs; i++) { 1917 /* move each row back by the amount of empty slots (fshift) before it*/ 1918 fshift += imax[i-1] - ailen[i-1]; 1919 rmax = PetscMax(rmax,ailen[i]); 1920 if (fshift) { 1921 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1922 N = ailen[i]; 1923 for (j=0; j<N; j++) { 1924 ip[j-fshift] = ip[j]; 1925 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1926 } 1927 } 1928 ai[i] = ai[i-1] + ailen[i-1]; 1929 } 1930 if (mbs) { 1931 fshift += imax[mbs-1] - ailen[mbs-1]; 1932 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1933 } 1934 /* reset ilen and imax for each row */ 1935 for (i=0; i<mbs; i++) { 1936 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1937 } 1938 a->nz = ai[mbs]; 1939 1940 /* diagonals may have moved, so kill the diagonal pointers */ 1941 a->idiagvalid = PETSC_FALSE; 1942 if (fshift && a->diag) { 1943 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1944 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1945 a->diag = 0; 1946 } 1947 if (fshift && a->nounused == -1) { 1948 SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1949 } 1950 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1951 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1952 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1953 a->reallocs = 0; 1954 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1955 1956 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1957 if (a->compressedrow.use){ 1958 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1959 } 1960 1961 A->same_nonzero = PETSC_TRUE; 1962 PetscFunctionReturn(0); 1963 } 1964 1965 /* 1966 This function returns an array of flags which indicate the locations of contiguous 1967 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1968 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1969 Assume: sizes should be long enough to hold all the values. 1970 */ 1971 #undef __FUNCT__ 1972 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1973 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1974 { 1975 PetscInt i,j,k,row; 1976 PetscTruth flg; 1977 1978 PetscFunctionBegin; 1979 for (i=0,j=0; i<n; j++) { 1980 row = idx[i]; 1981 if (row%bs!=0) { /* Not the begining of a block */ 1982 sizes[j] = 1; 1983 i++; 1984 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1985 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1986 i++; 1987 } else { /* Begining of the block, so check if the complete block exists */ 1988 flg = PETSC_TRUE; 1989 for (k=1; k<bs; k++) { 1990 if (row+k != idx[i+k]) { /* break in the block */ 1991 flg = PETSC_FALSE; 1992 break; 1993 } 1994 } 1995 if (flg) { /* No break in the bs */ 1996 sizes[j] = bs; 1997 i+= bs; 1998 } else { 1999 sizes[j] = 1; 2000 i++; 2001 } 2002 } 2003 } 2004 *bs_max = j; 2005 PetscFunctionReturn(0); 2006 } 2007 2008 #undef __FUNCT__ 2009 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2010 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 2011 { 2012 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2013 PetscErrorCode ierr; 2014 PetscInt i,j,k,count,*rows; 2015 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2016 PetscScalar zero = 0.0; 2017 MatScalar *aa; 2018 2019 PetscFunctionBegin; 2020 /* Make a copy of the IS and sort it */ 2021 /* allocate memory for rows,sizes */ 2022 ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2023 2024 /* copy IS values to rows, and sort them */ 2025 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2026 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2027 if (baij->keepnonzeropattern) { 2028 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2029 bs_max = is_n; 2030 A->same_nonzero = PETSC_TRUE; 2031 } else { 2032 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2033 A->same_nonzero = PETSC_FALSE; 2034 } 2035 2036 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2037 row = rows[j]; 2038 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2039 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2040 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2041 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2042 if (diag != 0.0) { 2043 if (baij->ilen[row/bs] > 0) { 2044 baij->ilen[row/bs] = 1; 2045 baij->j[baij->i[row/bs]] = row/bs; 2046 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2047 } 2048 /* Now insert all the diagonal values for this bs */ 2049 for (k=0; k<bs; k++) { 2050 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2051 } 2052 } else { /* (diag == 0.0) */ 2053 baij->ilen[row/bs] = 0; 2054 } /* end (diag == 0.0) */ 2055 } else { /* (sizes[i] != bs) */ 2056 #if defined (PETSC_USE_DEBUG) 2057 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2058 #endif 2059 for (k=0; k<count; k++) { 2060 aa[0] = zero; 2061 aa += bs; 2062 } 2063 if (diag != 0.0) { 2064 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2065 } 2066 } 2067 } 2068 2069 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2070 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2071 PetscFunctionReturn(0); 2072 } 2073 2074 #undef __FUNCT__ 2075 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2076 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2077 { 2078 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2079 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2080 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2081 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2082 PetscErrorCode ierr; 2083 PetscInt ridx,cidx,bs2=a->bs2; 2084 PetscTruth roworiented=a->roworiented; 2085 MatScalar *ap,value,*aa=a->a,*bap; 2086 2087 PetscFunctionBegin; 2088 if (v) PetscValidScalarPointer(v,6); 2089 for (k=0; k<m; k++) { /* loop over added rows */ 2090 row = im[k]; 2091 brow = row/bs; 2092 if (row < 0) continue; 2093 #if defined(PETSC_USE_DEBUG) 2094 if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2095 #endif 2096 rp = aj + ai[brow]; 2097 ap = aa + bs2*ai[brow]; 2098 rmax = imax[brow]; 2099 nrow = ailen[brow]; 2100 low = 0; 2101 high = nrow; 2102 for (l=0; l<n; l++) { /* loop over added columns */ 2103 if (in[l] < 0) continue; 2104 #if defined(PETSC_USE_DEBUG) 2105 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2106 #endif 2107 col = in[l]; bcol = col/bs; 2108 ridx = row % bs; cidx = col % bs; 2109 if (roworiented) { 2110 value = v[l + k*n]; 2111 } else { 2112 value = v[k + l*m]; 2113 } 2114 if (col <= lastcol) low = 0; else high = nrow; 2115 lastcol = col; 2116 while (high-low > 7) { 2117 t = (low+high)/2; 2118 if (rp[t] > bcol) high = t; 2119 else low = t; 2120 } 2121 for (i=low; i<high; i++) { 2122 if (rp[i] > bcol) break; 2123 if (rp[i] == bcol) { 2124 bap = ap + bs2*i + bs*cidx + ridx; 2125 if (is == ADD_VALUES) *bap += value; 2126 else *bap = value; 2127 goto noinsert1; 2128 } 2129 } 2130 if (nonew == 1) goto noinsert1; 2131 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2132 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2133 N = nrow++ - 1; high++; 2134 /* shift up all the later entries in this row */ 2135 for (ii=N; ii>=i; ii--) { 2136 rp[ii+1] = rp[ii]; 2137 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2138 } 2139 if (N>=i) { 2140 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2141 } 2142 rp[i] = bcol; 2143 ap[bs2*i + bs*cidx + ridx] = value; 2144 a->nz++; 2145 noinsert1:; 2146 low = i; 2147 } 2148 ailen[brow] = nrow; 2149 } 2150 A->same_nonzero = PETSC_FALSE; 2151 PetscFunctionReturn(0); 2152 } 2153 2154 #undef __FUNCT__ 2155 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2156 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2157 { 2158 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2159 Mat outA; 2160 PetscErrorCode ierr; 2161 PetscTruth row_identity,col_identity; 2162 2163 PetscFunctionBegin; 2164 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2165 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2166 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2167 if (!row_identity || !col_identity) { 2168 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2169 } 2170 2171 outA = inA; 2172 inA->factor = MAT_FACTOR_LU; 2173 2174 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2175 2176 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2177 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 2178 a->row = row; 2179 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2180 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 2181 a->col = col; 2182 2183 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2184 if (a->icol) { 2185 ierr = ISDestroy(a->icol);CHKERRQ(ierr); 2186 } 2187 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2188 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2189 2190 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr); 2191 if (!a->solve_work) { 2192 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2193 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2194 } 2195 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2196 2197 PetscFunctionReturn(0); 2198 } 2199 2200 EXTERN_C_BEGIN 2201 #undef __FUNCT__ 2202 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2203 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2204 { 2205 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2206 PetscInt i,nz,mbs; 2207 2208 PetscFunctionBegin; 2209 nz = baij->maxnz/baij->bs2; 2210 mbs = baij->mbs; 2211 for (i=0; i<nz; i++) { 2212 baij->j[i] = indices[i]; 2213 } 2214 baij->nz = nz; 2215 for (i=0; i<mbs; i++) { 2216 baij->ilen[i] = baij->imax[i]; 2217 } 2218 PetscFunctionReturn(0); 2219 } 2220 EXTERN_C_END 2221 2222 #undef __FUNCT__ 2223 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2224 /*@ 2225 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2226 in the matrix. 2227 2228 Input Parameters: 2229 + mat - the SeqBAIJ matrix 2230 - indices - the column indices 2231 2232 Level: advanced 2233 2234 Notes: 2235 This can be called if you have precomputed the nonzero structure of the 2236 matrix and want to provide it to the matrix object to improve the performance 2237 of the MatSetValues() operation. 2238 2239 You MUST have set the correct numbers of nonzeros per row in the call to 2240 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2241 2242 MUST be called before any calls to MatSetValues(); 2243 2244 @*/ 2245 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2246 { 2247 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 2248 2249 PetscFunctionBegin; 2250 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 2251 PetscValidPointer(indices,2); 2252 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 2253 if (f) { 2254 ierr = (*f)(mat,indices);CHKERRQ(ierr); 2255 } else { 2256 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 2257 } 2258 PetscFunctionReturn(0); 2259 } 2260 2261 #undef __FUNCT__ 2262 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2263 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2264 { 2265 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2266 PetscErrorCode ierr; 2267 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2268 PetscReal atmp; 2269 PetscScalar *x,zero = 0.0; 2270 MatScalar *aa; 2271 PetscInt ncols,brow,krow,kcol; 2272 2273 PetscFunctionBegin; 2274 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2275 bs = A->rmap->bs; 2276 aa = a->a; 2277 ai = a->i; 2278 aj = a->j; 2279 mbs = a->mbs; 2280 2281 ierr = VecSet(v,zero);CHKERRQ(ierr); 2282 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2283 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2284 if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2285 for (i=0; i<mbs; i++) { 2286 ncols = ai[1] - ai[0]; ai++; 2287 brow = bs*i; 2288 for (j=0; j<ncols; j++){ 2289 for (kcol=0; kcol<bs; kcol++){ 2290 for (krow=0; krow<bs; krow++){ 2291 atmp = PetscAbsScalar(*aa);aa++; 2292 row = brow + krow; /* row index */ 2293 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2294 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2295 } 2296 } 2297 aj++; 2298 } 2299 } 2300 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2301 PetscFunctionReturn(0); 2302 } 2303 2304 #undef __FUNCT__ 2305 #define __FUNCT__ "MatCopy_SeqBAIJ" 2306 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2307 { 2308 PetscErrorCode ierr; 2309 2310 PetscFunctionBegin; 2311 /* If the two matrices have the same copy implementation, use fast copy. */ 2312 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2313 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2314 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2315 2316 if (a->i[A->rmap->N] != b->i[B->rmap->N]) { 2317 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 2318 } 2319 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr); 2320 } else { 2321 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2322 } 2323 PetscFunctionReturn(0); 2324 } 2325 2326 #undef __FUNCT__ 2327 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2328 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2329 { 2330 PetscErrorCode ierr; 2331 2332 PetscFunctionBegin; 2333 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2334 PetscFunctionReturn(0); 2335 } 2336 2337 #undef __FUNCT__ 2338 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2339 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2340 { 2341 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2342 PetscFunctionBegin; 2343 *array = a->a; 2344 PetscFunctionReturn(0); 2345 } 2346 2347 #undef __FUNCT__ 2348 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2349 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2350 { 2351 PetscFunctionBegin; 2352 PetscFunctionReturn(0); 2353 } 2354 2355 #undef __FUNCT__ 2356 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2357 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2358 { 2359 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2360 PetscErrorCode ierr; 2361 PetscInt i,bs=Y->rmap->bs,j,bs2; 2362 PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz); 2363 2364 PetscFunctionBegin; 2365 if (str == SAME_NONZERO_PATTERN) { 2366 PetscScalar alpha = a; 2367 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2368 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2369 if (y->xtoy && y->XtoY != X) { 2370 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2371 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2372 } 2373 if (!y->xtoy) { /* get xtoy */ 2374 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2375 y->XtoY = X; 2376 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2377 } 2378 bs2 = bs*bs; 2379 for (i=0; i<x->nz; i++) { 2380 j = 0; 2381 while (j < bs2){ 2382 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2383 j++; 2384 } 2385 } 2386 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2387 } else { 2388 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2389 } 2390 PetscFunctionReturn(0); 2391 } 2392 2393 #undef __FUNCT__ 2394 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ" 2395 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs) 2396 { 2397 PetscInt rbs,cbs; 2398 PetscErrorCode ierr; 2399 2400 PetscFunctionBegin; 2401 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 2402 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 2403 if (rbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 2404 if (cbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 2405 PetscFunctionReturn(0); 2406 } 2407 2408 #undef __FUNCT__ 2409 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2410 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2411 { 2412 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2413 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2414 MatScalar *aa = a->a; 2415 2416 PetscFunctionBegin; 2417 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2418 PetscFunctionReturn(0); 2419 } 2420 2421 #undef __FUNCT__ 2422 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2423 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2424 { 2425 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2426 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2427 MatScalar *aa = a->a; 2428 2429 PetscFunctionBegin; 2430 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2431 PetscFunctionReturn(0); 2432 } 2433 2434 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2435 2436 #undef __FUNCT__ 2437 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2438 /* 2439 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2440 */ 2441 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 2442 { 2443 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2444 PetscErrorCode ierr; 2445 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2446 PetscInt nz = a->i[m],row,*jj,mr,col; 2447 2448 PetscFunctionBegin; 2449 *nn = n; 2450 if (!ia) PetscFunctionReturn(0); 2451 if (symmetric) { 2452 SETERRQ(PETSC_ERR_SUP,"Not for BAIJ matrices"); 2453 } else { 2454 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2455 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2456 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2457 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2458 jj = a->j; 2459 for (i=0; i<nz; i++) { 2460 collengths[jj[i]]++; 2461 } 2462 cia[0] = oshift; 2463 for (i=0; i<n; i++) { 2464 cia[i+1] = cia[i] + collengths[i]; 2465 } 2466 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2467 jj = a->j; 2468 for (row=0; row<m; row++) { 2469 mr = a->i[row+1] - a->i[row]; 2470 for (i=0; i<mr; i++) { 2471 col = *jj++; 2472 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2473 } 2474 } 2475 ierr = PetscFree(collengths);CHKERRQ(ierr); 2476 *ia = cia; *ja = cja; 2477 } 2478 PetscFunctionReturn(0); 2479 } 2480 2481 #undef __FUNCT__ 2482 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2483 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 2484 { 2485 PetscErrorCode ierr; 2486 2487 PetscFunctionBegin; 2488 if (!ia) PetscFunctionReturn(0); 2489 ierr = PetscFree(*ia);CHKERRQ(ierr); 2490 ierr = PetscFree(*ja);CHKERRQ(ierr); 2491 PetscFunctionReturn(0); 2492 } 2493 2494 #undef __FUNCT__ 2495 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2496 PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2497 { 2498 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f; 2499 PetscErrorCode ierr; 2500 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2; 2501 PetscScalar dx,*y,*xx,*w3_array; 2502 PetscScalar *vscale_array; 2503 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2504 Vec w1=coloring->w1,w2=coloring->w2,w3; 2505 void *fctx = coloring->fctx; 2506 PetscTruth flg = PETSC_FALSE; 2507 PetscInt ctype=coloring->ctype,N,col_start=0,col_end=0; 2508 Vec x1_tmp; 2509 2510 PetscFunctionBegin; 2511 PetscValidHeaderSpecific(J,MAT_COOKIE,1); 2512 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_COOKIE,2); 2513 PetscValidHeaderSpecific(x1,VEC_COOKIE,3); 2514 if (!f) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2515 2516 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2517 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2518 ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr); 2519 if (flg) { 2520 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2521 } else { 2522 PetscTruth assembled; 2523 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2524 if (assembled) { 2525 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2526 } 2527 } 2528 2529 x1_tmp = x1; 2530 if (!coloring->vscale){ 2531 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2532 } 2533 2534 /* 2535 This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets 2536 coloring->F for the coarser grids from the finest 2537 */ 2538 if (coloring->F) { 2539 ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr); 2540 ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr); 2541 if (m1 != m2) { 2542 coloring->F = 0; 2543 } 2544 } 2545 2546 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2547 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2548 } 2549 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2550 2551 /* Set w1 = F(x1) */ 2552 if (coloring->F) { 2553 w1 = coloring->F; /* use already computed value of function */ 2554 coloring->F = 0; 2555 } else { 2556 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2557 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2558 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2559 } 2560 2561 if (!coloring->w3) { 2562 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2563 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2564 } 2565 w3 = coloring->w3; 2566 2567 CHKMEMQ; 2568 /* Compute all the local scale factors, including ghost points */ 2569 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2570 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2571 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2572 if (ctype == IS_COLORING_GHOSTED){ 2573 col_start = 0; col_end = N; 2574 } else if (ctype == IS_COLORING_GLOBAL){ 2575 xx = xx - start; 2576 vscale_array = vscale_array - start; 2577 col_start = start; col_end = N + start; 2578 } CHKMEMQ; 2579 for (col=col_start; col<col_end; col++){ 2580 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2581 if (coloring->htype[0] == 'w') { 2582 dx = 1.0 + unorm; 2583 } else { 2584 dx = xx[col]; 2585 } 2586 if (dx == 0.0) dx = 1.0; 2587 #if !defined(PETSC_USE_COMPLEX) 2588 if (dx < umin && dx >= 0.0) dx = umin; 2589 else if (dx < 0.0 && dx > -umin) dx = -umin; 2590 #else 2591 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2592 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2593 #endif 2594 dx *= epsilon; 2595 vscale_array[col] = 1.0/dx; 2596 } CHKMEMQ; 2597 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2598 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2599 if (ctype == IS_COLORING_GLOBAL){ 2600 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2601 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2602 } 2603 CHKMEMQ; 2604 if (coloring->vscaleforrow) { 2605 vscaleforrow = coloring->vscaleforrow; 2606 } else { 2607 SETERRQ(PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2608 } 2609 2610 2611 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2612 /* 2613 Loop over each color 2614 */ 2615 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2616 for (k=0; k<coloring->ncolors; k++) { 2617 coloring->currentcolor = k; 2618 for (i=0; i<bs; i++) { 2619 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2620 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2621 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2622 /* 2623 Loop over each column associated with color 2624 adding the perturbation to the vector w3. 2625 */ 2626 for (l=0; l<coloring->ncolumns[k]; l++) { 2627 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2628 if (coloring->htype[0] == 'w') { 2629 dx = 1.0 + unorm; 2630 } else { 2631 dx = xx[col]; 2632 } 2633 if (dx == 0.0) dx = 1.0; 2634 #if !defined(PETSC_USE_COMPLEX) 2635 if (dx < umin && dx >= 0.0) dx = umin; 2636 else if (dx < 0.0 && dx > -umin) dx = -umin; 2637 #else 2638 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2639 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2640 #endif 2641 dx *= epsilon; 2642 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2643 w3_array[col] += dx; 2644 } 2645 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2646 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2647 2648 /* 2649 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2650 w2 = F(x1 + dx) - F(x1) 2651 */ 2652 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2653 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2654 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2655 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2656 2657 /* 2658 Loop over rows of vector, putting results into Jacobian matrix 2659 */ 2660 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2661 for (l=0; l<coloring->nrows[k]; l++) { 2662 row = bs*coloring->rows[k][l]; /* local row index */ 2663 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2664 for (j=0; j<bs; j++) { 2665 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2666 srows[j] = row + start + j; 2667 } 2668 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2669 } 2670 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2671 } 2672 } /* endof for each color */ 2673 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2674 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2675 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2676 ierr = PetscFree(srows);CHKERRQ(ierr); 2677 2678 coloring->currentcolor = -1; 2679 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2680 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2681 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2682 PetscFunctionReturn(0); 2683 } 2684 2685 /* -------------------------------------------------------------------*/ 2686 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2687 MatGetRow_SeqBAIJ, 2688 MatRestoreRow_SeqBAIJ, 2689 MatMult_SeqBAIJ_N, 2690 /* 4*/ MatMultAdd_SeqBAIJ_N, 2691 MatMultTranspose_SeqBAIJ, 2692 MatMultTransposeAdd_SeqBAIJ, 2693 0, 2694 0, 2695 0, 2696 /*10*/ 0, 2697 MatLUFactor_SeqBAIJ, 2698 0, 2699 0, 2700 MatTranspose_SeqBAIJ, 2701 /*15*/ MatGetInfo_SeqBAIJ, 2702 MatEqual_SeqBAIJ, 2703 MatGetDiagonal_SeqBAIJ, 2704 MatDiagonalScale_SeqBAIJ, 2705 MatNorm_SeqBAIJ, 2706 /*20*/ 0, 2707 MatAssemblyEnd_SeqBAIJ, 2708 MatSetOption_SeqBAIJ, 2709 MatZeroEntries_SeqBAIJ, 2710 /*24*/ MatZeroRows_SeqBAIJ, 2711 0, 2712 0, 2713 0, 2714 0, 2715 /*29*/ MatSetUpPreallocation_SeqBAIJ, 2716 0, 2717 0, 2718 MatGetArray_SeqBAIJ, 2719 MatRestoreArray_SeqBAIJ, 2720 /*34*/ MatDuplicate_SeqBAIJ, 2721 0, 2722 0, 2723 MatILUFactor_SeqBAIJ, 2724 0, 2725 /*39*/ MatAXPY_SeqBAIJ, 2726 MatGetSubMatrices_SeqBAIJ, 2727 MatIncreaseOverlap_SeqBAIJ, 2728 MatGetValues_SeqBAIJ, 2729 MatCopy_SeqBAIJ, 2730 /*44*/ 0, 2731 MatScale_SeqBAIJ, 2732 0, 2733 0, 2734 0, 2735 /*49*/ MatSetBlockSize_SeqBAIJ, 2736 MatGetRowIJ_SeqBAIJ, 2737 MatRestoreRowIJ_SeqBAIJ, 2738 MatGetColumnIJ_SeqBAIJ, 2739 MatRestoreColumnIJ_SeqBAIJ, 2740 /*54*/ MatFDColoringCreate_SeqAIJ, 2741 0, 2742 0, 2743 0, 2744 MatSetValuesBlocked_SeqBAIJ, 2745 /*59*/ MatGetSubMatrix_SeqBAIJ, 2746 MatDestroy_SeqBAIJ, 2747 MatView_SeqBAIJ, 2748 0, 2749 0, 2750 /*64*/ 0, 2751 0, 2752 0, 2753 0, 2754 0, 2755 /*69*/ MatGetRowMaxAbs_SeqBAIJ, 2756 0, 2757 MatConvert_Basic, 2758 0, 2759 0, 2760 /*74*/ 0, 2761 MatFDColoringApply_BAIJ, 2762 0, 2763 0, 2764 0, 2765 /*79*/ 0, 2766 0, 2767 0, 2768 0, 2769 MatLoad_SeqBAIJ, 2770 /*84*/ 0, 2771 0, 2772 0, 2773 0, 2774 0, 2775 /*89*/ 0, 2776 0, 2777 0, 2778 0, 2779 0, 2780 /*94*/ 0, 2781 0, 2782 0, 2783 0, 2784 0, 2785 /*99*/0, 2786 0, 2787 0, 2788 0, 2789 0, 2790 /*104*/0, 2791 MatRealPart_SeqBAIJ, 2792 MatImaginaryPart_SeqBAIJ, 2793 0, 2794 0, 2795 /*109*/0, 2796 0, 2797 0, 2798 0, 2799 MatMissingDiagonal_SeqBAIJ, 2800 /*114*/0, 2801 0, 2802 0, 2803 0, 2804 0, 2805 /*119*/0, 2806 0, 2807 MatMultHermitianTranspose_SeqBAIJ, 2808 MatMultHermitianTransposeAdd_SeqBAIJ 2809 }; 2810 2811 EXTERN_C_BEGIN 2812 #undef __FUNCT__ 2813 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2814 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2815 { 2816 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2817 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2818 PetscErrorCode ierr; 2819 2820 PetscFunctionBegin; 2821 if (aij->nonew != 1) { 2822 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2823 } 2824 2825 /* allocate space for values if not already there */ 2826 if (!aij->saved_values) { 2827 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2828 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2829 } 2830 2831 /* copy values over */ 2832 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2833 PetscFunctionReturn(0); 2834 } 2835 EXTERN_C_END 2836 2837 EXTERN_C_BEGIN 2838 #undef __FUNCT__ 2839 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2840 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2841 { 2842 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2843 PetscErrorCode ierr; 2844 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2845 2846 PetscFunctionBegin; 2847 if (aij->nonew != 1) { 2848 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2849 } 2850 if (!aij->saved_values) { 2851 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2852 } 2853 2854 /* copy values over */ 2855 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2856 PetscFunctionReturn(0); 2857 } 2858 EXTERN_C_END 2859 2860 EXTERN_C_BEGIN 2861 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2862 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2863 EXTERN_C_END 2864 2865 EXTERN_C_BEGIN 2866 #undef __FUNCT__ 2867 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2868 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2869 { 2870 Mat_SeqBAIJ *b; 2871 PetscErrorCode ierr; 2872 PetscInt i,mbs,nbs,bs2,newbs = PetscAbs(bs); 2873 PetscTruth flg,skipallocation = PETSC_FALSE; 2874 2875 PetscFunctionBegin; 2876 2877 if (nz == MAT_SKIP_ALLOCATION) { 2878 skipallocation = PETSC_TRUE; 2879 nz = 0; 2880 } 2881 2882 if (bs < 0) { 2883 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2884 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2885 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2886 bs = PetscAbs(bs); 2887 } 2888 if (nnz && newbs != bs) { 2889 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2890 } 2891 bs = newbs; 2892 2893 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2894 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2895 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2896 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2897 2898 B->preallocated = PETSC_TRUE; 2899 2900 mbs = B->rmap->n/bs; 2901 nbs = B->cmap->n/bs; 2902 bs2 = bs*bs; 2903 2904 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) { 2905 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2906 } 2907 2908 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2909 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2910 if (nnz) { 2911 for (i=0; i<mbs; i++) { 2912 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2913 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2914 } 2915 } 2916 2917 b = (Mat_SeqBAIJ*)B->data; 2918 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2919 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2920 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2921 2922 if (!flg) { 2923 switch (bs) { 2924 case 1: 2925 B->ops->mult = MatMult_SeqBAIJ_1; 2926 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2927 B->ops->sor = MatSOR_SeqBAIJ_1; 2928 break; 2929 case 2: 2930 B->ops->mult = MatMult_SeqBAIJ_2; 2931 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2932 B->ops->sor = MatSOR_SeqBAIJ_2; 2933 break; 2934 case 3: 2935 B->ops->mult = MatMult_SeqBAIJ_3; 2936 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2937 B->ops->sor = MatSOR_SeqBAIJ_3; 2938 break; 2939 case 4: 2940 B->ops->mult = MatMult_SeqBAIJ_4; 2941 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2942 B->ops->sor = MatSOR_SeqBAIJ_4; 2943 break; 2944 case 5: 2945 B->ops->mult = MatMult_SeqBAIJ_5; 2946 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2947 B->ops->sor = MatSOR_SeqBAIJ_5; 2948 break; 2949 case 6: 2950 B->ops->mult = MatMult_SeqBAIJ_6; 2951 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2952 B->ops->sor = MatSOR_SeqBAIJ_6; 2953 break; 2954 case 7: 2955 B->ops->mult = MatMult_SeqBAIJ_7; 2956 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2957 B->ops->sor = MatSOR_SeqBAIJ_7; 2958 break; 2959 case 15: 2960 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2961 break; 2962 default: 2963 B->ops->mult = MatMult_SeqBAIJ_N; 2964 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2965 break; 2966 } 2967 } 2968 B->rmap->bs = bs; 2969 b->mbs = mbs; 2970 b->nbs = nbs; 2971 if (!skipallocation) { 2972 if (!b->imax) { 2973 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2974 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt)); 2975 b->free_imax_ilen = PETSC_TRUE; 2976 } 2977 /* b->ilen will count nonzeros in each block row so far. */ 2978 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2979 if (!nnz) { 2980 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2981 else if (nz <= 0) nz = 1; 2982 for (i=0; i<mbs; i++) b->imax[i] = nz; 2983 nz = nz*mbs; 2984 } else { 2985 nz = 0; 2986 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2987 } 2988 2989 /* allocate the matrix space */ 2990 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2991 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 2992 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2993 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2994 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2995 b->singlemalloc = PETSC_TRUE; 2996 b->i[0] = 0; 2997 for (i=1; i<mbs+1; i++) { 2998 b->i[i] = b->i[i-1] + b->imax[i-1]; 2999 } 3000 b->free_a = PETSC_TRUE; 3001 b->free_ij = PETSC_TRUE; 3002 } else { 3003 b->free_a = PETSC_FALSE; 3004 b->free_ij = PETSC_FALSE; 3005 } 3006 3007 B->rmap->bs = bs; 3008 b->bs2 = bs2; 3009 b->mbs = mbs; 3010 b->nz = 0; 3011 b->maxnz = nz*bs2; 3012 B->info.nz_unneeded = (PetscReal)b->maxnz; 3013 PetscFunctionReturn(0); 3014 } 3015 EXTERN_C_END 3016 3017 EXTERN_C_BEGIN 3018 #undef __FUNCT__ 3019 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3020 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3021 { 3022 PetscInt i,m,nz,nz_max=0,*nnz; 3023 PetscScalar *values=0; 3024 PetscErrorCode ierr; 3025 3026 PetscFunctionBegin; 3027 3028 if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3029 3030 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3031 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3032 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3033 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3034 m = B->rmap->n/bs; 3035 3036 if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 3037 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3038 for(i=0; i<m; i++) { 3039 nz = ii[i+1]- ii[i]; 3040 if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 3041 nz_max = PetscMax(nz_max, nz); 3042 nnz[i] = nz; 3043 } 3044 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3045 ierr = PetscFree(nnz);CHKERRQ(ierr); 3046 3047 values = (PetscScalar*)V; 3048 if (!values) { 3049 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3050 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3051 } 3052 for (i=0; i<m; i++) { 3053 PetscInt ncols = ii[i+1] - ii[i]; 3054 const PetscInt *icols = jj + ii[i]; 3055 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3056 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3057 } 3058 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3059 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3060 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3061 3062 PetscFunctionReturn(0); 3063 } 3064 EXTERN_C_END 3065 3066 3067 EXTERN_C_BEGIN 3068 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3069 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 3070 EXTERN_C_END 3071 3072 /*MC 3073 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3074 block sparse compressed row format. 3075 3076 Options Database Keys: 3077 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3078 3079 Level: beginner 3080 3081 .seealso: MatCreateSeqBAIJ() 3082 M*/ 3083 3084 3085 EXTERN_C_BEGIN 3086 #undef __FUNCT__ 3087 #define __FUNCT__ "MatCreate_SeqBAIJ" 3088 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 3089 { 3090 PetscErrorCode ierr; 3091 PetscMPIInt size; 3092 Mat_SeqBAIJ *b; 3093 3094 PetscFunctionBegin; 3095 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 3096 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3097 3098 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3099 B->data = (void*)b; 3100 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3101 B->mapping = 0; 3102 b->row = 0; 3103 b->col = 0; 3104 b->icol = 0; 3105 b->reallocs = 0; 3106 b->saved_values = 0; 3107 3108 b->roworiented = PETSC_TRUE; 3109 b->nonew = 0; 3110 b->diag = 0; 3111 b->solve_work = 0; 3112 b->mult_work = 0; 3113 B->spptr = 0; 3114 B->info.nz_unneeded = (PetscReal)b->maxnz; 3115 b->keepnonzeropattern = PETSC_FALSE; 3116 b->xtoy = 0; 3117 b->XtoY = 0; 3118 b->compressedrow.use = PETSC_FALSE; 3119 b->compressedrow.nrows = 0; 3120 b->compressedrow.i = PETSC_NULL; 3121 b->compressedrow.rindex = PETSC_NULL; 3122 b->compressedrow.checked = PETSC_FALSE; 3123 B->same_nonzero = PETSC_FALSE; 3124 3125 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C", 3126 "MatGetFactorAvailable_seqbaij_petsc", 3127 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3128 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C", 3129 "MatGetFactor_seqbaij_petsc", 3130 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3131 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 3132 "MatInvertBlockDiagonal_SeqBAIJ", 3133 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3134 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3135 "MatStoreValues_SeqBAIJ", 3136 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3137 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3138 "MatRetrieveValues_SeqBAIJ", 3139 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3140 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 3141 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 3142 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3143 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 3144 "MatConvert_SeqBAIJ_SeqAIJ", 3145 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3146 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 3147 "MatConvert_SeqBAIJ_SeqSBAIJ", 3148 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3149 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 3150 "MatSeqBAIJSetPreallocation_SeqBAIJ", 3151 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3152 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 3153 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 3154 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3155 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3156 PetscFunctionReturn(0); 3157 } 3158 EXTERN_C_END 3159 3160 #undef __FUNCT__ 3161 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3162 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscTruth mallocmatspace) 3163 { 3164 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3165 PetscErrorCode ierr; 3166 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3167 3168 PetscFunctionBegin; 3169 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 3170 3171 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3172 c->imax = a->imax; 3173 c->ilen = a->ilen; 3174 c->free_imax_ilen = PETSC_FALSE; 3175 } else { 3176 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3177 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3178 for (i=0; i<mbs; i++) { 3179 c->imax[i] = a->imax[i]; 3180 c->ilen[i] = a->ilen[i]; 3181 } 3182 c->free_imax_ilen = PETSC_TRUE; 3183 } 3184 3185 /* allocate the matrix space */ 3186 if (mallocmatspace){ 3187 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3188 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3189 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3190 c->singlemalloc = PETSC_FALSE; 3191 c->free_ij = PETSC_FALSE; 3192 c->i = a->i; 3193 c->j = a->j; 3194 c->parent = A; 3195 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3196 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3197 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3198 } else { 3199 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3200 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3201 c->singlemalloc = PETSC_TRUE; 3202 c->free_ij = PETSC_TRUE; 3203 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3204 if (mbs > 0) { 3205 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3206 if (cpvalues == MAT_COPY_VALUES) { 3207 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3208 } else { 3209 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3210 } 3211 } 3212 } 3213 } 3214 3215 c->roworiented = a->roworiented; 3216 c->nonew = a->nonew; 3217 ierr = PetscLayoutCopy(A->rmap,&C->rmap);CHKERRQ(ierr); 3218 ierr = PetscLayoutCopy(A->cmap,&C->cmap);CHKERRQ(ierr); 3219 c->bs2 = a->bs2; 3220 c->mbs = a->mbs; 3221 c->nbs = a->nbs; 3222 3223 if (a->diag) { 3224 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3225 c->diag = a->diag; 3226 c->free_diag = PETSC_FALSE; 3227 } else { 3228 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3229 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3230 for (i=0; i<mbs; i++) { 3231 c->diag[i] = a->diag[i]; 3232 } 3233 c->free_diag = PETSC_TRUE; 3234 } 3235 } else c->diag = 0; 3236 c->nz = a->nz; 3237 c->maxnz = a->maxnz; 3238 c->solve_work = 0; 3239 c->mult_work = 0; 3240 c->free_a = PETSC_TRUE; 3241 c->free_ij = PETSC_TRUE; 3242 C->preallocated = PETSC_TRUE; 3243 C->assembled = PETSC_TRUE; 3244 3245 c->compressedrow.use = a->compressedrow.use; 3246 c->compressedrow.nrows = a->compressedrow.nrows; 3247 c->compressedrow.checked = a->compressedrow.checked; 3248 if (a->compressedrow.checked && a->compressedrow.use){ 3249 i = a->compressedrow.nrows; 3250 ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 3251 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3252 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3253 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3254 } else { 3255 c->compressedrow.use = PETSC_FALSE; 3256 c->compressedrow.i = PETSC_NULL; 3257 c->compressedrow.rindex = PETSC_NULL; 3258 } 3259 C->same_nonzero = A->same_nonzero; 3260 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3261 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3262 PetscFunctionReturn(0); 3263 } 3264 3265 #undef __FUNCT__ 3266 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3267 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3268 { 3269 PetscErrorCode ierr; 3270 3271 PetscFunctionBegin; 3272 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 3273 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3274 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3275 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE); 3276 PetscFunctionReturn(0); 3277 } 3278 3279 #undef __FUNCT__ 3280 #define __FUNCT__ "MatLoad_SeqBAIJ" 3281 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A) 3282 { 3283 Mat_SeqBAIJ *a; 3284 Mat B; 3285 PetscErrorCode ierr; 3286 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3287 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3288 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 3289 PetscInt *masked,nmask,tmp,bs2,ishift; 3290 PetscMPIInt size; 3291 int fd; 3292 PetscScalar *aa; 3293 MPI_Comm comm = ((PetscObject)viewer)->comm; 3294 3295 PetscFunctionBegin; 3296 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3297 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3298 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3299 bs2 = bs*bs; 3300 3301 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3302 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 3303 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3304 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3305 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3306 M = header[1]; N = header[2]; nz = header[3]; 3307 3308 if (header[3] < 0) { 3309 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3310 } 3311 3312 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 3313 3314 /* 3315 This code adds extra rows to make sure the number of rows is 3316 divisible by the blocksize 3317 */ 3318 mbs = M/bs; 3319 extra_rows = bs - M + bs*(mbs); 3320 if (extra_rows == bs) extra_rows = 0; 3321 else mbs++; 3322 if (extra_rows) { 3323 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3324 } 3325 3326 /* read in row lengths */ 3327 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3328 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3329 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3330 3331 /* read in column indices */ 3332 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3333 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3334 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3335 3336 /* loop over row lengths determining block row lengths */ 3337 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3338 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3339 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3340 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3341 rowcount = 0; 3342 nzcount = 0; 3343 for (i=0; i<mbs; i++) { 3344 nmask = 0; 3345 for (j=0; j<bs; j++) { 3346 kmax = rowlengths[rowcount]; 3347 for (k=0; k<kmax; k++) { 3348 tmp = jj[nzcount++]/bs; 3349 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3350 } 3351 rowcount++; 3352 } 3353 browlengths[i] += nmask; 3354 /* zero out the mask elements we set */ 3355 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3356 } 3357 3358 /* create our matrix */ 3359 ierr = MatCreate(comm,&B); 3360 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 3361 ierr = MatSetType(B,type);CHKERRQ(ierr); 3362 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 3363 a = (Mat_SeqBAIJ*)B->data; 3364 3365 /* set matrix "i" values */ 3366 a->i[0] = 0; 3367 for (i=1; i<= mbs; i++) { 3368 a->i[i] = a->i[i-1] + browlengths[i-1]; 3369 a->ilen[i-1] = browlengths[i-1]; 3370 } 3371 a->nz = 0; 3372 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3373 3374 /* read in nonzero values */ 3375 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3376 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3377 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3378 3379 /* set "a" and "j" values into matrix */ 3380 nzcount = 0; jcount = 0; 3381 for (i=0; i<mbs; i++) { 3382 nzcountb = nzcount; 3383 nmask = 0; 3384 for (j=0; j<bs; j++) { 3385 kmax = rowlengths[i*bs+j]; 3386 for (k=0; k<kmax; k++) { 3387 tmp = jj[nzcount++]/bs; 3388 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3389 } 3390 } 3391 /* sort the masked values */ 3392 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3393 3394 /* set "j" values into matrix */ 3395 maskcount = 1; 3396 for (j=0; j<nmask; j++) { 3397 a->j[jcount++] = masked[j]; 3398 mask[masked[j]] = maskcount++; 3399 } 3400 /* set "a" values into matrix */ 3401 ishift = bs2*a->i[i]; 3402 for (j=0; j<bs; j++) { 3403 kmax = rowlengths[i*bs+j]; 3404 for (k=0; k<kmax; k++) { 3405 tmp = jj[nzcountb]/bs ; 3406 block = mask[tmp] - 1; 3407 point = jj[nzcountb] - bs*tmp; 3408 idx = ishift + bs2*block + j + bs*point; 3409 a->a[idx] = (MatScalar)aa[nzcountb++]; 3410 } 3411 } 3412 /* zero out the mask elements we set */ 3413 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3414 } 3415 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3416 3417 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3418 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3419 ierr = PetscFree(aa);CHKERRQ(ierr); 3420 ierr = PetscFree(jj);CHKERRQ(ierr); 3421 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3422 3423 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3424 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3425 ierr = MatView_Private(B);CHKERRQ(ierr); 3426 3427 *A = B; 3428 PetscFunctionReturn(0); 3429 } 3430 3431 #undef __FUNCT__ 3432 #define __FUNCT__ "MatCreateSeqBAIJ" 3433 /*@C 3434 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3435 compressed row) format. For good matrix assembly performance the 3436 user should preallocate the matrix storage by setting the parameter nz 3437 (or the array nnz). By setting these parameters accurately, performance 3438 during matrix assembly can be increased by more than a factor of 50. 3439 3440 Collective on MPI_Comm 3441 3442 Input Parameters: 3443 + comm - MPI communicator, set to PETSC_COMM_SELF 3444 . bs - size of block 3445 . m - number of rows 3446 . n - number of columns 3447 . nz - number of nonzero blocks per block row (same for all rows) 3448 - nnz - array containing the number of nonzero blocks in the various block rows 3449 (possibly different for each block row) or PETSC_NULL 3450 3451 Output Parameter: 3452 . A - the matrix 3453 3454 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3455 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3456 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3457 3458 Options Database Keys: 3459 . -mat_no_unroll - uses code that does not unroll the loops in the 3460 block calculations (much slower) 3461 . -mat_block_size - size of the blocks to use 3462 3463 Level: intermediate 3464 3465 Notes: 3466 The number of rows and columns must be divisible by blocksize. 3467 3468 If the nnz parameter is given then the nz parameter is ignored 3469 3470 A nonzero block is any block that as 1 or more nonzeros in it 3471 3472 The block AIJ format is fully compatible with standard Fortran 77 3473 storage. That is, the stored row and column indices can begin at 3474 either one (as in Fortran) or zero. See the users' manual for details. 3475 3476 Specify the preallocated storage with either nz or nnz (not both). 3477 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3478 allocation. For additional details, see the users manual chapter on 3479 matrices. 3480 3481 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3482 @*/ 3483 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3484 { 3485 PetscErrorCode ierr; 3486 3487 PetscFunctionBegin; 3488 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3489 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3490 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3491 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3492 PetscFunctionReturn(0); 3493 } 3494 3495 #undef __FUNCT__ 3496 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3497 /*@C 3498 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3499 per row in the matrix. For good matrix assembly performance the 3500 user should preallocate the matrix storage by setting the parameter nz 3501 (or the array nnz). By setting these parameters accurately, performance 3502 during matrix assembly can be increased by more than a factor of 50. 3503 3504 Collective on MPI_Comm 3505 3506 Input Parameters: 3507 + A - the matrix 3508 . bs - size of block 3509 . nz - number of block nonzeros per block row (same for all rows) 3510 - nnz - array containing the number of block nonzeros in the various block rows 3511 (possibly different for each block row) or PETSC_NULL 3512 3513 Options Database Keys: 3514 . -mat_no_unroll - uses code that does not unroll the loops in the 3515 block calculations (much slower) 3516 . -mat_block_size - size of the blocks to use 3517 3518 Level: intermediate 3519 3520 Notes: 3521 If the nnz parameter is given then the nz parameter is ignored 3522 3523 You can call MatGetInfo() to get information on how effective the preallocation was; 3524 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3525 You can also run with the option -info and look for messages with the string 3526 malloc in them to see if additional memory allocation was needed. 3527 3528 The block AIJ format is fully compatible with standard Fortran 77 3529 storage. That is, the stored row and column indices can begin at 3530 either one (as in Fortran) or zero. See the users' manual for details. 3531 3532 Specify the preallocated storage with either nz or nnz (not both). 3533 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3534 allocation. For additional details, see the users manual chapter on 3535 matrices. 3536 3537 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3538 @*/ 3539 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3540 { 3541 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 3542 3543 PetscFunctionBegin; 3544 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3545 if (f) { 3546 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 3547 } 3548 PetscFunctionReturn(0); 3549 } 3550 3551 #undef __FUNCT__ 3552 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3553 /*@C 3554 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3555 (the default sequential PETSc format). 3556 3557 Collective on MPI_Comm 3558 3559 Input Parameters: 3560 + A - the matrix 3561 . i - the indices into j for the start of each local row (starts with zero) 3562 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3563 - v - optional values in the matrix 3564 3565 Level: developer 3566 3567 .keywords: matrix, aij, compressed row, sparse 3568 3569 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3570 @*/ 3571 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3572 { 3573 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 3574 3575 PetscFunctionBegin; 3576 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3577 if (f) { 3578 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 3579 } 3580 PetscFunctionReturn(0); 3581 } 3582 3583 3584 #undef __FUNCT__ 3585 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3586 /*@ 3587 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 3588 (upper triangular entries in CSR format) provided by the user. 3589 3590 Collective on MPI_Comm 3591 3592 Input Parameters: 3593 + comm - must be an MPI communicator of size 1 3594 . bs - size of block 3595 . m - number of rows 3596 . n - number of columns 3597 . i - row indices 3598 . j - column indices 3599 - a - matrix values 3600 3601 Output Parameter: 3602 . mat - the matrix 3603 3604 Level: intermediate 3605 3606 Notes: 3607 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3608 once the matrix is destroyed 3609 3610 You cannot set new nonzero locations into this matrix, that will generate an error. 3611 3612 The i and j indices are 0 based 3613 3614 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3615 3616 @*/ 3617 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3618 { 3619 PetscErrorCode ierr; 3620 PetscInt ii; 3621 Mat_SeqBAIJ *baij; 3622 3623 PetscFunctionBegin; 3624 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3625 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3626 3627 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3628 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3629 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3630 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3631 baij = (Mat_SeqBAIJ*)(*mat)->data; 3632 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3633 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3634 3635 baij->i = i; 3636 baij->j = j; 3637 baij->a = a; 3638 baij->singlemalloc = PETSC_FALSE; 3639 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3640 baij->free_a = PETSC_FALSE; 3641 baij->free_ij = PETSC_FALSE; 3642 3643 for (ii=0; ii<m; ii++) { 3644 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3645 #if defined(PETSC_USE_DEBUG) 3646 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3647 #endif 3648 } 3649 #if defined(PETSC_USE_DEBUG) 3650 for (ii=0; ii<baij->i[m]; ii++) { 3651 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3652 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3653 } 3654 #endif 3655 3656 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3657 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3658 PetscFunctionReturn(0); 3659 } 3660