1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <../src/mat/blockinvert.h> 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 12 /*@ 13 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 14 15 Collective on Mat 16 17 Input Parameters: 18 . mat - the matrix 19 20 Level: advanced 21 @*/ 22 PetscErrorCode MatSeqBAIJInvertBlockDiagonal(Mat mat) 23 { 24 PetscErrorCode ierr; 25 26 PetscFunctionBegin; 27 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 28 if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 29 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 30 ierr = PetscUseMethod(mat,"MatSeqBAIJInvertBlockDiagonal_C",(Mat),(mat));CHKERRQ(ierr); 31 PetscFunctionReturn(0); 32 } 33 34 EXTERN_C_BEGIN 35 #undef __FUNCT__ 36 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 37 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A) 38 { 39 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 40 PetscErrorCode ierr; 41 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 42 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 43 PetscReal shift = 0.0; 44 45 PetscFunctionBegin; 46 if (a->idiagvalid) PetscFunctionReturn(0); 47 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 48 diag_offset = a->diag; 49 if (!a->idiag) { 50 ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 51 ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 52 } 53 diag = a->idiag; 54 mdiag = a->idiag+bs2*mbs; 55 /* factor and invert each block */ 56 switch (bs){ 57 case 1: 58 for (i=0; i<mbs; i++) { 59 odiag = v + 1*diag_offset[i]; 60 diag[0] = odiag[0]; 61 mdiag[0] = odiag[0]; 62 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 63 diag += 1; 64 mdiag += 1; 65 } 66 break; 67 case 2: 68 for (i=0; i<mbs; i++) { 69 odiag = v + 4*diag_offset[i]; 70 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 71 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 72 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 73 diag += 4; 74 mdiag += 4; 75 } 76 break; 77 case 3: 78 for (i=0; i<mbs; i++) { 79 odiag = v + 9*diag_offset[i]; 80 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 81 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 82 diag[8] = odiag[8]; 83 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 84 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 85 mdiag[8] = odiag[8]; 86 ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 87 diag += 9; 88 mdiag += 9; 89 } 90 break; 91 case 4: 92 for (i=0; i<mbs; i++) { 93 odiag = v + 16*diag_offset[i]; 94 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 95 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 97 diag += 16; 98 mdiag += 16; 99 } 100 break; 101 case 5: 102 for (i=0; i<mbs; i++) { 103 odiag = v + 25*diag_offset[i]; 104 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 105 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 107 diag += 25; 108 mdiag += 25; 109 } 110 break; 111 case 6: 112 for (i=0; i<mbs; i++) { 113 odiag = v + 36*diag_offset[i]; 114 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 115 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 117 diag += 36; 118 mdiag += 36; 119 } 120 break; 121 case 7: 122 for (i=0; i<mbs; i++) { 123 odiag = v + 49*diag_offset[i]; 124 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 125 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 126 ierr = Kernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 127 diag += 49; 128 mdiag += 49; 129 } 130 break; 131 default: 132 ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr); 133 for (i=0; i<mbs; i++) { 134 odiag = v + bs2*diag_offset[i]; 135 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 136 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 137 ierr = Kernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 138 diag += bs2; 139 mdiag += bs2; 140 } 141 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 142 } 143 a->idiagvalid = PETSC_TRUE; 144 PetscFunctionReturn(0); 145 } 146 EXTERN_C_END 147 148 #undef __FUNCT__ 149 #define __FUNCT__ "MatSOR_SeqBAIJ_1" 150 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 151 { 152 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 153 PetscScalar *x,x1,s1; 154 const PetscScalar *b; 155 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 156 PetscErrorCode ierr; 157 PetscInt m = a->mbs,i,i2,nz,j; 158 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 159 160 PetscFunctionBegin; 161 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 162 its = its*lits; 163 if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 164 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 165 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 166 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 167 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 168 169 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 170 171 diag = a->diag; 172 idiag = a->idiag; 173 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 174 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 175 176 if (flag & SOR_ZERO_INITIAL_GUESS) { 177 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 178 x[0] = b[0]*idiag[0]; 179 i2 = 1; 180 idiag += 1; 181 for (i=1; i<m; i++) { 182 v = aa + ai[i]; 183 vi = aj + ai[i]; 184 nz = diag[i] - ai[i]; 185 s1 = b[i2]; 186 for (j=0; j<nz; j++) { 187 s1 -= v[j]*x[vi[j]]; 188 } 189 x[i2] = idiag[0]*s1; 190 idiag += 1; 191 i2 += 1; 192 } 193 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 194 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 195 } 196 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 197 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 198 i2 = 0; 199 mdiag = a->idiag+a->mbs; 200 for (i=0; i<m; i++) { 201 x1 = x[i2]; 202 x[i2] = mdiag[0]*x1; 203 mdiag += 1; 204 i2 += 1; 205 } 206 ierr = PetscLogFlops(m);CHKERRQ(ierr); 207 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 208 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 209 } 210 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 211 idiag = a->idiag+a->mbs - 1; 212 i2 = m - 1; 213 x1 = x[i2]; 214 x[i2] = idiag[0]*x1; 215 idiag -= 1; 216 i2 -= 1; 217 for (i=m-2; i>=0; i--) { 218 v = aa + (diag[i]+1); 219 vi = aj + diag[i] + 1; 220 nz = ai[i+1] - diag[i] - 1; 221 s1 = x[i2]; 222 for (j=0; j<nz; j++) { 223 s1 -= v[j]*x[vi[j]]; 224 } 225 x[i2] = idiag[0]*s1; 226 idiag -= 1; 227 i2 -= 1; 228 } 229 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 230 } 231 } else { 232 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 233 } 234 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 235 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 236 PetscFunctionReturn(0); 237 } 238 239 #undef __FUNCT__ 240 #define __FUNCT__ "MatSOR_SeqBAIJ_2" 241 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 242 { 243 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 244 PetscScalar *x,x1,x2,s1,s2; 245 const PetscScalar *b; 246 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 247 PetscErrorCode ierr; 248 PetscInt m = a->mbs,i,i2,nz,idx,j,it; 249 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 250 251 PetscFunctionBegin; 252 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 253 its = its*lits; 254 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 255 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 256 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 257 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 258 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 259 260 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 261 262 diag = a->diag; 263 idiag = a->idiag; 264 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 265 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 266 267 if (flag & SOR_ZERO_INITIAL_GUESS) { 268 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 269 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 270 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 271 i2 = 2; 272 idiag += 4; 273 for (i=1; i<m; i++) { 274 v = aa + 4*ai[i]; 275 vi = aj + ai[i]; 276 nz = diag[i] - ai[i]; 277 s1 = b[i2]; s2 = b[i2+1]; 278 for (j=0; j<nz; j++) { 279 idx = 2*vi[j]; 280 it = 4*j; 281 x1 = x[idx]; x2 = x[1+idx]; 282 s1 -= v[it]*x1 + v[it+2]*x2; 283 s2 -= v[it+1]*x1 + v[it+3]*x2; 284 } 285 x[i2] = idiag[0]*s1 + idiag[2]*s2; 286 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 287 idiag += 4; 288 i2 += 2; 289 } 290 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 291 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 292 } 293 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 294 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 295 i2 = 0; 296 mdiag = a->idiag+4*a->mbs; 297 for (i=0; i<m; i++) { 298 x1 = x[i2]; x2 = x[i2+1]; 299 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 300 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 301 mdiag += 4; 302 i2 += 2; 303 } 304 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 305 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 306 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 307 } 308 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 309 idiag = a->idiag+4*a->mbs - 4; 310 i2 = 2*m - 2; 311 x1 = x[i2]; x2 = x[i2+1]; 312 x[i2] = idiag[0]*x1 + idiag[2]*x2; 313 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 314 idiag -= 4; 315 i2 -= 2; 316 for (i=m-2; i>=0; i--) { 317 v = aa + 4*(diag[i]+1); 318 vi = aj + diag[i] + 1; 319 nz = ai[i+1] - diag[i] - 1; 320 s1 = x[i2]; s2 = x[i2+1]; 321 for (j=0; j<nz; j++) { 322 idx = 2*vi[j]; 323 it = 4*j; 324 x1 = x[idx]; x2 = x[1+idx]; 325 s1 -= v[it]*x1 + v[it+2]*x2; 326 s2 -= v[it+1]*x1 + v[it+3]*x2; 327 } 328 x[i2] = idiag[0]*s1 + idiag[2]*s2; 329 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 330 idiag -= 4; 331 i2 -= 2; 332 } 333 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 334 } 335 } else { 336 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 337 } 338 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 339 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 340 PetscFunctionReturn(0); 341 } 342 343 #undef __FUNCT__ 344 #define __FUNCT__ "MatSOR_SeqBAIJ_3" 345 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 346 { 347 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 348 PetscScalar *x,x1,x2,x3,s1,s2,s3; 349 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 350 const PetscScalar *b; 351 PetscErrorCode ierr; 352 PetscInt m = a->mbs,i,i2,nz,idx; 353 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 354 355 PetscFunctionBegin; 356 its = its*lits; 357 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 358 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 359 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 360 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 361 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 362 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 363 364 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 365 366 diag = a->diag; 367 idiag = a->idiag; 368 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 369 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 370 371 if (flag & SOR_ZERO_INITIAL_GUESS) { 372 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 373 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 374 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 375 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 376 i2 = 3; 377 idiag += 9; 378 for (i=1; i<m; i++) { 379 v = aa + 9*ai[i]; 380 vi = aj + ai[i]; 381 nz = diag[i] - ai[i]; 382 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 383 while (nz--) { 384 idx = 3*(*vi++); 385 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 386 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 387 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 388 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 389 v += 9; 390 } 391 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 392 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 393 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 394 idiag += 9; 395 i2 += 3; 396 } 397 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 398 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 399 } 400 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 401 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 402 i2 = 0; 403 mdiag = a->idiag+9*a->mbs; 404 for (i=0; i<m; i++) { 405 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 406 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 407 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 408 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 409 mdiag += 9; 410 i2 += 3; 411 } 412 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 413 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 414 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 415 } 416 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 417 idiag = a->idiag+9*a->mbs - 9; 418 i2 = 3*m - 3; 419 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 420 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 421 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 422 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 423 idiag -= 9; 424 i2 -= 3; 425 for (i=m-2; i>=0; i--) { 426 v = aa + 9*(diag[i]+1); 427 vi = aj + diag[i] + 1; 428 nz = ai[i+1] - diag[i] - 1; 429 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 430 while (nz--) { 431 idx = 3*(*vi++); 432 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 433 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 434 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 435 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 436 v += 9; 437 } 438 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 439 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 440 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 441 idiag -= 9; 442 i2 -= 3; 443 } 444 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 445 } 446 } else { 447 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 448 } 449 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 450 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 451 PetscFunctionReturn(0); 452 } 453 454 #undef __FUNCT__ 455 #define __FUNCT__ "MatSOR_SeqBAIJ_4" 456 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 457 { 458 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 459 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 460 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 461 const PetscScalar *b; 462 PetscErrorCode ierr; 463 PetscInt m = a->mbs,i,i2,nz,idx; 464 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 465 466 PetscFunctionBegin; 467 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 468 its = its*lits; 469 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 470 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 471 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 472 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 473 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 474 475 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 476 477 diag = a->diag; 478 idiag = a->idiag; 479 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 480 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 481 482 if (flag & SOR_ZERO_INITIAL_GUESS) { 483 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 484 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 485 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 486 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 487 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 488 i2 = 4; 489 idiag += 16; 490 for (i=1; i<m; i++) { 491 v = aa + 16*ai[i]; 492 vi = aj + ai[i]; 493 nz = diag[i] - ai[i]; 494 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 495 while (nz--) { 496 idx = 4*(*vi++); 497 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 498 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 499 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 500 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 501 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 502 v += 16; 503 } 504 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 505 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 506 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 507 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 508 idiag += 16; 509 i2 += 4; 510 } 511 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 512 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 513 } 514 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 515 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 516 i2 = 0; 517 mdiag = a->idiag+16*a->mbs; 518 for (i=0; i<m; i++) { 519 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 520 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 521 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 522 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 523 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 524 mdiag += 16; 525 i2 += 4; 526 } 527 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 528 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 529 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 530 } 531 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 532 idiag = a->idiag+16*a->mbs - 16; 533 i2 = 4*m - 4; 534 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 535 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 536 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 537 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 538 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 539 idiag -= 16; 540 i2 -= 4; 541 for (i=m-2; i>=0; i--) { 542 v = aa + 16*(diag[i]+1); 543 vi = aj + diag[i] + 1; 544 nz = ai[i+1] - diag[i] - 1; 545 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 546 while (nz--) { 547 idx = 4*(*vi++); 548 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 549 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 550 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 551 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 552 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 553 v += 16; 554 } 555 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 556 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 557 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 558 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 559 idiag -= 16; 560 i2 -= 4; 561 } 562 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 563 } 564 } else { 565 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 566 } 567 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 568 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 569 PetscFunctionReturn(0); 570 } 571 572 #undef __FUNCT__ 573 #define __FUNCT__ "MatSOR_SeqBAIJ_5" 574 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 575 { 576 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 577 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 578 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 579 const PetscScalar *b; 580 PetscErrorCode ierr; 581 PetscInt m = a->mbs,i,i2,nz,idx; 582 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 583 584 PetscFunctionBegin; 585 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 586 its = its*lits; 587 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 588 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 589 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 590 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 591 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 592 593 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 594 595 diag = a->diag; 596 idiag = a->idiag; 597 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 598 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 599 600 if (flag & SOR_ZERO_INITIAL_GUESS) { 601 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 602 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 603 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 604 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 605 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 606 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 607 i2 = 5; 608 idiag += 25; 609 for (i=1; i<m; i++) { 610 v = aa + 25*ai[i]; 611 vi = aj + ai[i]; 612 nz = diag[i] - ai[i]; 613 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 614 while (nz--) { 615 idx = 5*(*vi++); 616 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 617 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 618 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 619 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 620 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 621 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 622 v += 25; 623 } 624 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 625 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 626 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 627 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 628 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 629 idiag += 25; 630 i2 += 5; 631 } 632 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 633 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 634 } 635 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 636 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 637 i2 = 0; 638 mdiag = a->idiag+25*a->mbs; 639 for (i=0; i<m; i++) { 640 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 641 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 642 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 643 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 644 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 645 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 646 mdiag += 25; 647 i2 += 5; 648 } 649 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 650 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 651 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 652 } 653 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 654 idiag = a->idiag+25*a->mbs - 25; 655 i2 = 5*m - 5; 656 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 657 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 658 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 659 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 660 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 661 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 662 idiag -= 25; 663 i2 -= 5; 664 for (i=m-2; i>=0; i--) { 665 v = aa + 25*(diag[i]+1); 666 vi = aj + diag[i] + 1; 667 nz = ai[i+1] - diag[i] - 1; 668 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 669 while (nz--) { 670 idx = 5*(*vi++); 671 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 672 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 673 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 674 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 675 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 676 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 677 v += 25; 678 } 679 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 680 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 681 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 682 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 683 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 684 idiag -= 25; 685 i2 -= 5; 686 } 687 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 688 } 689 } else { 690 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 691 } 692 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 693 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 694 PetscFunctionReturn(0); 695 } 696 697 #undef __FUNCT__ 698 #define __FUNCT__ "MatSOR_SeqBAIJ_6" 699 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 700 { 701 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 702 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 703 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 704 const PetscScalar *b; 705 PetscErrorCode ierr; 706 PetscInt m = a->mbs,i,i2,nz,idx; 707 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 708 709 PetscFunctionBegin; 710 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 711 its = its*lits; 712 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 713 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 714 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 715 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 716 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 717 718 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 719 720 diag = a->diag; 721 idiag = a->idiag; 722 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 723 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 724 725 if (flag & SOR_ZERO_INITIAL_GUESS) { 726 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 727 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 728 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 729 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 730 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 731 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 732 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 733 i2 = 6; 734 idiag += 36; 735 for (i=1; i<m; i++) { 736 v = aa + 36*ai[i]; 737 vi = aj + ai[i]; 738 nz = diag[i] - ai[i]; 739 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 740 while (nz--) { 741 idx = 6*(*vi++); 742 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 743 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 744 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 745 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 746 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 747 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 748 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 749 v += 36; 750 } 751 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 752 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 753 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 754 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 755 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 756 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 757 idiag += 36; 758 i2 += 6; 759 } 760 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 761 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 762 } 763 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 764 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 765 i2 = 0; 766 mdiag = a->idiag+36*a->mbs; 767 for (i=0; i<m; i++) { 768 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 769 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 770 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 771 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 772 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 773 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 774 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 775 mdiag += 36; 776 i2 += 6; 777 } 778 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 779 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 780 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 781 } 782 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 783 idiag = a->idiag+36*a->mbs - 36; 784 i2 = 6*m - 6; 785 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 786 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 787 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 788 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 789 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 790 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 791 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 792 idiag -= 36; 793 i2 -= 6; 794 for (i=m-2; i>=0; i--) { 795 v = aa + 36*(diag[i]+1); 796 vi = aj + diag[i] + 1; 797 nz = ai[i+1] - diag[i] - 1; 798 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 799 while (nz--) { 800 idx = 6*(*vi++); 801 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 802 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 803 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 804 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 805 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 806 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 807 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 808 v += 36; 809 } 810 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 811 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 812 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 813 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 814 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 815 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 816 idiag -= 36; 817 i2 -= 6; 818 } 819 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 820 } 821 } else { 822 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 823 } 824 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 825 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 826 PetscFunctionReturn(0); 827 } 828 829 #undef __FUNCT__ 830 #define __FUNCT__ "MatSOR_SeqBAIJ_7" 831 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 832 { 833 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 834 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 835 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 836 const PetscScalar *b; 837 PetscErrorCode ierr; 838 PetscInt m = a->mbs,i,i2,nz,idx; 839 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 840 841 PetscFunctionBegin; 842 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 843 its = its*lits; 844 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 845 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 846 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 847 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 848 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 849 850 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 851 852 diag = a->diag; 853 idiag = a->idiag; 854 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 855 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 856 857 if (flag & SOR_ZERO_INITIAL_GUESS) { 858 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 859 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 860 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 861 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 862 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 863 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 864 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 865 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 866 i2 = 7; 867 idiag += 49; 868 for (i=1; i<m; i++) { 869 v = aa + 49*ai[i]; 870 vi = aj + ai[i]; 871 nz = diag[i] - ai[i]; 872 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 873 while (nz--) { 874 idx = 7*(*vi++); 875 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 876 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 877 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 878 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 879 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 880 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 881 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 882 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 883 v += 49; 884 } 885 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 886 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 887 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 888 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 889 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 890 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 891 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 892 idiag += 49; 893 i2 += 7; 894 } 895 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 896 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 897 } 898 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 899 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 900 i2 = 0; 901 mdiag = a->idiag+49*a->mbs; 902 for (i=0; i<m; i++) { 903 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 904 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7; 905 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7; 906 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7; 907 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7; 908 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7; 909 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7; 910 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7; 911 mdiag += 49; 912 i2 += 7; 913 } 914 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 915 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 916 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 917 } 918 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 919 idiag = a->idiag+49*a->mbs - 49; 920 i2 = 7*m - 7; 921 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 922 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 923 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 924 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 925 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 926 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 927 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 928 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 929 idiag -= 49; 930 i2 -= 7; 931 for (i=m-2; i>=0; i--) { 932 v = aa + 49*(diag[i]+1); 933 vi = aj + diag[i] + 1; 934 nz = ai[i+1] - diag[i] - 1; 935 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 936 while (nz--) { 937 idx = 7*(*vi++); 938 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 939 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 940 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 941 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 942 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 943 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 944 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 945 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 946 v += 49; 947 } 948 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 949 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 950 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 951 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 952 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 953 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 954 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 955 idiag -= 49; 956 i2 -= 7; 957 } 958 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 959 } 960 } else { 961 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 962 } 963 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 964 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 965 PetscFunctionReturn(0); 966 } 967 968 #undef __FUNCT__ 969 #define __FUNCT__ "MatSOR_SeqBAIJ_N" 970 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 971 { 972 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 973 PetscScalar *x,*work,*w,*workt; 974 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 975 const PetscScalar *b; 976 PetscErrorCode ierr; 977 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j; 978 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 979 980 PetscFunctionBegin; 981 its = its*lits; 982 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 983 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 984 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 985 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 986 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 987 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 988 989 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 990 991 diag = a->diag; 992 idiag = a->idiag; 993 if (!a->mult_work) { 994 k = PetscMax(A->rmap->n,A->cmap->n); 995 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 996 } 997 work = a->mult_work; 998 if (!a->sor_work) { 999 ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr); 1000 } 1001 w = a->sor_work; 1002 1003 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1004 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1005 1006 if (flag & SOR_ZERO_INITIAL_GUESS) { 1007 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1008 Kernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 1009 /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 1010 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 1011 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/ 1012 i2 = bs; 1013 idiag += bs2; 1014 for (i=1; i<m; i++) { 1015 v = aa + bs2*ai[i]; 1016 vi = aj + ai[i]; 1017 nz = diag[i] - ai[i]; 1018 1019 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1020 /* copy all rows of x that are needed into contiguous space */ 1021 workt = work; 1022 for (j=0; j<nz; j++) { 1023 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1024 workt += bs; 1025 } 1026 Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1027 /*s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 1028 while (nz--) { 1029 idx = N*(*vi++); 1030 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 1031 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1032 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1033 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1034 v += N2; 1035 } */ 1036 1037 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1038 /* x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1039 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1040 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/ 1041 1042 idiag += bs2; 1043 i2 += bs; 1044 } 1045 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 1046 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1047 } 1048 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 1049 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1050 i2 = 0; 1051 mdiag = a->idiag+bs2*a->mbs; 1052 ierr = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr); 1053 for (i=0; i<m; i++) { 1054 Kernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2); 1055 /* x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1056 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 1057 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 1058 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */ 1059 1060 mdiag += bs2; 1061 i2 += bs; 1062 } 1063 ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr); 1064 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1065 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 1066 } 1067 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1068 idiag = a->idiag+bs2*a->mbs - bs2; 1069 i2 = bs*m - bs; 1070 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1071 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1072 /*x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1073 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 1074 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 1075 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/ 1076 idiag -= bs2; 1077 i2 -= bs; 1078 for (i=m-2; i>=0; i--) { 1079 v = aa + bs2*(diag[i]+1); 1080 vi = aj + diag[i] + 1; 1081 nz = ai[i+1] - diag[i] - 1; 1082 1083 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1084 /* copy all rows of x that are needed into contiguous space */ 1085 workt = work; 1086 for (j=0; j<nz; j++) { 1087 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1088 workt += bs; 1089 } 1090 Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1091 /* s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 1092 while (nz--) { 1093 idx = N*(*vi++); 1094 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 1095 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1096 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1097 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1098 v += N2; 1099 } */ 1100 1101 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1102 /*x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1103 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1104 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */ 1105 idiag -= bs2; 1106 i2 -= bs; 1107 } 1108 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1109 } 1110 } else { 1111 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 1112 } 1113 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1114 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 /* 1119 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1120 */ 1121 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1122 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1123 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1124 #define matsetvaluesblocked4_ matsetvaluesblocked4 1125 #endif 1126 1127 EXTERN_C_BEGIN 1128 #undef __FUNCT__ 1129 #define __FUNCT__ "matsetvaluesblocked4_" 1130 void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 1131 { 1132 Mat A = *AA; 1133 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1134 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 1135 PetscInt *ai=a->i,*ailen=a->ilen; 1136 PetscInt *aj=a->j,stepval,lastcol = -1; 1137 const PetscScalar *value = v; 1138 MatScalar *ap,*aa = a->a,*bap; 1139 1140 PetscFunctionBegin; 1141 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 1142 stepval = (n-1)*4; 1143 for (k=0; k<m; k++) { /* loop over added rows */ 1144 row = im[k]; 1145 rp = aj + ai[row]; 1146 ap = aa + 16*ai[row]; 1147 nrow = ailen[row]; 1148 low = 0; 1149 high = nrow; 1150 for (l=0; l<n; l++) { /* loop over added columns */ 1151 col = in[l]; 1152 if (col <= lastcol) low = 0; else high = nrow; 1153 lastcol = col; 1154 value = v + k*(stepval+4 + l)*4; 1155 while (high-low > 7) { 1156 t = (low+high)/2; 1157 if (rp[t] > col) high = t; 1158 else low = t; 1159 } 1160 for (i=low; i<high; i++) { 1161 if (rp[i] > col) break; 1162 if (rp[i] == col) { 1163 bap = ap + 16*i; 1164 for (ii=0; ii<4; ii++,value+=stepval) { 1165 for (jj=ii; jj<16; jj+=4) { 1166 bap[jj] += *value++; 1167 } 1168 } 1169 goto noinsert2; 1170 } 1171 } 1172 N = nrow++ - 1; 1173 high++; /* added new column index thus must search to one higher than before */ 1174 /* shift up all the later entries in this row */ 1175 for (ii=N; ii>=i; ii--) { 1176 rp[ii+1] = rp[ii]; 1177 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1178 } 1179 if (N >= i) { 1180 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1181 } 1182 rp[i] = col; 1183 bap = ap + 16*i; 1184 for (ii=0; ii<4; ii++,value+=stepval) { 1185 for (jj=ii; jj<16; jj+=4) { 1186 bap[jj] = *value++; 1187 } 1188 } 1189 noinsert2:; 1190 low = i; 1191 } 1192 ailen[row] = nrow; 1193 } 1194 PetscFunctionReturnVoid(); 1195 } 1196 EXTERN_C_END 1197 1198 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1199 #define matsetvalues4_ MATSETVALUES4 1200 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1201 #define matsetvalues4_ matsetvalues4 1202 #endif 1203 1204 EXTERN_C_BEGIN 1205 #undef __FUNCT__ 1206 #define __FUNCT__ "MatSetValues4_" 1207 void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1208 { 1209 Mat A = *AA; 1210 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1211 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1212 PetscInt *ai=a->i,*ailen=a->ilen; 1213 PetscInt *aj=a->j,brow,bcol; 1214 PetscInt ridx,cidx,lastcol = -1; 1215 MatScalar *ap,value,*aa=a->a,*bap; 1216 1217 PetscFunctionBegin; 1218 for (k=0; k<m; k++) { /* loop over added rows */ 1219 row = im[k]; brow = row/4; 1220 rp = aj + ai[brow]; 1221 ap = aa + 16*ai[brow]; 1222 nrow = ailen[brow]; 1223 low = 0; 1224 high = nrow; 1225 for (l=0; l<n; l++) { /* loop over added columns */ 1226 col = in[l]; bcol = col/4; 1227 ridx = row % 4; cidx = col % 4; 1228 value = v[l + k*n]; 1229 if (col <= lastcol) low = 0; else high = nrow; 1230 lastcol = col; 1231 while (high-low > 7) { 1232 t = (low+high)/2; 1233 if (rp[t] > bcol) high = t; 1234 else low = t; 1235 } 1236 for (i=low; i<high; i++) { 1237 if (rp[i] > bcol) break; 1238 if (rp[i] == bcol) { 1239 bap = ap + 16*i + 4*cidx + ridx; 1240 *bap += value; 1241 goto noinsert1; 1242 } 1243 } 1244 N = nrow++ - 1; 1245 high++; /* added new column thus must search to one higher than before */ 1246 /* shift up all the later entries in this row */ 1247 for (ii=N; ii>=i; ii--) { 1248 rp[ii+1] = rp[ii]; 1249 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1250 } 1251 if (N>=i) { 1252 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1253 } 1254 rp[i] = bcol; 1255 ap[16*i + 4*cidx + ridx] = value; 1256 noinsert1:; 1257 low = i; 1258 } 1259 ailen[brow] = nrow; 1260 } 1261 PetscFunctionReturnVoid(); 1262 } 1263 EXTERN_C_END 1264 1265 /* 1266 Checks for missing diagonals 1267 */ 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1270 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1271 { 1272 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1273 PetscErrorCode ierr; 1274 PetscInt *diag,*jj = a->j,i; 1275 1276 PetscFunctionBegin; 1277 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1278 *missing = PETSC_FALSE; 1279 if (A->rmap->n > 0 && !jj) { 1280 *missing = PETSC_TRUE; 1281 if (d) *d = 0; 1282 PetscInfo(A,"Matrix has no entries therefor is missing diagonal"); 1283 } else { 1284 diag = a->diag; 1285 for (i=0; i<a->mbs; i++) { 1286 if (jj[diag[i]] != i) { 1287 *missing = PETSC_TRUE; 1288 if (d) *d = i; 1289 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1290 } 1291 } 1292 } 1293 PetscFunctionReturn(0); 1294 } 1295 1296 #undef __FUNCT__ 1297 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1298 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1299 { 1300 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1301 PetscErrorCode ierr; 1302 PetscInt i,j,m = a->mbs; 1303 1304 PetscFunctionBegin; 1305 if (!a->diag) { 1306 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1307 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1308 a->free_diag = PETSC_TRUE; 1309 } 1310 for (i=0; i<m; i++) { 1311 a->diag[i] = a->i[i+1]; 1312 for (j=a->i[i]; j<a->i[i+1]; j++) { 1313 if (a->j[j] == i) { 1314 a->diag[i] = j; 1315 break; 1316 } 1317 } 1318 } 1319 PetscFunctionReturn(0); 1320 } 1321 1322 1323 extern PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1324 1325 #undef __FUNCT__ 1326 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1327 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 1328 { 1329 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1330 PetscErrorCode ierr; 1331 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt; 1332 PetscInt *tia, *tja; 1333 1334 PetscFunctionBegin; 1335 *nn = n; 1336 if (!ia) PetscFunctionReturn(0); 1337 if (symmetric) { 1338 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1339 nz = tia[n]; 1340 } else { 1341 tia = a->i; tja = a->j; 1342 } 1343 1344 if (!blockcompressed && bs > 1) { 1345 (*nn) *= bs; 1346 /* malloc & create the natural set of indices */ 1347 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1348 if (n) { 1349 (*ia)[0] = 0; 1350 for (j=1; j<bs; j++) { 1351 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1352 } 1353 } 1354 1355 for (i=1; i<n; i++) { 1356 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1357 for (j=1; j<bs; j++) { 1358 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1359 } 1360 } 1361 if (n) { 1362 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1363 } 1364 1365 if (ja) { 1366 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1367 cnt = 0; 1368 for (i=0; i<n; i++) { 1369 for (j=0; j<bs; j++) { 1370 for (k=tia[i]; k<tia[i+1]; k++) { 1371 for (l=0; l<bs; l++) { 1372 (*ja)[cnt++] = bs*tja[k] + l; 1373 } 1374 } 1375 } 1376 } 1377 } 1378 1379 n *= bs; 1380 nz *= bs*bs; 1381 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1382 ierr = PetscFree(tia);CHKERRQ(ierr); 1383 ierr = PetscFree(tja);CHKERRQ(ierr); 1384 } 1385 } else if (oshift == 1) { 1386 if (symmetric) { 1387 PetscInt nz = tia[A->rmap->n/bs]; 1388 /* add 1 to i and j indices */ 1389 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1390 *ia = tia; 1391 if (ja) { 1392 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1393 *ja = tja; 1394 } 1395 } else { 1396 PetscInt nz = a->i[A->rmap->n/bs]; 1397 /* malloc space and add 1 to i and j indices */ 1398 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1399 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1400 if (ja) { 1401 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1402 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1403 } 1404 } 1405 } else { 1406 *ia = tia; 1407 if (ja) *ja = tja; 1408 } 1409 1410 PetscFunctionReturn(0); 1411 } 1412 1413 #undef __FUNCT__ 1414 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1415 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 1416 { 1417 PetscErrorCode ierr; 1418 1419 PetscFunctionBegin; 1420 if (!ia) PetscFunctionReturn(0); 1421 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1422 ierr = PetscFree(*ia);CHKERRQ(ierr); 1423 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1424 } 1425 PetscFunctionReturn(0); 1426 } 1427 1428 #undef __FUNCT__ 1429 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1430 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1431 { 1432 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1433 PetscErrorCode ierr; 1434 1435 PetscFunctionBegin; 1436 #if defined(PETSC_USE_LOG) 1437 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1438 #endif 1439 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1440 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1441 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1442 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1443 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1444 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1445 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1446 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1447 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1448 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1449 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1450 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1451 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1452 1453 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1454 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1455 ierr = PetscFree(A->data);CHKERRQ(ierr); 1456 1457 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1458 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1459 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1460 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1461 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1462 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1463 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1464 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1465 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1466 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",PETSC_NULL);CHKERRQ(ierr); 1467 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1468 PetscFunctionReturn(0); 1469 } 1470 1471 #undef __FUNCT__ 1472 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1473 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1474 { 1475 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1476 PetscErrorCode ierr; 1477 1478 PetscFunctionBegin; 1479 switch (op) { 1480 case MAT_ROW_ORIENTED: 1481 a->roworiented = flg; 1482 break; 1483 case MAT_KEEP_NONZERO_PATTERN: 1484 a->keepnonzeropattern = flg; 1485 break; 1486 case MAT_NEW_NONZERO_LOCATIONS: 1487 a->nonew = (flg ? 0 : 1); 1488 break; 1489 case MAT_NEW_NONZERO_LOCATION_ERR: 1490 a->nonew = (flg ? -1 : 0); 1491 break; 1492 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1493 a->nonew = (flg ? -2 : 0); 1494 break; 1495 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1496 a->nounused = (flg ? -1 : 0); 1497 break; 1498 case MAT_CHECK_COMPRESSED_ROW: 1499 a->compressedrow.check = flg; 1500 break; 1501 case MAT_NEW_DIAGONALS: 1502 case MAT_IGNORE_OFF_PROC_ENTRIES: 1503 case MAT_USE_HASH_TABLE: 1504 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1505 break; 1506 case MAT_SYMMETRIC: 1507 case MAT_STRUCTURALLY_SYMMETRIC: 1508 case MAT_HERMITIAN: 1509 case MAT_SYMMETRY_ETERNAL: 1510 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1511 break; 1512 default: 1513 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1514 } 1515 PetscFunctionReturn(0); 1516 } 1517 1518 #undef __FUNCT__ 1519 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1520 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1521 { 1522 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1523 PetscErrorCode ierr; 1524 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1525 MatScalar *aa,*aa_i; 1526 PetscScalar *v_i; 1527 1528 PetscFunctionBegin; 1529 bs = A->rmap->bs; 1530 ai = a->i; 1531 aj = a->j; 1532 aa = a->a; 1533 bs2 = a->bs2; 1534 1535 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1536 1537 bn = row/bs; /* Block number */ 1538 bp = row % bs; /* Block Position */ 1539 M = ai[bn+1] - ai[bn]; 1540 *nz = bs*M; 1541 1542 if (v) { 1543 *v = 0; 1544 if (*nz) { 1545 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1546 for (i=0; i<M; i++) { /* for each block in the block row */ 1547 v_i = *v + i*bs; 1548 aa_i = aa + bs2*(ai[bn] + i); 1549 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1550 } 1551 } 1552 } 1553 1554 if (idx) { 1555 *idx = 0; 1556 if (*nz) { 1557 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1558 for (i=0; i<M; i++) { /* for each block in the block row */ 1559 idx_i = *idx + i*bs; 1560 itmp = bs*aj[ai[bn] + i]; 1561 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1562 } 1563 } 1564 } 1565 PetscFunctionReturn(0); 1566 } 1567 1568 #undef __FUNCT__ 1569 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1570 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1571 { 1572 PetscErrorCode ierr; 1573 1574 PetscFunctionBegin; 1575 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1576 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1577 PetscFunctionReturn(0); 1578 } 1579 1580 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1581 1582 #undef __FUNCT__ 1583 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1584 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1585 { 1586 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1587 Mat C; 1588 PetscErrorCode ierr; 1589 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1590 PetscInt *rows,*cols,bs2=a->bs2; 1591 MatScalar *array; 1592 1593 PetscFunctionBegin; 1594 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1595 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1596 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1597 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1598 1599 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1600 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1601 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1602 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1603 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1604 ierr = PetscFree(col);CHKERRQ(ierr); 1605 } else { 1606 C = *B; 1607 } 1608 1609 array = a->a; 1610 ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 1611 for (i=0; i<mbs; i++) { 1612 cols[0] = i*bs; 1613 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1614 len = ai[i+1] - ai[i]; 1615 for (j=0; j<len; j++) { 1616 rows[0] = (*aj++)*bs; 1617 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1618 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1619 array += bs2; 1620 } 1621 } 1622 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1623 1624 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1625 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1626 1627 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1628 *B = C; 1629 } else { 1630 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1631 } 1632 PetscFunctionReturn(0); 1633 } 1634 1635 EXTERN_C_BEGIN 1636 #undef __FUNCT__ 1637 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1638 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1639 { 1640 PetscErrorCode ierr; 1641 Mat Btrans; 1642 1643 PetscFunctionBegin; 1644 *f = PETSC_FALSE; 1645 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1646 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1647 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1648 PetscFunctionReturn(0); 1649 } 1650 EXTERN_C_END 1651 1652 #undef __FUNCT__ 1653 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1654 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1655 { 1656 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1657 PetscErrorCode ierr; 1658 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1659 int fd; 1660 PetscScalar *aa; 1661 FILE *file; 1662 1663 PetscFunctionBegin; 1664 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1665 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1666 col_lens[0] = MAT_FILE_CLASSID; 1667 1668 col_lens[1] = A->rmap->N; 1669 col_lens[2] = A->cmap->n; 1670 col_lens[3] = a->nz*bs2; 1671 1672 /* store lengths of each row and write (including header) to file */ 1673 count = 0; 1674 for (i=0; i<a->mbs; i++) { 1675 for (j=0; j<bs; j++) { 1676 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1677 } 1678 } 1679 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1680 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1681 1682 /* store column indices (zero start index) */ 1683 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1684 count = 0; 1685 for (i=0; i<a->mbs; i++) { 1686 for (j=0; j<bs; j++) { 1687 for (k=a->i[i]; k<a->i[i+1]; k++) { 1688 for (l=0; l<bs; l++) { 1689 jj[count++] = bs*a->j[k] + l; 1690 } 1691 } 1692 } 1693 } 1694 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1695 ierr = PetscFree(jj);CHKERRQ(ierr); 1696 1697 /* store nonzero values */ 1698 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1699 count = 0; 1700 for (i=0; i<a->mbs; i++) { 1701 for (j=0; j<bs; j++) { 1702 for (k=a->i[i]; k<a->i[i+1]; k++) { 1703 for (l=0; l<bs; l++) { 1704 aa[count++] = a->a[bs2*k + l*bs + j]; 1705 } 1706 } 1707 } 1708 } 1709 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1710 ierr = PetscFree(aa);CHKERRQ(ierr); 1711 1712 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1713 if (file) { 1714 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1715 } 1716 PetscFunctionReturn(0); 1717 } 1718 1719 #undef __FUNCT__ 1720 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1721 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1722 { 1723 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1724 PetscErrorCode ierr; 1725 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1726 PetscViewerFormat format; 1727 1728 PetscFunctionBegin; 1729 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1730 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1731 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1732 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1733 Mat aij; 1734 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1735 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1736 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1737 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1738 PetscFunctionReturn(0); 1739 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1740 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1741 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1742 for (i=0; i<a->mbs; i++) { 1743 for (j=0; j<bs; j++) { 1744 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1745 for (k=a->i[i]; k<a->i[i+1]; k++) { 1746 for (l=0; l<bs; l++) { 1747 #if defined(PETSC_USE_COMPLEX) 1748 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1749 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1750 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1751 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1752 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1753 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1754 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1755 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1756 } 1757 #else 1758 if (a->a[bs2*k + l*bs + j] != 0.0) { 1759 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1760 } 1761 #endif 1762 } 1763 } 1764 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1765 } 1766 } 1767 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1768 } else { 1769 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1770 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1771 for (i=0; i<a->mbs; i++) { 1772 for (j=0; j<bs; j++) { 1773 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1774 for (k=a->i[i]; k<a->i[i+1]; k++) { 1775 for (l=0; l<bs; l++) { 1776 #if defined(PETSC_USE_COMPLEX) 1777 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1778 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1779 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1780 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1781 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1782 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1783 } else { 1784 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1785 } 1786 #else 1787 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1788 #endif 1789 } 1790 } 1791 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1792 } 1793 } 1794 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1795 } 1796 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1797 PetscFunctionReturn(0); 1798 } 1799 1800 #undef __FUNCT__ 1801 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1802 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1803 { 1804 Mat A = (Mat) Aa; 1805 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1806 PetscErrorCode ierr; 1807 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1808 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1809 MatScalar *aa; 1810 PetscViewer viewer; 1811 PetscViewerFormat format; 1812 1813 PetscFunctionBegin; 1814 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1815 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1816 1817 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1818 1819 /* loop over matrix elements drawing boxes */ 1820 1821 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1822 color = PETSC_DRAW_BLUE; 1823 for (i=0,row=0; i<mbs; i++,row+=bs) { 1824 for (j=a->i[i]; j<a->i[i+1]; j++) { 1825 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1826 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1827 aa = a->a + j*bs2; 1828 for (k=0; k<bs; k++) { 1829 for (l=0; l<bs; l++) { 1830 if (PetscRealPart(*aa++) >= 0.) continue; 1831 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1832 } 1833 } 1834 } 1835 } 1836 color = PETSC_DRAW_CYAN; 1837 for (i=0,row=0; i<mbs; i++,row+=bs) { 1838 for (j=a->i[i]; j<a->i[i+1]; j++) { 1839 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1840 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1841 aa = a->a + j*bs2; 1842 for (k=0; k<bs; k++) { 1843 for (l=0; l<bs; l++) { 1844 if (PetscRealPart(*aa++) != 0.) continue; 1845 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1846 } 1847 } 1848 } 1849 } 1850 color = PETSC_DRAW_RED; 1851 for (i=0,row=0; i<mbs; i++,row+=bs) { 1852 for (j=a->i[i]; j<a->i[i+1]; j++) { 1853 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1854 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1855 aa = a->a + j*bs2; 1856 for (k=0; k<bs; k++) { 1857 for (l=0; l<bs; l++) { 1858 if (PetscRealPart(*aa++) <= 0.) continue; 1859 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1860 } 1861 } 1862 } 1863 } 1864 } else { 1865 /* use contour shading to indicate magnitude of values */ 1866 /* first determine max of all nonzero values */ 1867 PetscDraw popup; 1868 PetscReal scale,maxv = 0.0; 1869 1870 for (i=0; i<a->nz*a->bs2; i++) { 1871 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1872 } 1873 scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv; 1874 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1875 if (popup) {ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);} 1876 for (i=0,row=0; i<mbs; i++,row+=bs) { 1877 for (j=a->i[i]; j<a->i[i+1]; j++) { 1878 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1879 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1880 aa = a->a + j*bs2; 1881 for (k=0; k<bs; k++) { 1882 for (l=0; l<bs; l++) { 1883 color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++)); 1884 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1885 } 1886 } 1887 } 1888 } 1889 } 1890 PetscFunctionReturn(0); 1891 } 1892 1893 #undef __FUNCT__ 1894 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1895 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1896 { 1897 PetscErrorCode ierr; 1898 PetscReal xl,yl,xr,yr,w,h; 1899 PetscDraw draw; 1900 PetscBool isnull; 1901 1902 PetscFunctionBegin; 1903 1904 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1905 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1906 1907 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1908 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1909 xr += w; yr += h; xl = -w; yl = -h; 1910 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1911 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1912 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1913 PetscFunctionReturn(0); 1914 } 1915 1916 #undef __FUNCT__ 1917 #define __FUNCT__ "MatView_SeqBAIJ" 1918 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1919 { 1920 PetscErrorCode ierr; 1921 PetscBool iascii,isbinary,isdraw; 1922 1923 PetscFunctionBegin; 1924 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1925 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1926 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1927 if (iascii){ 1928 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1929 } else if (isbinary) { 1930 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1931 } else if (isdraw) { 1932 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1933 } else { 1934 Mat B; 1935 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1936 ierr = MatView(B,viewer);CHKERRQ(ierr); 1937 ierr = MatDestroy(&B);CHKERRQ(ierr); 1938 } 1939 PetscFunctionReturn(0); 1940 } 1941 1942 1943 #undef __FUNCT__ 1944 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1945 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1946 { 1947 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1948 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1949 PetscInt *ai = a->i,*ailen = a->ilen; 1950 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1951 MatScalar *ap,*aa = a->a; 1952 1953 PetscFunctionBegin; 1954 for (k=0; k<m; k++) { /* loop over rows */ 1955 row = im[k]; brow = row/bs; 1956 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1957 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1958 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1959 nrow = ailen[brow]; 1960 for (l=0; l<n; l++) { /* loop over columns */ 1961 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1962 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1963 col = in[l] ; 1964 bcol = col/bs; 1965 cidx = col%bs; 1966 ridx = row%bs; 1967 high = nrow; 1968 low = 0; /* assume unsorted */ 1969 while (high-low > 5) { 1970 t = (low+high)/2; 1971 if (rp[t] > bcol) high = t; 1972 else low = t; 1973 } 1974 for (i=low; i<high; i++) { 1975 if (rp[i] > bcol) break; 1976 if (rp[i] == bcol) { 1977 *v++ = ap[bs2*i+bs*cidx+ridx]; 1978 goto finished; 1979 } 1980 } 1981 *v++ = 0.0; 1982 finished:; 1983 } 1984 } 1985 PetscFunctionReturn(0); 1986 } 1987 1988 #undef __FUNCT__ 1989 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1990 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1991 { 1992 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1993 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1994 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1995 PetscErrorCode ierr; 1996 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1997 PetscBool roworiented=a->roworiented; 1998 const PetscScalar *value = v; 1999 MatScalar *ap,*aa = a->a,*bap; 2000 2001 PetscFunctionBegin; 2002 if (roworiented) { 2003 stepval = (n-1)*bs; 2004 } else { 2005 stepval = (m-1)*bs; 2006 } 2007 for (k=0; k<m; k++) { /* loop over added rows */ 2008 row = im[k]; 2009 if (row < 0) continue; 2010 #if defined(PETSC_USE_DEBUG) 2011 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 2012 #endif 2013 rp = aj + ai[row]; 2014 ap = aa + bs2*ai[row]; 2015 rmax = imax[row]; 2016 nrow = ailen[row]; 2017 low = 0; 2018 high = nrow; 2019 for (l=0; l<n; l++) { /* loop over added columns */ 2020 if (in[l] < 0) continue; 2021 #if defined(PETSC_USE_DEBUG) 2022 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 2023 #endif 2024 col = in[l]; 2025 if (roworiented) { 2026 value = v + (k*(stepval+bs) + l)*bs; 2027 } else { 2028 value = v + (l*(stepval+bs) + k)*bs; 2029 } 2030 if (col <= lastcol) low = 0; else high = nrow; 2031 lastcol = col; 2032 while (high-low > 7) { 2033 t = (low+high)/2; 2034 if (rp[t] > col) high = t; 2035 else low = t; 2036 } 2037 for (i=low; i<high; i++) { 2038 if (rp[i] > col) break; 2039 if (rp[i] == col) { 2040 bap = ap + bs2*i; 2041 if (roworiented) { 2042 if (is == ADD_VALUES) { 2043 for (ii=0; ii<bs; ii++,value+=stepval) { 2044 for (jj=ii; jj<bs2; jj+=bs) { 2045 bap[jj] += *value++; 2046 } 2047 } 2048 } else { 2049 for (ii=0; ii<bs; ii++,value+=stepval) { 2050 for (jj=ii; jj<bs2; jj+=bs) { 2051 bap[jj] = *value++; 2052 } 2053 } 2054 } 2055 } else { 2056 if (is == ADD_VALUES) { 2057 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2058 for (jj=0; jj<bs; jj++) { 2059 bap[jj] += value[jj]; 2060 } 2061 bap += bs; 2062 } 2063 } else { 2064 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2065 for (jj=0; jj<bs; jj++) { 2066 bap[jj] = value[jj]; 2067 } 2068 bap += bs; 2069 } 2070 } 2071 } 2072 goto noinsert2; 2073 } 2074 } 2075 if (nonew == 1) goto noinsert2; 2076 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2077 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2078 N = nrow++ - 1; high++; 2079 /* shift up all the later entries in this row */ 2080 for (ii=N; ii>=i; ii--) { 2081 rp[ii+1] = rp[ii]; 2082 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2083 } 2084 if (N >= i) { 2085 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2086 } 2087 rp[i] = col; 2088 bap = ap + bs2*i; 2089 if (roworiented) { 2090 for (ii=0; ii<bs; ii++,value+=stepval) { 2091 for (jj=ii; jj<bs2; jj+=bs) { 2092 bap[jj] = *value++; 2093 } 2094 } 2095 } else { 2096 for (ii=0; ii<bs; ii++,value+=stepval) { 2097 for (jj=0; jj<bs; jj++) { 2098 *bap++ = *value++; 2099 } 2100 } 2101 } 2102 noinsert2:; 2103 low = i; 2104 } 2105 ailen[row] = nrow; 2106 } 2107 PetscFunctionReturn(0); 2108 } 2109 2110 #undef __FUNCT__ 2111 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 2112 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 2113 { 2114 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2115 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 2116 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 2117 PetscErrorCode ierr; 2118 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 2119 MatScalar *aa = a->a,*ap; 2120 PetscReal ratio=0.6; 2121 2122 PetscFunctionBegin; 2123 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 2124 2125 if (m) rmax = ailen[0]; 2126 for (i=1; i<mbs; i++) { 2127 /* move each row back by the amount of empty slots (fshift) before it*/ 2128 fshift += imax[i-1] - ailen[i-1]; 2129 rmax = PetscMax(rmax,ailen[i]); 2130 if (fshift) { 2131 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 2132 N = ailen[i]; 2133 for (j=0; j<N; j++) { 2134 ip[j-fshift] = ip[j]; 2135 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2136 } 2137 } 2138 ai[i] = ai[i-1] + ailen[i-1]; 2139 } 2140 if (mbs) { 2141 fshift += imax[mbs-1] - ailen[mbs-1]; 2142 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 2143 } 2144 /* reset ilen and imax for each row */ 2145 for (i=0; i<mbs; i++) { 2146 ailen[i] = imax[i] = ai[i+1] - ai[i]; 2147 } 2148 a->nz = ai[mbs]; 2149 2150 /* diagonals may have moved, so kill the diagonal pointers */ 2151 a->idiagvalid = PETSC_FALSE; 2152 if (fshift && a->diag) { 2153 ierr = PetscFree(a->diag);CHKERRQ(ierr); 2154 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2155 a->diag = 0; 2156 } 2157 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 2158 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 2159 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 2160 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 2161 A->info.mallocs += a->reallocs; 2162 a->reallocs = 0; 2163 A->info.nz_unneeded = (PetscReal)fshift*bs2; 2164 2165 ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 2166 A->same_nonzero = PETSC_TRUE; 2167 PetscFunctionReturn(0); 2168 } 2169 2170 /* 2171 This function returns an array of flags which indicate the locations of contiguous 2172 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2173 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2174 Assume: sizes should be long enough to hold all the values. 2175 */ 2176 #undef __FUNCT__ 2177 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 2178 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2179 { 2180 PetscInt i,j,k,row; 2181 PetscBool flg; 2182 2183 PetscFunctionBegin; 2184 for (i=0,j=0; i<n; j++) { 2185 row = idx[i]; 2186 if (row%bs!=0) { /* Not the begining of a block */ 2187 sizes[j] = 1; 2188 i++; 2189 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2190 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 2191 i++; 2192 } else { /* Begining of the block, so check if the complete block exists */ 2193 flg = PETSC_TRUE; 2194 for (k=1; k<bs; k++) { 2195 if (row+k != idx[i+k]) { /* break in the block */ 2196 flg = PETSC_FALSE; 2197 break; 2198 } 2199 } 2200 if (flg) { /* No break in the bs */ 2201 sizes[j] = bs; 2202 i+= bs; 2203 } else { 2204 sizes[j] = 1; 2205 i++; 2206 } 2207 } 2208 } 2209 *bs_max = j; 2210 PetscFunctionReturn(0); 2211 } 2212 2213 #undef __FUNCT__ 2214 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2215 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2216 { 2217 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2218 PetscErrorCode ierr; 2219 PetscInt i,j,k,count,*rows; 2220 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2221 PetscScalar zero = 0.0; 2222 MatScalar *aa; 2223 const PetscScalar *xx; 2224 PetscScalar *bb; 2225 2226 PetscFunctionBegin; 2227 /* fix right hand side if needed */ 2228 if (x && b) { 2229 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2230 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2231 for (i=0; i<is_n; i++) { 2232 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2233 } 2234 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2235 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2236 } 2237 2238 /* Make a copy of the IS and sort it */ 2239 /* allocate memory for rows,sizes */ 2240 ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2241 2242 /* copy IS values to rows, and sort them */ 2243 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2244 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2245 2246 if (baij->keepnonzeropattern) { 2247 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2248 bs_max = is_n; 2249 A->same_nonzero = PETSC_TRUE; 2250 } else { 2251 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2252 A->same_nonzero = PETSC_FALSE; 2253 } 2254 2255 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2256 row = rows[j]; 2257 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2258 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2259 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2260 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2261 if (diag != (PetscScalar)0.0) { 2262 if (baij->ilen[row/bs] > 0) { 2263 baij->ilen[row/bs] = 1; 2264 baij->j[baij->i[row/bs]] = row/bs; 2265 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2266 } 2267 /* Now insert all the diagonal values for this bs */ 2268 for (k=0; k<bs; k++) { 2269 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2270 } 2271 } else { /* (diag == 0.0) */ 2272 baij->ilen[row/bs] = 0; 2273 } /* end (diag == 0.0) */ 2274 } else { /* (sizes[i] != bs) */ 2275 #if defined (PETSC_USE_DEBUG) 2276 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2277 #endif 2278 for (k=0; k<count; k++) { 2279 aa[0] = zero; 2280 aa += bs; 2281 } 2282 if (diag != (PetscScalar)0.0) { 2283 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2284 } 2285 } 2286 } 2287 2288 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2289 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2290 PetscFunctionReturn(0); 2291 } 2292 2293 #undef __FUNCT__ 2294 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2295 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2296 { 2297 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2298 PetscErrorCode ierr; 2299 PetscInt i,j,k,count; 2300 PetscInt bs=A->rmap->bs,bs2=baij->bs2,row,col; 2301 PetscScalar zero = 0.0; 2302 MatScalar *aa; 2303 const PetscScalar *xx; 2304 PetscScalar *bb; 2305 PetscBool *zeroed,vecs = PETSC_FALSE; 2306 2307 PetscFunctionBegin; 2308 /* fix right hand side if needed */ 2309 if (x && b) { 2310 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2311 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2312 vecs = PETSC_TRUE; 2313 } 2314 A->same_nonzero = PETSC_TRUE; 2315 2316 /* zero the columns */ 2317 ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr); 2318 ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 2319 for (i=0; i<is_n; i++) { 2320 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2321 zeroed[is_idx[i]] = PETSC_TRUE; 2322 } 2323 for (i=0; i<A->rmap->N; i++) { 2324 if (!zeroed[i]) { 2325 row = i/bs; 2326 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2327 for (k=0; k<bs; k++) { 2328 col = bs*baij->j[j] + k; 2329 if (zeroed[col]) { 2330 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2331 if (vecs) bb[i] -= aa[0]*xx[col]; 2332 aa[0] = 0.0; 2333 } 2334 } 2335 } 2336 } else if (vecs) bb[i] = diag*xx[i]; 2337 } 2338 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2339 if (vecs) { 2340 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2341 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2342 } 2343 2344 /* zero the rows */ 2345 for (i=0; i<is_n; i++) { 2346 row = is_idx[i]; 2347 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2348 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2349 for (k=0; k<count; k++) { 2350 aa[0] = zero; 2351 aa += bs; 2352 } 2353 if (diag != (PetscScalar)0.0) { 2354 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2355 } 2356 } 2357 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2358 PetscFunctionReturn(0); 2359 } 2360 2361 #undef __FUNCT__ 2362 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2363 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2364 { 2365 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2366 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2367 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2368 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2369 PetscErrorCode ierr; 2370 PetscInt ridx,cidx,bs2=a->bs2; 2371 PetscBool roworiented=a->roworiented; 2372 MatScalar *ap,value,*aa=a->a,*bap; 2373 2374 PetscFunctionBegin; 2375 if (v) PetscValidScalarPointer(v,6); 2376 for (k=0; k<m; k++) { /* loop over added rows */ 2377 row = im[k]; 2378 brow = row/bs; 2379 if (row < 0) continue; 2380 #if defined(PETSC_USE_DEBUG) 2381 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2382 #endif 2383 rp = aj + ai[brow]; 2384 ap = aa + bs2*ai[brow]; 2385 rmax = imax[brow]; 2386 nrow = ailen[brow]; 2387 low = 0; 2388 high = nrow; 2389 for (l=0; l<n; l++) { /* loop over added columns */ 2390 if (in[l] < 0) continue; 2391 #if defined(PETSC_USE_DEBUG) 2392 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2393 #endif 2394 col = in[l]; bcol = col/bs; 2395 ridx = row % bs; cidx = col % bs; 2396 if (roworiented) { 2397 value = v[l + k*n]; 2398 } else { 2399 value = v[k + l*m]; 2400 } 2401 if (col <= lastcol) low = 0; else high = nrow; 2402 lastcol = col; 2403 while (high-low > 7) { 2404 t = (low+high)/2; 2405 if (rp[t] > bcol) high = t; 2406 else low = t; 2407 } 2408 for (i=low; i<high; i++) { 2409 if (rp[i] > bcol) break; 2410 if (rp[i] == bcol) { 2411 bap = ap + bs2*i + bs*cidx + ridx; 2412 if (is == ADD_VALUES) *bap += value; 2413 else *bap = value; 2414 goto noinsert1; 2415 } 2416 } 2417 if (nonew == 1) goto noinsert1; 2418 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2419 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2420 N = nrow++ - 1; high++; 2421 /* shift up all the later entries in this row */ 2422 for (ii=N; ii>=i; ii--) { 2423 rp[ii+1] = rp[ii]; 2424 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2425 } 2426 if (N>=i) { 2427 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2428 } 2429 rp[i] = bcol; 2430 ap[bs2*i + bs*cidx + ridx] = value; 2431 a->nz++; 2432 noinsert1:; 2433 low = i; 2434 } 2435 ailen[brow] = nrow; 2436 } 2437 A->same_nonzero = PETSC_FALSE; 2438 PetscFunctionReturn(0); 2439 } 2440 2441 #undef __FUNCT__ 2442 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2443 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2444 { 2445 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2446 Mat outA; 2447 PetscErrorCode ierr; 2448 PetscBool row_identity,col_identity; 2449 2450 PetscFunctionBegin; 2451 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2452 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2453 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2454 if (!row_identity || !col_identity) { 2455 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2456 } 2457 2458 outA = inA; 2459 inA->factortype = MAT_FACTOR_LU; 2460 2461 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2462 2463 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2464 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2465 a->row = row; 2466 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2467 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2468 a->col = col; 2469 2470 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2471 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2472 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2473 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2474 2475 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2476 if (!a->solve_work) { 2477 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2478 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2479 } 2480 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2481 2482 PetscFunctionReturn(0); 2483 } 2484 2485 EXTERN_C_BEGIN 2486 #undef __FUNCT__ 2487 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2488 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2489 { 2490 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2491 PetscInt i,nz,mbs; 2492 2493 PetscFunctionBegin; 2494 nz = baij->maxnz; 2495 mbs = baij->mbs; 2496 for (i=0; i<nz; i++) { 2497 baij->j[i] = indices[i]; 2498 } 2499 baij->nz = nz; 2500 for (i=0; i<mbs; i++) { 2501 baij->ilen[i] = baij->imax[i]; 2502 } 2503 PetscFunctionReturn(0); 2504 } 2505 EXTERN_C_END 2506 2507 #undef __FUNCT__ 2508 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2509 /*@ 2510 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2511 in the matrix. 2512 2513 Input Parameters: 2514 + mat - the SeqBAIJ matrix 2515 - indices - the column indices 2516 2517 Level: advanced 2518 2519 Notes: 2520 This can be called if you have precomputed the nonzero structure of the 2521 matrix and want to provide it to the matrix object to improve the performance 2522 of the MatSetValues() operation. 2523 2524 You MUST have set the correct numbers of nonzeros per row in the call to 2525 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2526 2527 MUST be called before any calls to MatSetValues(); 2528 2529 @*/ 2530 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2531 { 2532 PetscErrorCode ierr; 2533 2534 PetscFunctionBegin; 2535 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2536 PetscValidPointer(indices,2); 2537 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr); 2538 PetscFunctionReturn(0); 2539 } 2540 2541 #undef __FUNCT__ 2542 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2543 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2544 { 2545 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2546 PetscErrorCode ierr; 2547 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2548 PetscReal atmp; 2549 PetscScalar *x,zero = 0.0; 2550 MatScalar *aa; 2551 PetscInt ncols,brow,krow,kcol; 2552 2553 PetscFunctionBegin; 2554 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2555 bs = A->rmap->bs; 2556 aa = a->a; 2557 ai = a->i; 2558 aj = a->j; 2559 mbs = a->mbs; 2560 2561 ierr = VecSet(v,zero);CHKERRQ(ierr); 2562 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2563 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2564 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2565 for (i=0; i<mbs; i++) { 2566 ncols = ai[1] - ai[0]; ai++; 2567 brow = bs*i; 2568 for (j=0; j<ncols; j++){ 2569 for (kcol=0; kcol<bs; kcol++){ 2570 for (krow=0; krow<bs; krow++){ 2571 atmp = PetscAbsScalar(*aa);aa++; 2572 row = brow + krow; /* row index */ 2573 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2574 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2575 } 2576 } 2577 aj++; 2578 } 2579 } 2580 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2581 PetscFunctionReturn(0); 2582 } 2583 2584 #undef __FUNCT__ 2585 #define __FUNCT__ "MatCopy_SeqBAIJ" 2586 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2587 { 2588 PetscErrorCode ierr; 2589 2590 PetscFunctionBegin; 2591 /* If the two matrices have the same copy implementation, use fast copy. */ 2592 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2593 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2594 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2595 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2596 2597 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2598 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2599 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2600 } else { 2601 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2602 } 2603 PetscFunctionReturn(0); 2604 } 2605 2606 #undef __FUNCT__ 2607 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2608 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2609 { 2610 PetscErrorCode ierr; 2611 2612 PetscFunctionBegin; 2613 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2614 PetscFunctionReturn(0); 2615 } 2616 2617 #undef __FUNCT__ 2618 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2619 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2620 { 2621 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2622 PetscFunctionBegin; 2623 *array = a->a; 2624 PetscFunctionReturn(0); 2625 } 2626 2627 #undef __FUNCT__ 2628 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2629 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2630 { 2631 PetscFunctionBegin; 2632 PetscFunctionReturn(0); 2633 } 2634 2635 #undef __FUNCT__ 2636 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2637 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2638 { 2639 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2640 PetscErrorCode ierr; 2641 PetscInt i,bs=Y->rmap->bs,j,bs2=bs*bs; 2642 PetscBLASInt one=1; 2643 2644 PetscFunctionBegin; 2645 if (str == SAME_NONZERO_PATTERN) { 2646 PetscScalar alpha = a; 2647 PetscInt bnz = PetscBLASIntCast(x->nz*bs2); 2648 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2649 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2650 if (y->xtoy && y->XtoY != X) { 2651 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2652 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2653 } 2654 if (!y->xtoy) { /* get xtoy */ 2655 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2656 y->XtoY = X; 2657 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2658 } 2659 for (i=0; i<x->nz; i++) { 2660 j = 0; 2661 while (j < bs2){ 2662 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2663 j++; 2664 } 2665 } 2666 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2667 } else { 2668 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2669 } 2670 PetscFunctionReturn(0); 2671 } 2672 2673 #undef __FUNCT__ 2674 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ" 2675 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs) 2676 { 2677 PetscInt rbs,cbs; 2678 PetscErrorCode ierr; 2679 2680 PetscFunctionBegin; 2681 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 2682 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 2683 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 2684 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 2685 PetscFunctionReturn(0); 2686 } 2687 2688 #undef __FUNCT__ 2689 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2690 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2691 { 2692 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2693 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2694 MatScalar *aa = a->a; 2695 2696 PetscFunctionBegin; 2697 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2698 PetscFunctionReturn(0); 2699 } 2700 2701 #undef __FUNCT__ 2702 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2703 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2704 { 2705 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2706 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2707 MatScalar *aa = a->a; 2708 2709 PetscFunctionBegin; 2710 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2711 PetscFunctionReturn(0); 2712 } 2713 2714 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2715 2716 #undef __FUNCT__ 2717 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2718 /* 2719 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2720 */ 2721 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 2722 { 2723 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2724 PetscErrorCode ierr; 2725 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2726 PetscInt nz = a->i[m],row,*jj,mr,col; 2727 2728 PetscFunctionBegin; 2729 *nn = n; 2730 if (!ia) PetscFunctionReturn(0); 2731 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2732 else { 2733 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2734 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2735 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2736 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2737 jj = a->j; 2738 for (i=0; i<nz; i++) { 2739 collengths[jj[i]]++; 2740 } 2741 cia[0] = oshift; 2742 for (i=0; i<n; i++) { 2743 cia[i+1] = cia[i] + collengths[i]; 2744 } 2745 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2746 jj = a->j; 2747 for (row=0; row<m; row++) { 2748 mr = a->i[row+1] - a->i[row]; 2749 for (i=0; i<mr; i++) { 2750 col = *jj++; 2751 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2752 } 2753 } 2754 ierr = PetscFree(collengths);CHKERRQ(ierr); 2755 *ia = cia; *ja = cja; 2756 } 2757 PetscFunctionReturn(0); 2758 } 2759 2760 #undef __FUNCT__ 2761 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2762 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 2763 { 2764 PetscErrorCode ierr; 2765 2766 PetscFunctionBegin; 2767 if (!ia) PetscFunctionReturn(0); 2768 ierr = PetscFree(*ia);CHKERRQ(ierr); 2769 ierr = PetscFree(*ja);CHKERRQ(ierr); 2770 PetscFunctionReturn(0); 2771 } 2772 2773 #undef __FUNCT__ 2774 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2775 PetscErrorCode MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2776 { 2777 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f; 2778 PetscErrorCode ierr; 2779 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2; 2780 PetscScalar dx,*y,*xx,*w3_array; 2781 PetscScalar *vscale_array; 2782 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2783 Vec w1=coloring->w1,w2=coloring->w2,w3; 2784 void *fctx = coloring->fctx; 2785 PetscBool flg = PETSC_FALSE; 2786 PetscInt ctype=coloring->ctype,N,col_start=0,col_end=0; 2787 Vec x1_tmp; 2788 2789 PetscFunctionBegin; 2790 PetscValidHeaderSpecific(J,MAT_CLASSID,1); 2791 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2); 2792 PetscValidHeaderSpecific(x1,VEC_CLASSID,3); 2793 if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2794 2795 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2796 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2797 ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr); 2798 if (flg) { 2799 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2800 } else { 2801 PetscBool assembled; 2802 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2803 if (assembled) { 2804 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2805 } 2806 } 2807 2808 x1_tmp = x1; 2809 if (!coloring->vscale){ 2810 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2811 } 2812 2813 /* 2814 This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets 2815 coloring->F for the coarser grids from the finest 2816 */ 2817 if (coloring->F) { 2818 ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr); 2819 ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr); 2820 if (m1 != m2) { 2821 coloring->F = 0; 2822 } 2823 } 2824 2825 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2826 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2827 } 2828 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2829 2830 /* Set w1 = F(x1) */ 2831 if (coloring->F) { 2832 w1 = coloring->F; /* use already computed value of function */ 2833 coloring->F = 0; 2834 } else { 2835 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2836 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2837 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2838 } 2839 2840 if (!coloring->w3) { 2841 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2842 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2843 } 2844 w3 = coloring->w3; 2845 2846 CHKMEMQ; 2847 /* Compute all the local scale factors, including ghost points */ 2848 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2849 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2850 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2851 if (ctype == IS_COLORING_GHOSTED){ 2852 col_start = 0; col_end = N; 2853 } else if (ctype == IS_COLORING_GLOBAL){ 2854 xx = xx - start; 2855 vscale_array = vscale_array - start; 2856 col_start = start; col_end = N + start; 2857 } CHKMEMQ; 2858 for (col=col_start; col<col_end; col++){ 2859 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2860 if (coloring->htype[0] == 'w') { 2861 dx = 1.0 + unorm; 2862 } else { 2863 dx = xx[col]; 2864 } 2865 if (dx == (PetscScalar)0.0) dx = 1.0; 2866 #if !defined(PETSC_USE_COMPLEX) 2867 if (dx < umin && dx >= 0.0) dx = umin; 2868 else if (dx < 0.0 && dx > -umin) dx = -umin; 2869 #else 2870 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2871 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2872 #endif 2873 dx *= epsilon; 2874 vscale_array[col] = (PetscScalar)1.0/dx; 2875 } CHKMEMQ; 2876 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2877 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2878 if (ctype == IS_COLORING_GLOBAL){ 2879 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2880 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2881 } 2882 CHKMEMQ; 2883 if (coloring->vscaleforrow) { 2884 vscaleforrow = coloring->vscaleforrow; 2885 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2886 2887 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2888 /* 2889 Loop over each color 2890 */ 2891 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2892 for (k=0; k<coloring->ncolors; k++) { 2893 coloring->currentcolor = k; 2894 for (i=0; i<bs; i++) { 2895 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2896 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2897 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2898 /* 2899 Loop over each column associated with color 2900 adding the perturbation to the vector w3. 2901 */ 2902 for (l=0; l<coloring->ncolumns[k]; l++) { 2903 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2904 if (coloring->htype[0] == 'w') { 2905 dx = 1.0 + unorm; 2906 } else { 2907 dx = xx[col]; 2908 } 2909 if (dx == (PetscScalar)0.0) dx = 1.0; 2910 #if !defined(PETSC_USE_COMPLEX) 2911 if (dx < umin && dx >= 0.0) dx = umin; 2912 else if (dx < 0.0 && dx > -umin) dx = -umin; 2913 #else 2914 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2915 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2916 #endif 2917 dx *= epsilon; 2918 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2919 w3_array[col] += dx; 2920 } 2921 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2922 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2923 2924 /* 2925 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2926 w2 = F(x1 + dx) - F(x1) 2927 */ 2928 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2929 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2930 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2931 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2932 2933 /* 2934 Loop over rows of vector, putting results into Jacobian matrix 2935 */ 2936 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2937 for (l=0; l<coloring->nrows[k]; l++) { 2938 row = bs*coloring->rows[k][l]; /* local row index */ 2939 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2940 for (j=0; j<bs; j++) { 2941 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2942 srows[j] = row + start + j; 2943 } 2944 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2945 } 2946 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2947 } 2948 } /* endof for each color */ 2949 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2950 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2951 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2952 ierr = PetscFree(srows);CHKERRQ(ierr); 2953 2954 coloring->currentcolor = -1; 2955 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2956 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2957 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2958 PetscFunctionReturn(0); 2959 } 2960 2961 /* -------------------------------------------------------------------*/ 2962 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2963 MatGetRow_SeqBAIJ, 2964 MatRestoreRow_SeqBAIJ, 2965 MatMult_SeqBAIJ_N, 2966 /* 4*/ MatMultAdd_SeqBAIJ_N, 2967 MatMultTranspose_SeqBAIJ, 2968 MatMultTransposeAdd_SeqBAIJ, 2969 0, 2970 0, 2971 0, 2972 /*10*/ 0, 2973 MatLUFactor_SeqBAIJ, 2974 0, 2975 0, 2976 MatTranspose_SeqBAIJ, 2977 /*15*/ MatGetInfo_SeqBAIJ, 2978 MatEqual_SeqBAIJ, 2979 MatGetDiagonal_SeqBAIJ, 2980 MatDiagonalScale_SeqBAIJ, 2981 MatNorm_SeqBAIJ, 2982 /*20*/ 0, 2983 MatAssemblyEnd_SeqBAIJ, 2984 MatSetOption_SeqBAIJ, 2985 MatZeroEntries_SeqBAIJ, 2986 /*24*/ MatZeroRows_SeqBAIJ, 2987 0, 2988 0, 2989 0, 2990 0, 2991 /*29*/ MatSetUpPreallocation_SeqBAIJ, 2992 0, 2993 0, 2994 MatGetArray_SeqBAIJ, 2995 MatRestoreArray_SeqBAIJ, 2996 /*34*/ MatDuplicate_SeqBAIJ, 2997 0, 2998 0, 2999 MatILUFactor_SeqBAIJ, 3000 0, 3001 /*39*/ MatAXPY_SeqBAIJ, 3002 MatGetSubMatrices_SeqBAIJ, 3003 MatIncreaseOverlap_SeqBAIJ, 3004 MatGetValues_SeqBAIJ, 3005 MatCopy_SeqBAIJ, 3006 /*44*/ 0, 3007 MatScale_SeqBAIJ, 3008 0, 3009 0, 3010 MatZeroRowsColumns_SeqBAIJ, 3011 /*49*/ MatSetBlockSize_SeqBAIJ, 3012 MatGetRowIJ_SeqBAIJ, 3013 MatRestoreRowIJ_SeqBAIJ, 3014 MatGetColumnIJ_SeqBAIJ, 3015 MatRestoreColumnIJ_SeqBAIJ, 3016 /*54*/ MatFDColoringCreate_SeqAIJ, 3017 0, 3018 0, 3019 0, 3020 MatSetValuesBlocked_SeqBAIJ, 3021 /*59*/ MatGetSubMatrix_SeqBAIJ, 3022 MatDestroy_SeqBAIJ, 3023 MatView_SeqBAIJ, 3024 0, 3025 0, 3026 /*64*/ 0, 3027 0, 3028 0, 3029 0, 3030 0, 3031 /*69*/ MatGetRowMaxAbs_SeqBAIJ, 3032 0, 3033 MatConvert_Basic, 3034 0, 3035 0, 3036 /*74*/ 0, 3037 MatFDColoringApply_BAIJ, 3038 0, 3039 0, 3040 0, 3041 /*79*/ 0, 3042 0, 3043 0, 3044 0, 3045 MatLoad_SeqBAIJ, 3046 /*84*/ 0, 3047 0, 3048 0, 3049 0, 3050 0, 3051 /*89*/ 0, 3052 0, 3053 0, 3054 0, 3055 0, 3056 /*94*/ 0, 3057 0, 3058 0, 3059 0, 3060 0, 3061 /*99*/0, 3062 0, 3063 0, 3064 0, 3065 0, 3066 /*104*/0, 3067 MatRealPart_SeqBAIJ, 3068 MatImaginaryPart_SeqBAIJ, 3069 0, 3070 0, 3071 /*109*/0, 3072 0, 3073 0, 3074 0, 3075 MatMissingDiagonal_SeqBAIJ, 3076 /*114*/0, 3077 0, 3078 0, 3079 0, 3080 0, 3081 /*119*/0, 3082 0, 3083 MatMultHermitianTranspose_SeqBAIJ, 3084 MatMultHermitianTransposeAdd_SeqBAIJ, 3085 0, 3086 }; 3087 3088 EXTERN_C_BEGIN 3089 #undef __FUNCT__ 3090 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 3091 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3092 { 3093 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3094 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 3095 PetscErrorCode ierr; 3096 3097 PetscFunctionBegin; 3098 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3099 3100 /* allocate space for values if not already there */ 3101 if (!aij->saved_values) { 3102 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 3103 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 3104 } 3105 3106 /* copy values over */ 3107 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3108 PetscFunctionReturn(0); 3109 } 3110 EXTERN_C_END 3111 3112 EXTERN_C_BEGIN 3113 #undef __FUNCT__ 3114 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 3115 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3116 { 3117 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3118 PetscErrorCode ierr; 3119 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 3120 3121 PetscFunctionBegin; 3122 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3123 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 3124 3125 /* copy values over */ 3126 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3127 PetscFunctionReturn(0); 3128 } 3129 EXTERN_C_END 3130 3131 EXTERN_C_BEGIN 3132 extern PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 3133 extern PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 3134 EXTERN_C_END 3135 3136 EXTERN_C_BEGIN 3137 #undef __FUNCT__ 3138 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 3139 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 3140 { 3141 Mat_SeqBAIJ *b; 3142 PetscErrorCode ierr; 3143 PetscInt i,mbs,nbs,bs2,newbs = PetscAbs(bs); 3144 PetscBool flg,skipallocation = PETSC_FALSE; 3145 3146 PetscFunctionBegin; 3147 3148 if (nz == MAT_SKIP_ALLOCATION) { 3149 skipallocation = PETSC_TRUE; 3150 nz = 0; 3151 } 3152 3153 if (bs < 0) { 3154 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 3155 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 3156 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3157 bs = PetscAbs(bs); 3158 } 3159 if (nnz && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 3160 bs = newbs; 3161 3162 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3163 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3164 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3165 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3166 3167 B->preallocated = PETSC_TRUE; 3168 3169 mbs = B->rmap->n/bs; 3170 nbs = B->cmap->n/bs; 3171 bs2 = bs*bs; 3172 3173 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 3174 3175 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3176 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 3177 if (nnz) { 3178 for (i=0; i<mbs; i++) { 3179 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 3180 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 3181 } 3182 } 3183 3184 b = (Mat_SeqBAIJ*)B->data; 3185 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 3186 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3187 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3188 3189 if (!flg) { 3190 switch (bs) { 3191 case 1: 3192 B->ops->mult = MatMult_SeqBAIJ_1; 3193 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3194 B->ops->sor = MatSOR_SeqBAIJ_1; 3195 break; 3196 case 2: 3197 B->ops->mult = MatMult_SeqBAIJ_2; 3198 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3199 B->ops->sor = MatSOR_SeqBAIJ_2; 3200 break; 3201 case 3: 3202 B->ops->mult = MatMult_SeqBAIJ_3; 3203 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3204 B->ops->sor = MatSOR_SeqBAIJ_3; 3205 break; 3206 case 4: 3207 B->ops->mult = MatMult_SeqBAIJ_4; 3208 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3209 B->ops->sor = MatSOR_SeqBAIJ_4; 3210 break; 3211 case 5: 3212 B->ops->mult = MatMult_SeqBAIJ_5; 3213 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3214 B->ops->sor = MatSOR_SeqBAIJ_5; 3215 break; 3216 case 6: 3217 B->ops->mult = MatMult_SeqBAIJ_6; 3218 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3219 B->ops->sor = MatSOR_SeqBAIJ_6; 3220 break; 3221 case 7: 3222 B->ops->mult = MatMult_SeqBAIJ_7; 3223 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3224 B->ops->sor = MatSOR_SeqBAIJ_7; 3225 break; 3226 case 15: 3227 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3228 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3229 B->ops->sor = MatSOR_SeqBAIJ_N; 3230 break; 3231 default: 3232 B->ops->mult = MatMult_SeqBAIJ_N; 3233 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3234 B->ops->sor = MatSOR_SeqBAIJ_N; 3235 break; 3236 } 3237 } 3238 B->rmap->bs = bs; 3239 b->mbs = mbs; 3240 b->nbs = nbs; 3241 if (!skipallocation) { 3242 if (!b->imax) { 3243 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 3244 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt)); 3245 b->free_imax_ilen = PETSC_TRUE; 3246 } 3247 /* b->ilen will count nonzeros in each block row so far. */ 3248 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 3249 if (!nnz) { 3250 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3251 else if (nz < 0) nz = 1; 3252 for (i=0; i<mbs; i++) b->imax[i] = nz; 3253 nz = nz*mbs; 3254 } else { 3255 nz = 0; 3256 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 3257 } 3258 3259 /* allocate the matrix space */ 3260 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 3261 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 3262 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 3263 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 3264 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3265 b->singlemalloc = PETSC_TRUE; 3266 b->i[0] = 0; 3267 for (i=1; i<mbs+1; i++) { 3268 b->i[i] = b->i[i-1] + b->imax[i-1]; 3269 } 3270 b->free_a = PETSC_TRUE; 3271 b->free_ij = PETSC_TRUE; 3272 } else { 3273 b->free_a = PETSC_FALSE; 3274 b->free_ij = PETSC_FALSE; 3275 } 3276 3277 B->rmap->bs = bs; 3278 b->bs2 = bs2; 3279 b->mbs = mbs; 3280 b->nz = 0; 3281 b->maxnz = nz; 3282 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3283 PetscFunctionReturn(0); 3284 } 3285 EXTERN_C_END 3286 3287 EXTERN_C_BEGIN 3288 #undef __FUNCT__ 3289 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3290 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3291 { 3292 PetscInt i,m,nz,nz_max=0,*nnz; 3293 PetscScalar *values=0; 3294 PetscErrorCode ierr; 3295 3296 PetscFunctionBegin; 3297 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3298 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3299 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3300 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3301 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3302 m = B->rmap->n/bs; 3303 3304 if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 3305 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3306 for(i=0; i<m; i++) { 3307 nz = ii[i+1]- ii[i]; 3308 if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 3309 nz_max = PetscMax(nz_max, nz); 3310 nnz[i] = nz; 3311 } 3312 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3313 ierr = PetscFree(nnz);CHKERRQ(ierr); 3314 3315 values = (PetscScalar*)V; 3316 if (!values) { 3317 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3318 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3319 } 3320 for (i=0; i<m; i++) { 3321 PetscInt ncols = ii[i+1] - ii[i]; 3322 const PetscInt *icols = jj + ii[i]; 3323 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3324 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3325 } 3326 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3327 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3328 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3329 3330 PetscFunctionReturn(0); 3331 } 3332 EXTERN_C_END 3333 3334 3335 EXTERN_C_BEGIN 3336 extern PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3337 extern PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*); 3338 #if defined(PETSC_HAVE_MUMPS) 3339 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3340 #endif 3341 extern PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 3342 EXTERN_C_END 3343 3344 /*MC 3345 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3346 block sparse compressed row format. 3347 3348 Options Database Keys: 3349 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3350 3351 Level: beginner 3352 3353 .seealso: MatCreateSeqBAIJ() 3354 M*/ 3355 3356 EXTERN_C_BEGIN 3357 extern PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3358 EXTERN_C_END 3359 3360 EXTERN_C_BEGIN 3361 #undef __FUNCT__ 3362 #define __FUNCT__ "MatCreate_SeqBAIJ" 3363 PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3364 { 3365 PetscErrorCode ierr; 3366 PetscMPIInt size; 3367 Mat_SeqBAIJ *b; 3368 3369 PetscFunctionBegin; 3370 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 3371 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3372 3373 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3374 B->data = (void*)b; 3375 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3376 b->row = 0; 3377 b->col = 0; 3378 b->icol = 0; 3379 b->reallocs = 0; 3380 b->saved_values = 0; 3381 3382 b->roworiented = PETSC_TRUE; 3383 b->nonew = 0; 3384 b->diag = 0; 3385 b->solve_work = 0; 3386 b->mult_work = 0; 3387 B->spptr = 0; 3388 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3389 b->keepnonzeropattern = PETSC_FALSE; 3390 b->xtoy = 0; 3391 b->XtoY = 0; 3392 B->same_nonzero = PETSC_FALSE; 3393 3394 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C", 3395 "MatGetFactorAvailable_seqbaij_petsc", 3396 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3397 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C", 3398 "MatGetFactor_seqbaij_petsc", 3399 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3400 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_bstrm_C", 3401 "MatGetFactor_seqbaij_bstrm", 3402 MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr); 3403 #if defined(PETSC_HAVE_MUMPS) 3404 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr); 3405 #endif 3406 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 3407 "MatInvertBlockDiagonal_SeqBAIJ", 3408 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3409 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3410 "MatStoreValues_SeqBAIJ", 3411 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3412 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3413 "MatRetrieveValues_SeqBAIJ", 3414 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3415 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 3416 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 3417 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3418 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 3419 "MatConvert_SeqBAIJ_SeqAIJ", 3420 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3421 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 3422 "MatConvert_SeqBAIJ_SeqSBAIJ", 3423 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3424 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 3425 "MatSeqBAIJSetPreallocation_SeqBAIJ", 3426 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3427 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 3428 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 3429 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3430 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C", 3431 "MatConvert_SeqBAIJ_SeqBSTRM", 3432 MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3433 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3434 "MatIsTranspose_SeqBAIJ", 3435 MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3436 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3437 PetscFunctionReturn(0); 3438 } 3439 EXTERN_C_END 3440 3441 #undef __FUNCT__ 3442 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3443 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3444 { 3445 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3446 PetscErrorCode ierr; 3447 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3448 3449 PetscFunctionBegin; 3450 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3451 3452 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3453 c->imax = a->imax; 3454 c->ilen = a->ilen; 3455 c->free_imax_ilen = PETSC_FALSE; 3456 } else { 3457 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3458 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3459 for (i=0; i<mbs; i++) { 3460 c->imax[i] = a->imax[i]; 3461 c->ilen[i] = a->ilen[i]; 3462 } 3463 c->free_imax_ilen = PETSC_TRUE; 3464 } 3465 3466 /* allocate the matrix space */ 3467 if (mallocmatspace){ 3468 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3469 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3470 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3471 c->singlemalloc = PETSC_FALSE; 3472 c->free_ij = PETSC_FALSE; 3473 c->i = a->i; 3474 c->j = a->j; 3475 c->parent = A; 3476 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3477 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3478 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3479 } else { 3480 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3481 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3482 c->singlemalloc = PETSC_TRUE; 3483 c->free_ij = PETSC_TRUE; 3484 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3485 if (mbs > 0) { 3486 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3487 if (cpvalues == MAT_COPY_VALUES) { 3488 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3489 } else { 3490 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3491 } 3492 } 3493 } 3494 } 3495 3496 c->roworiented = a->roworiented; 3497 c->nonew = a->nonew; 3498 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3499 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3500 c->bs2 = a->bs2; 3501 c->mbs = a->mbs; 3502 c->nbs = a->nbs; 3503 3504 if (a->diag) { 3505 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3506 c->diag = a->diag; 3507 c->free_diag = PETSC_FALSE; 3508 } else { 3509 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3510 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3511 for (i=0; i<mbs; i++) { 3512 c->diag[i] = a->diag[i]; 3513 } 3514 c->free_diag = PETSC_TRUE; 3515 } 3516 } else c->diag = 0; 3517 c->nz = a->nz; 3518 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3519 c->solve_work = 0; 3520 c->mult_work = 0; 3521 c->free_a = PETSC_TRUE; 3522 c->free_ij = PETSC_TRUE; 3523 C->preallocated = PETSC_TRUE; 3524 C->assembled = PETSC_TRUE; 3525 3526 c->compressedrow.use = a->compressedrow.use; 3527 c->compressedrow.nrows = a->compressedrow.nrows; 3528 c->compressedrow.check = a->compressedrow.check; 3529 if (a->compressedrow.use){ 3530 i = a->compressedrow.nrows; 3531 ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 3532 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3533 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3534 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3535 } else { 3536 c->compressedrow.use = PETSC_FALSE; 3537 c->compressedrow.i = PETSC_NULL; 3538 c->compressedrow.rindex = PETSC_NULL; 3539 } 3540 C->same_nonzero = A->same_nonzero; 3541 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3542 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3543 PetscFunctionReturn(0); 3544 } 3545 3546 #undef __FUNCT__ 3547 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3548 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3549 { 3550 PetscErrorCode ierr; 3551 3552 PetscFunctionBegin; 3553 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 3554 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3555 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3556 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE); 3557 PetscFunctionReturn(0); 3558 } 3559 3560 #undef __FUNCT__ 3561 #define __FUNCT__ "MatLoad_SeqBAIJ" 3562 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3563 { 3564 Mat_SeqBAIJ *a; 3565 PetscErrorCode ierr; 3566 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3567 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3568 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3569 PetscInt *masked,nmask,tmp,bs2,ishift; 3570 PetscMPIInt size; 3571 int fd; 3572 PetscScalar *aa; 3573 MPI_Comm comm = ((PetscObject)viewer)->comm; 3574 3575 PetscFunctionBegin; 3576 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3577 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3578 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3579 bs2 = bs*bs; 3580 3581 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3582 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3583 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3584 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3585 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3586 M = header[1]; N = header[2]; nz = header[3]; 3587 3588 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3589 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3590 3591 /* 3592 This code adds extra rows to make sure the number of rows is 3593 divisible by the blocksize 3594 */ 3595 mbs = M/bs; 3596 extra_rows = bs - M + bs*(mbs); 3597 if (extra_rows == bs) extra_rows = 0; 3598 else mbs++; 3599 if (extra_rows) { 3600 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3601 } 3602 3603 /* Set global sizes if not already set */ 3604 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3605 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3606 } else { /* Check if the matrix global sizes are correct */ 3607 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3608 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3609 } 3610 3611 /* read in row lengths */ 3612 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3613 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3614 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3615 3616 /* read in column indices */ 3617 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3618 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3619 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3620 3621 /* loop over row lengths determining block row lengths */ 3622 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3623 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3624 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3625 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3626 rowcount = 0; 3627 nzcount = 0; 3628 for (i=0; i<mbs; i++) { 3629 nmask = 0; 3630 for (j=0; j<bs; j++) { 3631 kmax = rowlengths[rowcount]; 3632 for (k=0; k<kmax; k++) { 3633 tmp = jj[nzcount++]/bs; 3634 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3635 } 3636 rowcount++; 3637 } 3638 browlengths[i] += nmask; 3639 /* zero out the mask elements we set */ 3640 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3641 } 3642 3643 /* Do preallocation */ 3644 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3645 a = (Mat_SeqBAIJ*)newmat->data; 3646 3647 /* set matrix "i" values */ 3648 a->i[0] = 0; 3649 for (i=1; i<= mbs; i++) { 3650 a->i[i] = a->i[i-1] + browlengths[i-1]; 3651 a->ilen[i-1] = browlengths[i-1]; 3652 } 3653 a->nz = 0; 3654 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3655 3656 /* read in nonzero values */ 3657 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3658 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3659 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3660 3661 /* set "a" and "j" values into matrix */ 3662 nzcount = 0; jcount = 0; 3663 for (i=0; i<mbs; i++) { 3664 nzcountb = nzcount; 3665 nmask = 0; 3666 for (j=0; j<bs; j++) { 3667 kmax = rowlengths[i*bs+j]; 3668 for (k=0; k<kmax; k++) { 3669 tmp = jj[nzcount++]/bs; 3670 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3671 } 3672 } 3673 /* sort the masked values */ 3674 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3675 3676 /* set "j" values into matrix */ 3677 maskcount = 1; 3678 for (j=0; j<nmask; j++) { 3679 a->j[jcount++] = masked[j]; 3680 mask[masked[j]] = maskcount++; 3681 } 3682 /* set "a" values into matrix */ 3683 ishift = bs2*a->i[i]; 3684 for (j=0; j<bs; j++) { 3685 kmax = rowlengths[i*bs+j]; 3686 for (k=0; k<kmax; k++) { 3687 tmp = jj[nzcountb]/bs ; 3688 block = mask[tmp] - 1; 3689 point = jj[nzcountb] - bs*tmp; 3690 idx = ishift + bs2*block + j + bs*point; 3691 a->a[idx] = (MatScalar)aa[nzcountb++]; 3692 } 3693 } 3694 /* zero out the mask elements we set */ 3695 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3696 } 3697 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3698 3699 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3700 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3701 ierr = PetscFree(aa);CHKERRQ(ierr); 3702 ierr = PetscFree(jj);CHKERRQ(ierr); 3703 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3704 3705 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3706 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3707 ierr = MatView_Private(newmat);CHKERRQ(ierr); 3708 PetscFunctionReturn(0); 3709 } 3710 3711 #undef __FUNCT__ 3712 #define __FUNCT__ "MatCreateSeqBAIJ" 3713 /*@C 3714 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3715 compressed row) format. For good matrix assembly performance the 3716 user should preallocate the matrix storage by setting the parameter nz 3717 (or the array nnz). By setting these parameters accurately, performance 3718 during matrix assembly can be increased by more than a factor of 50. 3719 3720 Collective on MPI_Comm 3721 3722 Input Parameters: 3723 + comm - MPI communicator, set to PETSC_COMM_SELF 3724 . bs - size of block 3725 . m - number of rows 3726 . n - number of columns 3727 . nz - number of nonzero blocks per block row (same for all rows) 3728 - nnz - array containing the number of nonzero blocks in the various block rows 3729 (possibly different for each block row) or PETSC_NULL 3730 3731 Output Parameter: 3732 . A - the matrix 3733 3734 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3735 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3736 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3737 3738 Options Database Keys: 3739 . -mat_no_unroll - uses code that does not unroll the loops in the 3740 block calculations (much slower) 3741 . -mat_block_size - size of the blocks to use 3742 3743 Level: intermediate 3744 3745 Notes: 3746 The number of rows and columns must be divisible by blocksize. 3747 3748 If the nnz parameter is given then the nz parameter is ignored 3749 3750 A nonzero block is any block that as 1 or more nonzeros in it 3751 3752 The block AIJ format is fully compatible with standard Fortran 77 3753 storage. That is, the stored row and column indices can begin at 3754 either one (as in Fortran) or zero. See the users' manual for details. 3755 3756 Specify the preallocated storage with either nz or nnz (not both). 3757 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3758 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3759 matrices. 3760 3761 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3762 @*/ 3763 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3764 { 3765 PetscErrorCode ierr; 3766 3767 PetscFunctionBegin; 3768 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3769 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3770 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3771 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3772 PetscFunctionReturn(0); 3773 } 3774 3775 #undef __FUNCT__ 3776 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3777 /*@C 3778 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3779 per row in the matrix. For good matrix assembly performance the 3780 user should preallocate the matrix storage by setting the parameter nz 3781 (or the array nnz). By setting these parameters accurately, performance 3782 during matrix assembly can be increased by more than a factor of 50. 3783 3784 Collective on MPI_Comm 3785 3786 Input Parameters: 3787 + A - the matrix 3788 . bs - size of block 3789 . nz - number of block nonzeros per block row (same for all rows) 3790 - nnz - array containing the number of block nonzeros in the various block rows 3791 (possibly different for each block row) or PETSC_NULL 3792 3793 Options Database Keys: 3794 . -mat_no_unroll - uses code that does not unroll the loops in the 3795 block calculations (much slower) 3796 . -mat_block_size - size of the blocks to use 3797 3798 Level: intermediate 3799 3800 Notes: 3801 If the nnz parameter is given then the nz parameter is ignored 3802 3803 You can call MatGetInfo() to get information on how effective the preallocation was; 3804 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3805 You can also run with the option -info and look for messages with the string 3806 malloc in them to see if additional memory allocation was needed. 3807 3808 The block AIJ format is fully compatible with standard Fortran 77 3809 storage. That is, the stored row and column indices can begin at 3810 either one (as in Fortran) or zero. See the users' manual for details. 3811 3812 Specify the preallocated storage with either nz or nnz (not both). 3813 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3814 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3815 3816 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3817 @*/ 3818 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3819 { 3820 PetscErrorCode ierr; 3821 3822 PetscFunctionBegin; 3823 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3824 PetscFunctionReturn(0); 3825 } 3826 3827 #undef __FUNCT__ 3828 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3829 /*@C 3830 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3831 (the default sequential PETSc format). 3832 3833 Collective on MPI_Comm 3834 3835 Input Parameters: 3836 + A - the matrix 3837 . i - the indices into j for the start of each local row (starts with zero) 3838 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3839 - v - optional values in the matrix 3840 3841 Level: developer 3842 3843 .keywords: matrix, aij, compressed row, sparse 3844 3845 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3846 @*/ 3847 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3848 { 3849 PetscErrorCode ierr; 3850 3851 PetscFunctionBegin; 3852 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3853 PetscFunctionReturn(0); 3854 } 3855 3856 3857 #undef __FUNCT__ 3858 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3859 /*@ 3860 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3861 3862 Collective on MPI_Comm 3863 3864 Input Parameters: 3865 + comm - must be an MPI communicator of size 1 3866 . bs - size of block 3867 . m - number of rows 3868 . n - number of columns 3869 . i - row indices 3870 . j - column indices 3871 - a - matrix values 3872 3873 Output Parameter: 3874 . mat - the matrix 3875 3876 Level: advanced 3877 3878 Notes: 3879 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3880 once the matrix is destroyed 3881 3882 You cannot set new nonzero locations into this matrix, that will generate an error. 3883 3884 The i and j indices are 0 based 3885 3886 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3887 3888 3889 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3890 3891 @*/ 3892 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3893 { 3894 PetscErrorCode ierr; 3895 PetscInt ii; 3896 Mat_SeqBAIJ *baij; 3897 3898 PetscFunctionBegin; 3899 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3900 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3901 3902 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3903 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3904 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3905 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3906 baij = (Mat_SeqBAIJ*)(*mat)->data; 3907 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3908 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3909 3910 baij->i = i; 3911 baij->j = j; 3912 baij->a = a; 3913 baij->singlemalloc = PETSC_FALSE; 3914 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3915 baij->free_a = PETSC_FALSE; 3916 baij->free_ij = PETSC_FALSE; 3917 3918 for (ii=0; ii<m; ii++) { 3919 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3920 #if defined(PETSC_USE_DEBUG) 3921 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3922 #endif 3923 } 3924 #if defined(PETSC_USE_DEBUG) 3925 for (ii=0; ii<baij->i[m]; ii++) { 3926 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3927 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3928 } 3929 #endif 3930 3931 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3932 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3933 PetscFunctionReturn(0); 3934 } 3935