1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "../src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "petscblaslapack.h" 9 #include "../src/mat/blockinvert.h" 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 13 /*@ 14 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 15 16 Collective on Mat 17 18 Input Parameters: 19 . mat - the matrix 20 21 Level: advanced 22 @*/ 23 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 24 { 25 PetscErrorCode ierr,(*f)(Mat); 26 27 PetscFunctionBegin; 28 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 29 if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 30 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 31 32 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 33 if (f) { 34 ierr = (*f)(mat);CHKERRQ(ierr); 35 } else { 36 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 37 } 38 PetscFunctionReturn(0); 39 } 40 41 EXTERN_C_BEGIN 42 #undef __FUNCT__ 43 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 44 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 45 { 46 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 47 PetscErrorCode ierr; 48 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 49 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 50 PetscReal shift = 0.0; 51 52 PetscFunctionBegin; 53 if (a->idiagvalid) PetscFunctionReturn(0); 54 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 55 diag_offset = a->diag; 56 if (!a->idiag) { 57 ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 58 ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 59 } 60 diag = a->idiag; 61 mdiag = a->idiag+bs2*mbs; 62 /* factor and invert each block */ 63 switch (bs){ 64 case 1: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 1*diag_offset[i]; 67 diag[0] = odiag[0]; 68 mdiag[0] = odiag[0]; 69 diag[0] = 1.0 / (diag[0] + shift); 70 diag += 1; 71 mdiag += 1; 72 } 73 break; 74 case 2: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 4*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 79 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 80 diag += 4; 81 mdiag += 4; 82 } 83 break; 84 case 3: 85 for (i=0; i<mbs; i++) { 86 odiag = v + 9*diag_offset[i]; 87 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 88 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 89 diag[8] = odiag[8]; 90 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 91 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 92 mdiag[8] = odiag[8]; 93 ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 94 diag += 9; 95 mdiag += 9; 96 } 97 break; 98 case 4: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 16*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 104 diag += 16; 105 mdiag += 16; 106 } 107 break; 108 case 5: 109 for (i=0; i<mbs; i++) { 110 odiag = v + 25*diag_offset[i]; 111 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 112 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 113 ierr = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 114 diag += 25; 115 mdiag += 25; 116 } 117 break; 118 case 6: 119 for (i=0; i<mbs; i++) { 120 odiag = v + 36*diag_offset[i]; 121 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 122 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 123 ierr = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 124 diag += 36; 125 mdiag += 36; 126 } 127 break; 128 case 7: 129 for (i=0; i<mbs; i++) { 130 odiag = v + 49*diag_offset[i]; 131 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 132 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 133 ierr = Kernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 134 diag += 49; 135 mdiag += 49; 136 } 137 break; 138 default: 139 ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr); 140 for (i=0; i<mbs; i++) { 141 odiag = v + bs2*diag_offset[i]; 142 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 143 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 144 ierr = Kernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 145 diag += bs2; 146 mdiag += bs2; 147 } 148 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 149 } 150 a->idiagvalid = PETSC_TRUE; 151 PetscFunctionReturn(0); 152 } 153 EXTERN_C_END 154 155 #undef __FUNCT__ 156 #define __FUNCT__ "MatSOR_SeqBAIJ_1" 157 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 158 { 159 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 160 PetscScalar *x,x1,s1; 161 const PetscScalar *b; 162 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 163 PetscErrorCode ierr; 164 PetscInt m = a->mbs,i,i2,nz,j; 165 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 166 167 PetscFunctionBegin; 168 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 169 its = its*lits; 170 if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 171 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 172 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 173 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 174 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 175 176 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 177 178 diag = a->diag; 179 idiag = a->idiag; 180 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 181 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 182 183 if (flag & SOR_ZERO_INITIAL_GUESS) { 184 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 185 x[0] = b[0]*idiag[0]; 186 i2 = 1; 187 idiag += 1; 188 for (i=1; i<m; i++) { 189 v = aa + ai[i]; 190 vi = aj + ai[i]; 191 nz = diag[i] - ai[i]; 192 s1 = b[i2]; 193 for (j=0; j<nz; j++) { 194 s1 -= v[j]*x[vi[j]]; 195 } 196 x[i2] = idiag[0]*s1; 197 idiag += 1; 198 i2 += 1; 199 } 200 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 201 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 202 } 203 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 204 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 205 i2 = 0; 206 mdiag = a->idiag+a->mbs; 207 for (i=0; i<m; i++) { 208 x1 = x[i2]; 209 x[i2] = mdiag[0]*x1; 210 mdiag += 1; 211 i2 += 1; 212 } 213 ierr = PetscLogFlops(m);CHKERRQ(ierr); 214 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 215 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 216 } 217 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 218 idiag = a->idiag+a->mbs - 1; 219 i2 = m - 1; 220 x1 = x[i2]; 221 x[i2] = idiag[0]*x1; 222 idiag -= 1; 223 i2 -= 1; 224 for (i=m-2; i>=0; i--) { 225 v = aa + (diag[i]+1); 226 vi = aj + diag[i] + 1; 227 nz = ai[i+1] - diag[i] - 1; 228 s1 = x[i2]; 229 for (j=0; j<nz; j++) { 230 s1 -= v[j]*x[vi[j]]; 231 } 232 x[i2] = idiag[0]*s1; 233 idiag -= 1; 234 i2 -= 1; 235 } 236 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 237 } 238 } else { 239 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 240 } 241 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 242 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 243 PetscFunctionReturn(0); 244 } 245 246 #undef __FUNCT__ 247 #define __FUNCT__ "MatSOR_SeqBAIJ_2" 248 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 249 { 250 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 251 PetscScalar *x,x1,x2,s1,s2; 252 const PetscScalar *b; 253 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 254 PetscErrorCode ierr; 255 PetscInt m = a->mbs,i,i2,nz,idx,j,it; 256 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 257 258 PetscFunctionBegin; 259 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 260 its = its*lits; 261 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 262 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 263 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 264 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 265 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 266 267 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 268 269 diag = a->diag; 270 idiag = a->idiag; 271 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 272 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 273 274 if (flag & SOR_ZERO_INITIAL_GUESS) { 275 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 276 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 277 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 278 i2 = 2; 279 idiag += 4; 280 for (i=1; i<m; i++) { 281 v = aa + 4*ai[i]; 282 vi = aj + ai[i]; 283 nz = diag[i] - ai[i]; 284 s1 = b[i2]; s2 = b[i2+1]; 285 for (j=0; j<nz; j++) { 286 idx = 2*vi[j]; 287 it = 4*j; 288 x1 = x[idx]; x2 = x[1+idx]; 289 s1 -= v[it]*x1 + v[it+2]*x2; 290 s2 -= v[it+1]*x1 + v[it+3]*x2; 291 } 292 x[i2] = idiag[0]*s1 + idiag[2]*s2; 293 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 294 idiag += 4; 295 i2 += 2; 296 } 297 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 298 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 299 } 300 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 301 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 302 i2 = 0; 303 mdiag = a->idiag+4*a->mbs; 304 for (i=0; i<m; i++) { 305 x1 = x[i2]; x2 = x[i2+1]; 306 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 307 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 308 mdiag += 4; 309 i2 += 2; 310 } 311 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 312 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 313 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 314 } 315 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 316 idiag = a->idiag+4*a->mbs - 4; 317 i2 = 2*m - 2; 318 x1 = x[i2]; x2 = x[i2+1]; 319 x[i2] = idiag[0]*x1 + idiag[2]*x2; 320 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 321 idiag -= 4; 322 i2 -= 2; 323 for (i=m-2; i>=0; i--) { 324 v = aa + 4*(diag[i]+1); 325 vi = aj + diag[i] + 1; 326 nz = ai[i+1] - diag[i] - 1; 327 s1 = x[i2]; s2 = x[i2+1]; 328 for (j=0; j<nz; j++) { 329 idx = 2*vi[j]; 330 it = 4*j; 331 x1 = x[idx]; x2 = x[1+idx]; 332 s1 -= v[it]*x1 + v[it+2]*x2; 333 s2 -= v[it+1]*x1 + v[it+3]*x2; 334 } 335 x[i2] = idiag[0]*s1 + idiag[2]*s2; 336 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 337 idiag -= 4; 338 i2 -= 2; 339 } 340 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 341 } 342 } else { 343 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 344 } 345 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 346 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 347 PetscFunctionReturn(0); 348 } 349 350 #undef __FUNCT__ 351 #define __FUNCT__ "MatSOR_SeqBAIJ_3" 352 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 353 { 354 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 355 PetscScalar *x,x1,x2,x3,s1,s2,s3; 356 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 357 const PetscScalar *b; 358 PetscErrorCode ierr; 359 PetscInt m = a->mbs,i,i2,nz,idx; 360 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 361 362 PetscFunctionBegin; 363 its = its*lits; 364 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 365 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 366 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 367 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 368 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 369 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 370 371 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 372 373 diag = a->diag; 374 idiag = a->idiag; 375 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 376 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 377 378 if (flag & SOR_ZERO_INITIAL_GUESS) { 379 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 380 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 381 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 382 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 383 i2 = 3; 384 idiag += 9; 385 for (i=1; i<m; i++) { 386 v = aa + 9*ai[i]; 387 vi = aj + ai[i]; 388 nz = diag[i] - ai[i]; 389 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 390 while (nz--) { 391 idx = 3*(*vi++); 392 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 393 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 394 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 395 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 396 v += 9; 397 } 398 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 399 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 400 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 401 idiag += 9; 402 i2 += 3; 403 } 404 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 405 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 406 } 407 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 408 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 409 i2 = 0; 410 mdiag = a->idiag+9*a->mbs; 411 for (i=0; i<m; i++) { 412 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 413 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 414 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 415 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 416 mdiag += 9; 417 i2 += 3; 418 } 419 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 420 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 421 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 422 } 423 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 424 idiag = a->idiag+9*a->mbs - 9; 425 i2 = 3*m - 3; 426 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 427 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 428 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 429 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 430 idiag -= 9; 431 i2 -= 3; 432 for (i=m-2; i>=0; i--) { 433 v = aa + 9*(diag[i]+1); 434 vi = aj + diag[i] + 1; 435 nz = ai[i+1] - diag[i] - 1; 436 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 437 while (nz--) { 438 idx = 3*(*vi++); 439 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 440 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 441 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 442 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 443 v += 9; 444 } 445 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 446 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 447 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 448 idiag -= 9; 449 i2 -= 3; 450 } 451 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 452 } 453 } else { 454 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 455 } 456 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 457 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 458 PetscFunctionReturn(0); 459 } 460 461 #undef __FUNCT__ 462 #define __FUNCT__ "MatSOR_SeqBAIJ_4" 463 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 464 { 465 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 466 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 467 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 468 const PetscScalar *b; 469 PetscErrorCode ierr; 470 PetscInt m = a->mbs,i,i2,nz,idx; 471 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 472 473 PetscFunctionBegin; 474 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 475 its = its*lits; 476 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 477 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 478 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 479 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 480 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 481 482 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 483 484 diag = a->diag; 485 idiag = a->idiag; 486 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 487 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 488 489 if (flag & SOR_ZERO_INITIAL_GUESS) { 490 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 491 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 492 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 493 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 494 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 495 i2 = 4; 496 idiag += 16; 497 for (i=1; i<m; i++) { 498 v = aa + 16*ai[i]; 499 vi = aj + ai[i]; 500 nz = diag[i] - ai[i]; 501 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 502 while (nz--) { 503 idx = 4*(*vi++); 504 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 505 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 506 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 507 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 508 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 509 v += 16; 510 } 511 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 512 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 513 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 514 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 515 idiag += 16; 516 i2 += 4; 517 } 518 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 519 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 520 } 521 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 522 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 523 i2 = 0; 524 mdiag = a->idiag+16*a->mbs; 525 for (i=0; i<m; i++) { 526 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 527 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 528 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 529 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 530 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 531 mdiag += 16; 532 i2 += 4; 533 } 534 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 535 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 536 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 537 } 538 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 539 idiag = a->idiag+16*a->mbs - 16; 540 i2 = 4*m - 4; 541 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 542 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 543 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 544 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 545 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 546 idiag -= 16; 547 i2 -= 4; 548 for (i=m-2; i>=0; i--) { 549 v = aa + 16*(diag[i]+1); 550 vi = aj + diag[i] + 1; 551 nz = ai[i+1] - diag[i] - 1; 552 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 553 while (nz--) { 554 idx = 4*(*vi++); 555 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 556 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 557 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 558 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 559 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 560 v += 16; 561 } 562 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 563 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 564 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 565 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 566 idiag -= 16; 567 i2 -= 4; 568 } 569 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 570 } 571 } else { 572 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 573 } 574 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 575 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 576 PetscFunctionReturn(0); 577 } 578 579 #undef __FUNCT__ 580 #define __FUNCT__ "MatSOR_SeqBAIJ_5" 581 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 582 { 583 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 584 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 585 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 586 const PetscScalar *b; 587 PetscErrorCode ierr; 588 PetscInt m = a->mbs,i,i2,nz,idx; 589 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 590 591 PetscFunctionBegin; 592 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 593 its = its*lits; 594 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 595 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 596 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 597 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 598 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 599 600 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 601 602 diag = a->diag; 603 idiag = a->idiag; 604 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 605 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 606 607 if (flag & SOR_ZERO_INITIAL_GUESS) { 608 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 609 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 610 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 611 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 612 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 613 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 614 i2 = 5; 615 idiag += 25; 616 for (i=1; i<m; i++) { 617 v = aa + 25*ai[i]; 618 vi = aj + ai[i]; 619 nz = diag[i] - ai[i]; 620 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 621 while (nz--) { 622 idx = 5*(*vi++); 623 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 624 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 625 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 626 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 627 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 628 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 629 v += 25; 630 } 631 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 632 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 633 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 634 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 635 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 636 idiag += 25; 637 i2 += 5; 638 } 639 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 640 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 641 } 642 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 643 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 644 i2 = 0; 645 mdiag = a->idiag+25*a->mbs; 646 for (i=0; i<m; i++) { 647 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 648 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 649 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 650 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 651 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 652 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 653 mdiag += 25; 654 i2 += 5; 655 } 656 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 657 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 658 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 659 } 660 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 661 idiag = a->idiag+25*a->mbs - 25; 662 i2 = 5*m - 5; 663 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 664 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 665 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 666 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 667 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 668 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 669 idiag -= 25; 670 i2 -= 5; 671 for (i=m-2; i>=0; i--) { 672 v = aa + 25*(diag[i]+1); 673 vi = aj + diag[i] + 1; 674 nz = ai[i+1] - diag[i] - 1; 675 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 676 while (nz--) { 677 idx = 5*(*vi++); 678 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 679 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 680 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 681 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 682 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 683 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 684 v += 25; 685 } 686 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 687 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 688 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 689 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 690 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 691 idiag -= 25; 692 i2 -= 5; 693 } 694 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 695 } 696 } else { 697 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 698 } 699 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 700 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 701 PetscFunctionReturn(0); 702 } 703 704 #undef __FUNCT__ 705 #define __FUNCT__ "MatSOR_SeqBAIJ_6" 706 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 707 { 708 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 709 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 710 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 711 const PetscScalar *b; 712 PetscErrorCode ierr; 713 PetscInt m = a->mbs,i,i2,nz,idx; 714 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 715 716 PetscFunctionBegin; 717 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 718 its = its*lits; 719 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 720 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 721 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 722 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 723 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 724 725 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 726 727 diag = a->diag; 728 idiag = a->idiag; 729 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 730 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 731 732 if (flag & SOR_ZERO_INITIAL_GUESS) { 733 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 734 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 735 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 736 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 737 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 738 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 739 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 740 i2 = 6; 741 idiag += 36; 742 for (i=1; i<m; i++) { 743 v = aa + 36*ai[i]; 744 vi = aj + ai[i]; 745 nz = diag[i] - ai[i]; 746 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 747 while (nz--) { 748 idx = 6*(*vi++); 749 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 750 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 751 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 752 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 753 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 754 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 755 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 756 v += 36; 757 } 758 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 759 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 760 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 761 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 762 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 763 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 764 idiag += 36; 765 i2 += 6; 766 } 767 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 768 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 769 } 770 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 771 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 772 i2 = 0; 773 mdiag = a->idiag+36*a->mbs; 774 for (i=0; i<m; i++) { 775 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 776 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 777 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 778 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 779 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 780 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 781 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 782 mdiag += 36; 783 i2 += 6; 784 } 785 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 786 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 787 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 788 } 789 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 790 idiag = a->idiag+36*a->mbs - 36; 791 i2 = 6*m - 6; 792 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 793 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 794 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 795 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 796 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 797 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 798 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 799 idiag -= 36; 800 i2 -= 6; 801 for (i=m-2; i>=0; i--) { 802 v = aa + 36*(diag[i]+1); 803 vi = aj + diag[i] + 1; 804 nz = ai[i+1] - diag[i] - 1; 805 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 806 while (nz--) { 807 idx = 6*(*vi++); 808 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 809 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 810 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 811 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 812 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 813 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 814 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 815 v += 36; 816 } 817 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 818 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 819 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 820 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 821 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 822 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 823 idiag -= 36; 824 i2 -= 6; 825 } 826 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 827 } 828 } else { 829 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 830 } 831 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 832 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 833 PetscFunctionReturn(0); 834 } 835 836 #undef __FUNCT__ 837 #define __FUNCT__ "MatSOR_SeqBAIJ_7" 838 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 839 { 840 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 841 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 842 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 843 const PetscScalar *b; 844 PetscErrorCode ierr; 845 PetscInt m = a->mbs,i,i2,nz,idx; 846 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 847 848 PetscFunctionBegin; 849 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 850 its = its*lits; 851 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 852 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 853 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 854 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 855 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 856 857 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 858 859 diag = a->diag; 860 idiag = a->idiag; 861 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 862 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 863 864 if (flag & SOR_ZERO_INITIAL_GUESS) { 865 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 866 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 867 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 868 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 869 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 870 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 871 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 872 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 873 i2 = 7; 874 idiag += 49; 875 for (i=1; i<m; i++) { 876 v = aa + 49*ai[i]; 877 vi = aj + ai[i]; 878 nz = diag[i] - ai[i]; 879 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 880 while (nz--) { 881 idx = 7*(*vi++); 882 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 883 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 884 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 885 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 886 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 887 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 888 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 889 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 890 v += 49; 891 } 892 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 893 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 894 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 895 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 896 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 897 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 898 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 899 idiag += 49; 900 i2 += 7; 901 } 902 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 903 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 904 } 905 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 906 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 907 i2 = 0; 908 mdiag = a->idiag+49*a->mbs; 909 for (i=0; i<m; i++) { 910 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 911 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7; 912 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7; 913 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7; 914 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7; 915 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7; 916 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7; 917 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7; 918 mdiag += 49; 919 i2 += 7; 920 } 921 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 922 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 923 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 924 } 925 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 926 idiag = a->idiag+49*a->mbs - 49; 927 i2 = 7*m - 7; 928 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 929 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 930 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 931 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 932 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 933 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 934 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 935 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 936 idiag -= 49; 937 i2 -= 7; 938 for (i=m-2; i>=0; i--) { 939 v = aa + 49*(diag[i]+1); 940 vi = aj + diag[i] + 1; 941 nz = ai[i+1] - diag[i] - 1; 942 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 943 while (nz--) { 944 idx = 7*(*vi++); 945 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 946 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 947 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 948 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 949 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 950 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 951 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 952 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 953 v += 49; 954 } 955 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 956 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 957 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 958 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 959 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 960 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 961 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 962 idiag -= 49; 963 i2 -= 7; 964 } 965 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 966 } 967 } else { 968 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 969 } 970 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 971 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 972 PetscFunctionReturn(0); 973 } 974 975 #undef __FUNCT__ 976 #define __FUNCT__ "MatSOR_SeqBAIJ_N" 977 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 978 { 979 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 980 PetscScalar *x,*work,*w,*workt; 981 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 982 const PetscScalar *b; 983 PetscErrorCode ierr; 984 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j; 985 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 986 987 PetscFunctionBegin; 988 its = its*lits; 989 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 990 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 991 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 992 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 993 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 994 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 995 996 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 997 998 diag = a->diag; 999 idiag = a->idiag; 1000 if (!a->mult_work) { 1001 k = PetscMax(A->rmap->n,A->cmap->n); 1002 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 1003 } 1004 work = a->mult_work; 1005 if (!a->sor_work) { 1006 ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr); 1007 } 1008 w = a->sor_work; 1009 1010 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1011 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1012 1013 if (flag & SOR_ZERO_INITIAL_GUESS) { 1014 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1015 Kernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 1016 /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 1017 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 1018 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/ 1019 i2 = bs; 1020 idiag += bs2; 1021 for (i=1; i<m; i++) { 1022 v = aa + bs2*ai[i]; 1023 vi = aj + ai[i]; 1024 nz = diag[i] - ai[i]; 1025 1026 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1027 /* copy all rows of x that are needed into contiguous space */ 1028 workt = work; 1029 for (j=0; j<nz; j++) { 1030 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1031 workt += bs; 1032 } 1033 Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1034 /*s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 1035 while (nz--) { 1036 idx = N*(*vi++); 1037 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 1038 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1039 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1040 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1041 v += N2; 1042 } */ 1043 1044 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1045 /* x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1046 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1047 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/ 1048 1049 idiag += bs2; 1050 i2 += bs; 1051 } 1052 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 1053 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1054 } 1055 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 1056 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1057 i2 = 0; 1058 mdiag = a->idiag+bs2*a->mbs; 1059 ierr = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr); 1060 for (i=0; i<m; i++) { 1061 Kernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2); 1062 /* x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1063 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 1064 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 1065 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */ 1066 1067 mdiag += bs2; 1068 i2 += bs; 1069 } 1070 ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr); 1071 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1072 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 1073 } 1074 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1075 idiag = a->idiag+bs2*a->mbs - bs2; 1076 i2 = bs*m - bs; 1077 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1078 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1079 /*x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1080 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 1081 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 1082 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/ 1083 idiag -= bs2; 1084 i2 -= bs; 1085 for (i=m-2; i>=0; i--) { 1086 v = aa + bs2*(diag[i]+1); 1087 vi = aj + diag[i] + 1; 1088 nz = ai[i+1] - diag[i] - 1; 1089 1090 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1091 /* copy all rows of x that are needed into contiguous space */ 1092 workt = work; 1093 for (j=0; j<nz; j++) { 1094 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1095 workt += bs; 1096 } 1097 Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1098 /* s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 1099 while (nz--) { 1100 idx = N*(*vi++); 1101 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 1102 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1103 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1104 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1105 v += N2; 1106 } */ 1107 1108 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1109 /*x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1110 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1111 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */ 1112 idiag -= bs2; 1113 i2 -= bs; 1114 } 1115 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1116 } 1117 } else { 1118 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 1119 } 1120 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1121 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1122 PetscFunctionReturn(0); 1123 } 1124 1125 /* 1126 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1127 */ 1128 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1129 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1130 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1131 #define matsetvaluesblocked4_ matsetvaluesblocked4 1132 #endif 1133 1134 EXTERN_C_BEGIN 1135 #undef __FUNCT__ 1136 #define __FUNCT__ "matsetvaluesblocked4_" 1137 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 1138 { 1139 Mat A = *AA; 1140 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1141 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 1142 PetscInt *ai=a->i,*ailen=a->ilen; 1143 PetscInt *aj=a->j,stepval,lastcol = -1; 1144 const PetscScalar *value = v; 1145 MatScalar *ap,*aa = a->a,*bap; 1146 1147 PetscFunctionBegin; 1148 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 1149 stepval = (n-1)*4; 1150 for (k=0; k<m; k++) { /* loop over added rows */ 1151 row = im[k]; 1152 rp = aj + ai[row]; 1153 ap = aa + 16*ai[row]; 1154 nrow = ailen[row]; 1155 low = 0; 1156 high = nrow; 1157 for (l=0; l<n; l++) { /* loop over added columns */ 1158 col = in[l]; 1159 if (col <= lastcol) low = 0; else high = nrow; 1160 lastcol = col; 1161 value = v + k*(stepval+4 + l)*4; 1162 while (high-low > 7) { 1163 t = (low+high)/2; 1164 if (rp[t] > col) high = t; 1165 else low = t; 1166 } 1167 for (i=low; i<high; i++) { 1168 if (rp[i] > col) break; 1169 if (rp[i] == col) { 1170 bap = ap + 16*i; 1171 for (ii=0; ii<4; ii++,value+=stepval) { 1172 for (jj=ii; jj<16; jj+=4) { 1173 bap[jj] += *value++; 1174 } 1175 } 1176 goto noinsert2; 1177 } 1178 } 1179 N = nrow++ - 1; 1180 high++; /* added new column index thus must search to one higher than before */ 1181 /* shift up all the later entries in this row */ 1182 for (ii=N; ii>=i; ii--) { 1183 rp[ii+1] = rp[ii]; 1184 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1185 } 1186 if (N >= i) { 1187 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1188 } 1189 rp[i] = col; 1190 bap = ap + 16*i; 1191 for (ii=0; ii<4; ii++,value+=stepval) { 1192 for (jj=ii; jj<16; jj+=4) { 1193 bap[jj] = *value++; 1194 } 1195 } 1196 noinsert2:; 1197 low = i; 1198 } 1199 ailen[row] = nrow; 1200 } 1201 PetscFunctionReturnVoid(); 1202 } 1203 EXTERN_C_END 1204 1205 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1206 #define matsetvalues4_ MATSETVALUES4 1207 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1208 #define matsetvalues4_ matsetvalues4 1209 #endif 1210 1211 EXTERN_C_BEGIN 1212 #undef __FUNCT__ 1213 #define __FUNCT__ "MatSetValues4_" 1214 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1215 { 1216 Mat A = *AA; 1217 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1218 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1219 PetscInt *ai=a->i,*ailen=a->ilen; 1220 PetscInt *aj=a->j,brow,bcol; 1221 PetscInt ridx,cidx,lastcol = -1; 1222 MatScalar *ap,value,*aa=a->a,*bap; 1223 1224 PetscFunctionBegin; 1225 for (k=0; k<m; k++) { /* loop over added rows */ 1226 row = im[k]; brow = row/4; 1227 rp = aj + ai[brow]; 1228 ap = aa + 16*ai[brow]; 1229 nrow = ailen[brow]; 1230 low = 0; 1231 high = nrow; 1232 for (l=0; l<n; l++) { /* loop over added columns */ 1233 col = in[l]; bcol = col/4; 1234 ridx = row % 4; cidx = col % 4; 1235 value = v[l + k*n]; 1236 if (col <= lastcol) low = 0; else high = nrow; 1237 lastcol = col; 1238 while (high-low > 7) { 1239 t = (low+high)/2; 1240 if (rp[t] > bcol) high = t; 1241 else low = t; 1242 } 1243 for (i=low; i<high; i++) { 1244 if (rp[i] > bcol) break; 1245 if (rp[i] == bcol) { 1246 bap = ap + 16*i + 4*cidx + ridx; 1247 *bap += value; 1248 goto noinsert1; 1249 } 1250 } 1251 N = nrow++ - 1; 1252 high++; /* added new column thus must search to one higher than before */ 1253 /* shift up all the later entries in this row */ 1254 for (ii=N; ii>=i; ii--) { 1255 rp[ii+1] = rp[ii]; 1256 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1257 } 1258 if (N>=i) { 1259 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1260 } 1261 rp[i] = bcol; 1262 ap[16*i + 4*cidx + ridx] = value; 1263 noinsert1:; 1264 low = i; 1265 } 1266 ailen[brow] = nrow; 1267 } 1268 PetscFunctionReturnVoid(); 1269 } 1270 EXTERN_C_END 1271 1272 /* 1273 Checks for missing diagonals 1274 */ 1275 #undef __FUNCT__ 1276 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1277 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d) 1278 { 1279 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1280 PetscErrorCode ierr; 1281 PetscInt *diag,*jj = a->j,i; 1282 1283 PetscFunctionBegin; 1284 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1285 *missing = PETSC_FALSE; 1286 if (A->rmap->n > 0 && !jj) { 1287 *missing = PETSC_TRUE; 1288 if (d) *d = 0; 1289 PetscInfo(A,"Matrix has no entries therefor is missing diagonal"); 1290 } else { 1291 diag = a->diag; 1292 for (i=0; i<a->mbs; i++) { 1293 if (jj[diag[i]] != i) { 1294 *missing = PETSC_TRUE; 1295 if (d) *d = i; 1296 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1297 } 1298 } 1299 } 1300 PetscFunctionReturn(0); 1301 } 1302 1303 #undef __FUNCT__ 1304 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1305 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1306 { 1307 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1308 PetscErrorCode ierr; 1309 PetscInt i,j,m = a->mbs; 1310 1311 PetscFunctionBegin; 1312 if (!a->diag) { 1313 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1314 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1315 a->free_diag = PETSC_TRUE; 1316 } 1317 for (i=0; i<m; i++) { 1318 a->diag[i] = a->i[i+1]; 1319 for (j=a->i[i]; j<a->i[i+1]; j++) { 1320 if (a->j[j] == i) { 1321 a->diag[i] = j; 1322 break; 1323 } 1324 } 1325 } 1326 PetscFunctionReturn(0); 1327 } 1328 1329 1330 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1331 1332 #undef __FUNCT__ 1333 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1334 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1335 { 1336 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1337 PetscErrorCode ierr; 1338 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt; 1339 PetscInt *tia, *tja; 1340 1341 PetscFunctionBegin; 1342 *nn = n; 1343 if (!ia) PetscFunctionReturn(0); 1344 if (symmetric) { 1345 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1346 nz = tia[n]; 1347 } else { 1348 tia = a->i; tja = a->j; 1349 } 1350 1351 if (!blockcompressed && bs > 1) { 1352 (*nn) *= bs; 1353 /* malloc & create the natural set of indices */ 1354 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1355 if (n) { 1356 (*ia)[0] = 0; 1357 for (j=1; j<bs; j++) { 1358 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1359 } 1360 } 1361 1362 for (i=1; i<n; i++) { 1363 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1364 for (j=1; j<bs; j++) { 1365 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1366 } 1367 } 1368 if (n) { 1369 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1370 } 1371 1372 if (ja) { 1373 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1374 cnt = 0; 1375 for (i=0; i<n; i++) { 1376 for (j=0; j<bs; j++) { 1377 for (k=tia[i]; k<tia[i+1]; k++) { 1378 for (l=0; l<bs; l++) { 1379 (*ja)[cnt++] = bs*tja[k] + l; 1380 } 1381 } 1382 } 1383 } 1384 } 1385 1386 n *= bs; 1387 nz *= bs*bs; 1388 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1389 ierr = PetscFree(tia);CHKERRQ(ierr); 1390 ierr = PetscFree(tja);CHKERRQ(ierr); 1391 } 1392 } else if (oshift == 1) { 1393 if (symmetric) { 1394 PetscInt nz = tia[A->rmap->n/bs]; 1395 /* add 1 to i and j indices */ 1396 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1397 *ia = tia; 1398 if (ja) { 1399 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1400 *ja = tja; 1401 } 1402 } else { 1403 PetscInt nz = a->i[A->rmap->n/bs]; 1404 /* malloc space and add 1 to i and j indices */ 1405 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1406 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1407 if (ja) { 1408 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1409 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1410 } 1411 } 1412 } else { 1413 *ia = tia; 1414 if (ja) *ja = tja; 1415 } 1416 1417 PetscFunctionReturn(0); 1418 } 1419 1420 #undef __FUNCT__ 1421 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1422 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 1423 { 1424 PetscErrorCode ierr; 1425 1426 PetscFunctionBegin; 1427 if (!ia) PetscFunctionReturn(0); 1428 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1429 ierr = PetscFree(*ia);CHKERRQ(ierr); 1430 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1431 } 1432 PetscFunctionReturn(0); 1433 } 1434 1435 #undef __FUNCT__ 1436 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1437 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1438 { 1439 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1440 PetscErrorCode ierr; 1441 1442 PetscFunctionBegin; 1443 #if defined(PETSC_USE_LOG) 1444 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1445 #endif 1446 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1447 if (a->row) { 1448 ierr = ISDestroy(a->row);CHKERRQ(ierr); 1449 } 1450 if (a->col) { 1451 ierr = ISDestroy(a->col);CHKERRQ(ierr); 1452 } 1453 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1454 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1455 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1456 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1457 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1458 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1459 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 1460 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1461 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1462 if (a->compressedrow.use){ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);} 1463 1464 if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);} 1465 if (a->parent) {ierr = MatDestroy(a->parent);CHKERRQ(ierr);} 1466 ierr = PetscFree(a);CHKERRQ(ierr); 1467 1468 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1469 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1470 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1471 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1472 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1473 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1474 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1475 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1476 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1477 PetscFunctionReturn(0); 1478 } 1479 1480 #undef __FUNCT__ 1481 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1482 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg) 1483 { 1484 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1485 PetscErrorCode ierr; 1486 1487 PetscFunctionBegin; 1488 switch (op) { 1489 case MAT_ROW_ORIENTED: 1490 a->roworiented = flg; 1491 break; 1492 case MAT_KEEP_NONZERO_PATTERN: 1493 a->keepnonzeropattern = flg; 1494 break; 1495 case MAT_NEW_NONZERO_LOCATIONS: 1496 a->nonew = (flg ? 0 : 1); 1497 break; 1498 case MAT_NEW_NONZERO_LOCATION_ERR: 1499 a->nonew = (flg ? -1 : 0); 1500 break; 1501 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1502 a->nonew = (flg ? -2 : 0); 1503 break; 1504 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1505 a->nounused = (flg ? -1 : 0); 1506 break; 1507 case MAT_NEW_DIAGONALS: 1508 case MAT_IGNORE_OFF_PROC_ENTRIES: 1509 case MAT_USE_HASH_TABLE: 1510 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1511 break; 1512 case MAT_SYMMETRIC: 1513 case MAT_STRUCTURALLY_SYMMETRIC: 1514 case MAT_HERMITIAN: 1515 case MAT_SYMMETRY_ETERNAL: 1516 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1517 break; 1518 default: 1519 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1520 } 1521 PetscFunctionReturn(0); 1522 } 1523 1524 #undef __FUNCT__ 1525 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1526 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1527 { 1528 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1529 PetscErrorCode ierr; 1530 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1531 MatScalar *aa,*aa_i; 1532 PetscScalar *v_i; 1533 1534 PetscFunctionBegin; 1535 bs = A->rmap->bs; 1536 ai = a->i; 1537 aj = a->j; 1538 aa = a->a; 1539 bs2 = a->bs2; 1540 1541 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1542 1543 bn = row/bs; /* Block number */ 1544 bp = row % bs; /* Block Position */ 1545 M = ai[bn+1] - ai[bn]; 1546 *nz = bs*M; 1547 1548 if (v) { 1549 *v = 0; 1550 if (*nz) { 1551 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1552 for (i=0; i<M; i++) { /* for each block in the block row */ 1553 v_i = *v + i*bs; 1554 aa_i = aa + bs2*(ai[bn] + i); 1555 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1556 } 1557 } 1558 } 1559 1560 if (idx) { 1561 *idx = 0; 1562 if (*nz) { 1563 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1564 for (i=0; i<M; i++) { /* for each block in the block row */ 1565 idx_i = *idx + i*bs; 1566 itmp = bs*aj[ai[bn] + i]; 1567 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1568 } 1569 } 1570 } 1571 PetscFunctionReturn(0); 1572 } 1573 1574 #undef __FUNCT__ 1575 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1576 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1577 { 1578 PetscErrorCode ierr; 1579 1580 PetscFunctionBegin; 1581 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1582 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1583 PetscFunctionReturn(0); 1584 } 1585 1586 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1587 1588 #undef __FUNCT__ 1589 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1590 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1591 { 1592 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1593 Mat C; 1594 PetscErrorCode ierr; 1595 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1596 PetscInt *rows,*cols,bs2=a->bs2; 1597 MatScalar *array; 1598 1599 PetscFunctionBegin; 1600 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1601 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1602 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1603 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1604 1605 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1606 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1607 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1608 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1609 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1610 ierr = PetscFree(col);CHKERRQ(ierr); 1611 } else { 1612 C = *B; 1613 } 1614 1615 array = a->a; 1616 ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 1617 for (i=0; i<mbs; i++) { 1618 cols[0] = i*bs; 1619 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1620 len = ai[i+1] - ai[i]; 1621 for (j=0; j<len; j++) { 1622 rows[0] = (*aj++)*bs; 1623 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1624 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1625 array += bs2; 1626 } 1627 } 1628 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1629 1630 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1631 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1632 1633 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1634 *B = C; 1635 } else { 1636 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1637 } 1638 PetscFunctionReturn(0); 1639 } 1640 1641 #undef __FUNCT__ 1642 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1643 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1644 { 1645 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1646 PetscErrorCode ierr; 1647 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1648 int fd; 1649 PetscScalar *aa; 1650 FILE *file; 1651 1652 PetscFunctionBegin; 1653 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1654 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1655 col_lens[0] = MAT_FILE_CLASSID; 1656 1657 col_lens[1] = A->rmap->N; 1658 col_lens[2] = A->cmap->n; 1659 col_lens[3] = a->nz*bs2; 1660 1661 /* store lengths of each row and write (including header) to file */ 1662 count = 0; 1663 for (i=0; i<a->mbs; i++) { 1664 for (j=0; j<bs; j++) { 1665 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1666 } 1667 } 1668 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1669 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1670 1671 /* store column indices (zero start index) */ 1672 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1673 count = 0; 1674 for (i=0; i<a->mbs; i++) { 1675 for (j=0; j<bs; j++) { 1676 for (k=a->i[i]; k<a->i[i+1]; k++) { 1677 for (l=0; l<bs; l++) { 1678 jj[count++] = bs*a->j[k] + l; 1679 } 1680 } 1681 } 1682 } 1683 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1684 ierr = PetscFree(jj);CHKERRQ(ierr); 1685 1686 /* store nonzero values */ 1687 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1688 count = 0; 1689 for (i=0; i<a->mbs; i++) { 1690 for (j=0; j<bs; j++) { 1691 for (k=a->i[i]; k<a->i[i+1]; k++) { 1692 for (l=0; l<bs; l++) { 1693 aa[count++] = a->a[bs2*k + l*bs + j]; 1694 } 1695 } 1696 } 1697 } 1698 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1699 ierr = PetscFree(aa);CHKERRQ(ierr); 1700 1701 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1702 if (file) { 1703 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1704 } 1705 PetscFunctionReturn(0); 1706 } 1707 1708 #undef __FUNCT__ 1709 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1710 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1711 { 1712 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1713 PetscErrorCode ierr; 1714 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1715 PetscViewerFormat format; 1716 1717 PetscFunctionBegin; 1718 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1719 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1720 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1721 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1722 Mat aij; 1723 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1724 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1725 ierr = MatDestroy(aij);CHKERRQ(ierr); 1726 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1727 PetscFunctionReturn(0); 1728 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1729 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1730 for (i=0; i<a->mbs; i++) { 1731 for (j=0; j<bs; j++) { 1732 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1733 for (k=a->i[i]; k<a->i[i+1]; k++) { 1734 for (l=0; l<bs; l++) { 1735 #if defined(PETSC_USE_COMPLEX) 1736 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1737 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1738 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1739 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1740 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1741 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1742 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1743 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1744 } 1745 #else 1746 if (a->a[bs2*k + l*bs + j] != 0.0) { 1747 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1748 } 1749 #endif 1750 } 1751 } 1752 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1753 } 1754 } 1755 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1756 } else { 1757 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1758 for (i=0; i<a->mbs; i++) { 1759 for (j=0; j<bs; j++) { 1760 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1761 for (k=a->i[i]; k<a->i[i+1]; k++) { 1762 for (l=0; l<bs; l++) { 1763 #if defined(PETSC_USE_COMPLEX) 1764 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1765 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1766 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1767 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1768 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1769 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1770 } else { 1771 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1772 } 1773 #else 1774 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1775 #endif 1776 } 1777 } 1778 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1779 } 1780 } 1781 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1782 } 1783 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1784 PetscFunctionReturn(0); 1785 } 1786 1787 #undef __FUNCT__ 1788 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1789 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1790 { 1791 Mat A = (Mat) Aa; 1792 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1793 PetscErrorCode ierr; 1794 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1795 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1796 MatScalar *aa; 1797 PetscViewer viewer; 1798 1799 PetscFunctionBegin; 1800 1801 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1802 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1803 1804 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1805 1806 /* loop over matrix elements drawing boxes */ 1807 color = PETSC_DRAW_BLUE; 1808 for (i=0,row=0; i<mbs; i++,row+=bs) { 1809 for (j=a->i[i]; j<a->i[i+1]; j++) { 1810 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1811 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1812 aa = a->a + j*bs2; 1813 for (k=0; k<bs; k++) { 1814 for (l=0; l<bs; l++) { 1815 if (PetscRealPart(*aa++) >= 0.) continue; 1816 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1817 } 1818 } 1819 } 1820 } 1821 color = PETSC_DRAW_CYAN; 1822 for (i=0,row=0; i<mbs; i++,row+=bs) { 1823 for (j=a->i[i]; j<a->i[i+1]; j++) { 1824 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1825 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1826 aa = a->a + j*bs2; 1827 for (k=0; k<bs; k++) { 1828 for (l=0; l<bs; l++) { 1829 if (PetscRealPart(*aa++) != 0.) continue; 1830 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1831 } 1832 } 1833 } 1834 } 1835 1836 color = PETSC_DRAW_RED; 1837 for (i=0,row=0; i<mbs; i++,row+=bs) { 1838 for (j=a->i[i]; j<a->i[i+1]; j++) { 1839 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1840 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1841 aa = a->a + j*bs2; 1842 for (k=0; k<bs; k++) { 1843 for (l=0; l<bs; l++) { 1844 if (PetscRealPart(*aa++) <= 0.) continue; 1845 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1846 } 1847 } 1848 } 1849 } 1850 PetscFunctionReturn(0); 1851 } 1852 1853 #undef __FUNCT__ 1854 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1855 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1856 { 1857 PetscErrorCode ierr; 1858 PetscReal xl,yl,xr,yr,w,h; 1859 PetscDraw draw; 1860 PetscTruth isnull; 1861 1862 PetscFunctionBegin; 1863 1864 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1865 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1866 1867 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1868 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1869 xr += w; yr += h; xl = -w; yl = -h; 1870 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1871 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1872 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1873 PetscFunctionReturn(0); 1874 } 1875 1876 #undef __FUNCT__ 1877 #define __FUNCT__ "MatView_SeqBAIJ" 1878 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1879 { 1880 PetscErrorCode ierr; 1881 PetscTruth iascii,isbinary,isdraw; 1882 1883 PetscFunctionBegin; 1884 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1885 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1886 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1887 if (iascii){ 1888 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1889 } else if (isbinary) { 1890 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1891 } else if (isdraw) { 1892 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1893 } else { 1894 Mat B; 1895 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1896 ierr = MatView(B,viewer);CHKERRQ(ierr); 1897 ierr = MatDestroy(B);CHKERRQ(ierr); 1898 } 1899 PetscFunctionReturn(0); 1900 } 1901 1902 1903 #undef __FUNCT__ 1904 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1905 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1906 { 1907 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1908 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1909 PetscInt *ai = a->i,*ailen = a->ilen; 1910 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1911 MatScalar *ap,*aa = a->a; 1912 1913 PetscFunctionBegin; 1914 for (k=0; k<m; k++) { /* loop over rows */ 1915 row = im[k]; brow = row/bs; 1916 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1917 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1918 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1919 nrow = ailen[brow]; 1920 for (l=0; l<n; l++) { /* loop over columns */ 1921 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1922 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1923 col = in[l] ; 1924 bcol = col/bs; 1925 cidx = col%bs; 1926 ridx = row%bs; 1927 high = nrow; 1928 low = 0; /* assume unsorted */ 1929 while (high-low > 5) { 1930 t = (low+high)/2; 1931 if (rp[t] > bcol) high = t; 1932 else low = t; 1933 } 1934 for (i=low; i<high; i++) { 1935 if (rp[i] > bcol) break; 1936 if (rp[i] == bcol) { 1937 *v++ = ap[bs2*i+bs*cidx+ridx]; 1938 goto finished; 1939 } 1940 } 1941 *v++ = 0.0; 1942 finished:; 1943 } 1944 } 1945 PetscFunctionReturn(0); 1946 } 1947 1948 #undef __FUNCT__ 1949 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1950 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1951 { 1952 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1953 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1954 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1955 PetscErrorCode ierr; 1956 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1957 PetscTruth roworiented=a->roworiented; 1958 const PetscScalar *value = v; 1959 MatScalar *ap,*aa = a->a,*bap; 1960 1961 PetscFunctionBegin; 1962 if (roworiented) { 1963 stepval = (n-1)*bs; 1964 } else { 1965 stepval = (m-1)*bs; 1966 } 1967 for (k=0; k<m; k++) { /* loop over added rows */ 1968 row = im[k]; 1969 if (row < 0) continue; 1970 #if defined(PETSC_USE_DEBUG) 1971 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1972 #endif 1973 rp = aj + ai[row]; 1974 ap = aa + bs2*ai[row]; 1975 rmax = imax[row]; 1976 nrow = ailen[row]; 1977 low = 0; 1978 high = nrow; 1979 for (l=0; l<n; l++) { /* loop over added columns */ 1980 if (in[l] < 0) continue; 1981 #if defined(PETSC_USE_DEBUG) 1982 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1983 #endif 1984 col = in[l]; 1985 if (roworiented) { 1986 value = v + (k*(stepval+bs) + l)*bs; 1987 } else { 1988 value = v + (l*(stepval+bs) + k)*bs; 1989 } 1990 if (col <= lastcol) low = 0; else high = nrow; 1991 lastcol = col; 1992 while (high-low > 7) { 1993 t = (low+high)/2; 1994 if (rp[t] > col) high = t; 1995 else low = t; 1996 } 1997 for (i=low; i<high; i++) { 1998 if (rp[i] > col) break; 1999 if (rp[i] == col) { 2000 bap = ap + bs2*i; 2001 if (roworiented) { 2002 if (is == ADD_VALUES) { 2003 for (ii=0; ii<bs; ii++,value+=stepval) { 2004 for (jj=ii; jj<bs2; jj+=bs) { 2005 bap[jj] += *value++; 2006 } 2007 } 2008 } else { 2009 for (ii=0; ii<bs; ii++,value+=stepval) { 2010 for (jj=ii; jj<bs2; jj+=bs) { 2011 bap[jj] = *value++; 2012 } 2013 } 2014 } 2015 } else { 2016 if (is == ADD_VALUES) { 2017 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2018 for (jj=0; jj<bs; jj++) { 2019 bap[jj] += value[jj]; 2020 } 2021 bap += bs; 2022 } 2023 } else { 2024 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2025 for (jj=0; jj<bs; jj++) { 2026 bap[jj] = value[jj]; 2027 } 2028 bap += bs; 2029 } 2030 } 2031 } 2032 goto noinsert2; 2033 } 2034 } 2035 if (nonew == 1) goto noinsert2; 2036 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2037 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2038 N = nrow++ - 1; high++; 2039 /* shift up all the later entries in this row */ 2040 for (ii=N; ii>=i; ii--) { 2041 rp[ii+1] = rp[ii]; 2042 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2043 } 2044 if (N >= i) { 2045 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2046 } 2047 rp[i] = col; 2048 bap = ap + bs2*i; 2049 if (roworiented) { 2050 for (ii=0; ii<bs; ii++,value+=stepval) { 2051 for (jj=ii; jj<bs2; jj+=bs) { 2052 bap[jj] = *value++; 2053 } 2054 } 2055 } else { 2056 for (ii=0; ii<bs; ii++,value+=stepval) { 2057 for (jj=0; jj<bs; jj++) { 2058 *bap++ = *value++; 2059 } 2060 } 2061 } 2062 noinsert2:; 2063 low = i; 2064 } 2065 ailen[row] = nrow; 2066 } 2067 PetscFunctionReturn(0); 2068 } 2069 2070 #undef __FUNCT__ 2071 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 2072 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 2073 { 2074 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2075 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 2076 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 2077 PetscErrorCode ierr; 2078 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 2079 MatScalar *aa = a->a,*ap; 2080 PetscReal ratio=0.6; 2081 2082 PetscFunctionBegin; 2083 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 2084 2085 if (m) rmax = ailen[0]; 2086 for (i=1; i<mbs; i++) { 2087 /* move each row back by the amount of empty slots (fshift) before it*/ 2088 fshift += imax[i-1] - ailen[i-1]; 2089 rmax = PetscMax(rmax,ailen[i]); 2090 if (fshift) { 2091 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 2092 N = ailen[i]; 2093 for (j=0; j<N; j++) { 2094 ip[j-fshift] = ip[j]; 2095 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2096 } 2097 } 2098 ai[i] = ai[i-1] + ailen[i-1]; 2099 } 2100 if (mbs) { 2101 fshift += imax[mbs-1] - ailen[mbs-1]; 2102 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 2103 } 2104 /* reset ilen and imax for each row */ 2105 for (i=0; i<mbs; i++) { 2106 ailen[i] = imax[i] = ai[i+1] - ai[i]; 2107 } 2108 a->nz = ai[mbs]; 2109 2110 /* diagonals may have moved, so kill the diagonal pointers */ 2111 a->idiagvalid = PETSC_FALSE; 2112 if (fshift && a->diag) { 2113 ierr = PetscFree(a->diag);CHKERRQ(ierr); 2114 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2115 a->diag = 0; 2116 } 2117 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 2118 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 2119 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 2120 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 2121 A->info.mallocs += a->reallocs; 2122 a->reallocs = 0; 2123 A->info.nz_unneeded = (PetscReal)fshift*bs2; 2124 2125 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 2126 if (a->compressedrow.use){ 2127 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 2128 } 2129 2130 A->same_nonzero = PETSC_TRUE; 2131 PetscFunctionReturn(0); 2132 } 2133 2134 /* 2135 This function returns an array of flags which indicate the locations of contiguous 2136 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2137 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2138 Assume: sizes should be long enough to hold all the values. 2139 */ 2140 #undef __FUNCT__ 2141 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 2142 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2143 { 2144 PetscInt i,j,k,row; 2145 PetscTruth flg; 2146 2147 PetscFunctionBegin; 2148 for (i=0,j=0; i<n; j++) { 2149 row = idx[i]; 2150 if (row%bs!=0) { /* Not the begining of a block */ 2151 sizes[j] = 1; 2152 i++; 2153 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2154 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 2155 i++; 2156 } else { /* Begining of the block, so check if the complete block exists */ 2157 flg = PETSC_TRUE; 2158 for (k=1; k<bs; k++) { 2159 if (row+k != idx[i+k]) { /* break in the block */ 2160 flg = PETSC_FALSE; 2161 break; 2162 } 2163 } 2164 if (flg) { /* No break in the bs */ 2165 sizes[j] = bs; 2166 i+= bs; 2167 } else { 2168 sizes[j] = 1; 2169 i++; 2170 } 2171 } 2172 } 2173 *bs_max = j; 2174 PetscFunctionReturn(0); 2175 } 2176 2177 #undef __FUNCT__ 2178 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2179 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 2180 { 2181 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2182 PetscErrorCode ierr; 2183 PetscInt i,j,k,count,*rows; 2184 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2185 PetscScalar zero = 0.0; 2186 MatScalar *aa; 2187 2188 PetscFunctionBegin; 2189 /* Make a copy of the IS and sort it */ 2190 /* allocate memory for rows,sizes */ 2191 ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2192 2193 /* copy IS values to rows, and sort them */ 2194 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2195 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2196 if (baij->keepnonzeropattern) { 2197 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2198 bs_max = is_n; 2199 A->same_nonzero = PETSC_TRUE; 2200 } else { 2201 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2202 A->same_nonzero = PETSC_FALSE; 2203 } 2204 2205 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2206 row = rows[j]; 2207 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2208 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2209 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2210 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2211 if (diag != 0.0) { 2212 if (baij->ilen[row/bs] > 0) { 2213 baij->ilen[row/bs] = 1; 2214 baij->j[baij->i[row/bs]] = row/bs; 2215 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2216 } 2217 /* Now insert all the diagonal values for this bs */ 2218 for (k=0; k<bs; k++) { 2219 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2220 } 2221 } else { /* (diag == 0.0) */ 2222 baij->ilen[row/bs] = 0; 2223 } /* end (diag == 0.0) */ 2224 } else { /* (sizes[i] != bs) */ 2225 #if defined (PETSC_USE_DEBUG) 2226 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2227 #endif 2228 for (k=0; k<count; k++) { 2229 aa[0] = zero; 2230 aa += bs; 2231 } 2232 if (diag != 0.0) { 2233 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2234 } 2235 } 2236 } 2237 2238 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2239 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2240 PetscFunctionReturn(0); 2241 } 2242 2243 #undef __FUNCT__ 2244 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2245 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2246 { 2247 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2248 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2249 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2250 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2251 PetscErrorCode ierr; 2252 PetscInt ridx,cidx,bs2=a->bs2; 2253 PetscTruth roworiented=a->roworiented; 2254 MatScalar *ap,value,*aa=a->a,*bap; 2255 2256 PetscFunctionBegin; 2257 if (v) PetscValidScalarPointer(v,6); 2258 for (k=0; k<m; k++) { /* loop over added rows */ 2259 row = im[k]; 2260 brow = row/bs; 2261 if (row < 0) continue; 2262 #if defined(PETSC_USE_DEBUG) 2263 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2264 #endif 2265 rp = aj + ai[brow]; 2266 ap = aa + bs2*ai[brow]; 2267 rmax = imax[brow]; 2268 nrow = ailen[brow]; 2269 low = 0; 2270 high = nrow; 2271 for (l=0; l<n; l++) { /* loop over added columns */ 2272 if (in[l] < 0) continue; 2273 #if defined(PETSC_USE_DEBUG) 2274 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2275 #endif 2276 col = in[l]; bcol = col/bs; 2277 ridx = row % bs; cidx = col % bs; 2278 if (roworiented) { 2279 value = v[l + k*n]; 2280 } else { 2281 value = v[k + l*m]; 2282 } 2283 if (col <= lastcol) low = 0; else high = nrow; 2284 lastcol = col; 2285 while (high-low > 7) { 2286 t = (low+high)/2; 2287 if (rp[t] > bcol) high = t; 2288 else low = t; 2289 } 2290 for (i=low; i<high; i++) { 2291 if (rp[i] > bcol) break; 2292 if (rp[i] == bcol) { 2293 bap = ap + bs2*i + bs*cidx + ridx; 2294 if (is == ADD_VALUES) *bap += value; 2295 else *bap = value; 2296 goto noinsert1; 2297 } 2298 } 2299 if (nonew == 1) goto noinsert1; 2300 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2301 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2302 N = nrow++ - 1; high++; 2303 /* shift up all the later entries in this row */ 2304 for (ii=N; ii>=i; ii--) { 2305 rp[ii+1] = rp[ii]; 2306 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2307 } 2308 if (N>=i) { 2309 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2310 } 2311 rp[i] = bcol; 2312 ap[bs2*i + bs*cidx + ridx] = value; 2313 a->nz++; 2314 noinsert1:; 2315 low = i; 2316 } 2317 ailen[brow] = nrow; 2318 } 2319 A->same_nonzero = PETSC_FALSE; 2320 PetscFunctionReturn(0); 2321 } 2322 2323 #undef __FUNCT__ 2324 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2325 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2326 { 2327 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2328 Mat outA; 2329 PetscErrorCode ierr; 2330 PetscTruth row_identity,col_identity; 2331 2332 PetscFunctionBegin; 2333 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2334 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2335 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2336 if (!row_identity || !col_identity) { 2337 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2338 } 2339 2340 outA = inA; 2341 inA->factortype = MAT_FACTOR_LU; 2342 2343 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2344 2345 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2346 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 2347 a->row = row; 2348 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2349 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 2350 a->col = col; 2351 2352 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2353 if (a->icol) { 2354 ierr = ISDestroy(a->icol);CHKERRQ(ierr); 2355 } 2356 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2357 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2358 2359 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr); 2360 if (!a->solve_work) { 2361 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2362 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2363 } 2364 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2365 2366 PetscFunctionReturn(0); 2367 } 2368 2369 EXTERN_C_BEGIN 2370 #undef __FUNCT__ 2371 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2372 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2373 { 2374 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2375 PetscInt i,nz,mbs; 2376 2377 PetscFunctionBegin; 2378 nz = baij->maxnz; 2379 mbs = baij->mbs; 2380 for (i=0; i<nz; i++) { 2381 baij->j[i] = indices[i]; 2382 } 2383 baij->nz = nz; 2384 for (i=0; i<mbs; i++) { 2385 baij->ilen[i] = baij->imax[i]; 2386 } 2387 PetscFunctionReturn(0); 2388 } 2389 EXTERN_C_END 2390 2391 #undef __FUNCT__ 2392 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2393 /*@ 2394 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2395 in the matrix. 2396 2397 Input Parameters: 2398 + mat - the SeqBAIJ matrix 2399 - indices - the column indices 2400 2401 Level: advanced 2402 2403 Notes: 2404 This can be called if you have precomputed the nonzero structure of the 2405 matrix and want to provide it to the matrix object to improve the performance 2406 of the MatSetValues() operation. 2407 2408 You MUST have set the correct numbers of nonzeros per row in the call to 2409 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2410 2411 MUST be called before any calls to MatSetValues(); 2412 2413 @*/ 2414 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2415 { 2416 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 2417 2418 PetscFunctionBegin; 2419 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2420 PetscValidPointer(indices,2); 2421 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 2422 if (f) { 2423 ierr = (*f)(mat,indices);CHKERRQ(ierr); 2424 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 2425 PetscFunctionReturn(0); 2426 } 2427 2428 #undef __FUNCT__ 2429 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2430 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2431 { 2432 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2433 PetscErrorCode ierr; 2434 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2435 PetscReal atmp; 2436 PetscScalar *x,zero = 0.0; 2437 MatScalar *aa; 2438 PetscInt ncols,brow,krow,kcol; 2439 2440 PetscFunctionBegin; 2441 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2442 bs = A->rmap->bs; 2443 aa = a->a; 2444 ai = a->i; 2445 aj = a->j; 2446 mbs = a->mbs; 2447 2448 ierr = VecSet(v,zero);CHKERRQ(ierr); 2449 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2450 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2451 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2452 for (i=0; i<mbs; i++) { 2453 ncols = ai[1] - ai[0]; ai++; 2454 brow = bs*i; 2455 for (j=0; j<ncols; j++){ 2456 for (kcol=0; kcol<bs; kcol++){ 2457 for (krow=0; krow<bs; krow++){ 2458 atmp = PetscAbsScalar(*aa);aa++; 2459 row = brow + krow; /* row index */ 2460 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2461 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2462 } 2463 } 2464 aj++; 2465 } 2466 } 2467 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2468 PetscFunctionReturn(0); 2469 } 2470 2471 #undef __FUNCT__ 2472 #define __FUNCT__ "MatCopy_SeqBAIJ" 2473 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2474 { 2475 PetscErrorCode ierr; 2476 2477 PetscFunctionBegin; 2478 /* If the two matrices have the same copy implementation, use fast copy. */ 2479 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2480 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2481 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2482 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2483 2484 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2485 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2486 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2487 } else { 2488 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2489 } 2490 PetscFunctionReturn(0); 2491 } 2492 2493 #undef __FUNCT__ 2494 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2495 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2496 { 2497 PetscErrorCode ierr; 2498 2499 PetscFunctionBegin; 2500 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2501 PetscFunctionReturn(0); 2502 } 2503 2504 #undef __FUNCT__ 2505 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2506 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2507 { 2508 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2509 PetscFunctionBegin; 2510 *array = a->a; 2511 PetscFunctionReturn(0); 2512 } 2513 2514 #undef __FUNCT__ 2515 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2516 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2517 { 2518 PetscFunctionBegin; 2519 PetscFunctionReturn(0); 2520 } 2521 2522 #undef __FUNCT__ 2523 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2524 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2525 { 2526 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2527 PetscErrorCode ierr; 2528 PetscInt i,bs=Y->rmap->bs,j,bs2; 2529 PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz); 2530 2531 PetscFunctionBegin; 2532 if (str == SAME_NONZERO_PATTERN) { 2533 PetscScalar alpha = a; 2534 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2535 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2536 if (y->xtoy && y->XtoY != X) { 2537 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2538 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2539 } 2540 if (!y->xtoy) { /* get xtoy */ 2541 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2542 y->XtoY = X; 2543 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2544 } 2545 bs2 = bs*bs; 2546 for (i=0; i<x->nz; i++) { 2547 j = 0; 2548 while (j < bs2){ 2549 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2550 j++; 2551 } 2552 } 2553 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2554 } else { 2555 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2556 } 2557 PetscFunctionReturn(0); 2558 } 2559 2560 #undef __FUNCT__ 2561 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ" 2562 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs) 2563 { 2564 PetscInt rbs,cbs; 2565 PetscErrorCode ierr; 2566 2567 PetscFunctionBegin; 2568 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 2569 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 2570 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 2571 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 2572 PetscFunctionReturn(0); 2573 } 2574 2575 #undef __FUNCT__ 2576 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2577 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2578 { 2579 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2580 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2581 MatScalar *aa = a->a; 2582 2583 PetscFunctionBegin; 2584 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2585 PetscFunctionReturn(0); 2586 } 2587 2588 #undef __FUNCT__ 2589 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2590 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2591 { 2592 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2593 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2594 MatScalar *aa = a->a; 2595 2596 PetscFunctionBegin; 2597 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2598 PetscFunctionReturn(0); 2599 } 2600 2601 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2602 2603 #undef __FUNCT__ 2604 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2605 /* 2606 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2607 */ 2608 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 2609 { 2610 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2611 PetscErrorCode ierr; 2612 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2613 PetscInt nz = a->i[m],row,*jj,mr,col; 2614 2615 PetscFunctionBegin; 2616 *nn = n; 2617 if (!ia) PetscFunctionReturn(0); 2618 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2619 else { 2620 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2621 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2622 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2623 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2624 jj = a->j; 2625 for (i=0; i<nz; i++) { 2626 collengths[jj[i]]++; 2627 } 2628 cia[0] = oshift; 2629 for (i=0; i<n; i++) { 2630 cia[i+1] = cia[i] + collengths[i]; 2631 } 2632 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2633 jj = a->j; 2634 for (row=0; row<m; row++) { 2635 mr = a->i[row+1] - a->i[row]; 2636 for (i=0; i<mr; i++) { 2637 col = *jj++; 2638 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2639 } 2640 } 2641 ierr = PetscFree(collengths);CHKERRQ(ierr); 2642 *ia = cia; *ja = cja; 2643 } 2644 PetscFunctionReturn(0); 2645 } 2646 2647 #undef __FUNCT__ 2648 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2649 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 2650 { 2651 PetscErrorCode ierr; 2652 2653 PetscFunctionBegin; 2654 if (!ia) PetscFunctionReturn(0); 2655 ierr = PetscFree(*ia);CHKERRQ(ierr); 2656 ierr = PetscFree(*ja);CHKERRQ(ierr); 2657 PetscFunctionReturn(0); 2658 } 2659 2660 #undef __FUNCT__ 2661 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2662 PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2663 { 2664 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f; 2665 PetscErrorCode ierr; 2666 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2; 2667 PetscScalar dx,*y,*xx,*w3_array; 2668 PetscScalar *vscale_array; 2669 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2670 Vec w1=coloring->w1,w2=coloring->w2,w3; 2671 void *fctx = coloring->fctx; 2672 PetscTruth flg = PETSC_FALSE; 2673 PetscInt ctype=coloring->ctype,N,col_start=0,col_end=0; 2674 Vec x1_tmp; 2675 2676 PetscFunctionBegin; 2677 PetscValidHeaderSpecific(J,MAT_CLASSID,1); 2678 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2); 2679 PetscValidHeaderSpecific(x1,VEC_CLASSID,3); 2680 if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2681 2682 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2683 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2684 ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr); 2685 if (flg) { 2686 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2687 } else { 2688 PetscTruth assembled; 2689 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2690 if (assembled) { 2691 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2692 } 2693 } 2694 2695 x1_tmp = x1; 2696 if (!coloring->vscale){ 2697 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2698 } 2699 2700 /* 2701 This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets 2702 coloring->F for the coarser grids from the finest 2703 */ 2704 if (coloring->F) { 2705 ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr); 2706 ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr); 2707 if (m1 != m2) { 2708 coloring->F = 0; 2709 } 2710 } 2711 2712 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2713 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2714 } 2715 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2716 2717 /* Set w1 = F(x1) */ 2718 if (coloring->F) { 2719 w1 = coloring->F; /* use already computed value of function */ 2720 coloring->F = 0; 2721 } else { 2722 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2723 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2724 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2725 } 2726 2727 if (!coloring->w3) { 2728 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2729 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2730 } 2731 w3 = coloring->w3; 2732 2733 CHKMEMQ; 2734 /* Compute all the local scale factors, including ghost points */ 2735 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2736 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2737 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2738 if (ctype == IS_COLORING_GHOSTED){ 2739 col_start = 0; col_end = N; 2740 } else if (ctype == IS_COLORING_GLOBAL){ 2741 xx = xx - start; 2742 vscale_array = vscale_array - start; 2743 col_start = start; col_end = N + start; 2744 } CHKMEMQ; 2745 for (col=col_start; col<col_end; col++){ 2746 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2747 if (coloring->htype[0] == 'w') { 2748 dx = 1.0 + unorm; 2749 } else { 2750 dx = xx[col]; 2751 } 2752 if (dx == 0.0) dx = 1.0; 2753 #if !defined(PETSC_USE_COMPLEX) 2754 if (dx < umin && dx >= 0.0) dx = umin; 2755 else if (dx < 0.0 && dx > -umin) dx = -umin; 2756 #else 2757 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2758 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2759 #endif 2760 dx *= epsilon; 2761 vscale_array[col] = 1.0/dx; 2762 } CHKMEMQ; 2763 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2764 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2765 if (ctype == IS_COLORING_GLOBAL){ 2766 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2767 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2768 } 2769 CHKMEMQ; 2770 if (coloring->vscaleforrow) { 2771 vscaleforrow = coloring->vscaleforrow; 2772 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2773 2774 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2775 /* 2776 Loop over each color 2777 */ 2778 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2779 for (k=0; k<coloring->ncolors; k++) { 2780 coloring->currentcolor = k; 2781 for (i=0; i<bs; i++) { 2782 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2783 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2784 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2785 /* 2786 Loop over each column associated with color 2787 adding the perturbation to the vector w3. 2788 */ 2789 for (l=0; l<coloring->ncolumns[k]; l++) { 2790 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2791 if (coloring->htype[0] == 'w') { 2792 dx = 1.0 + unorm; 2793 } else { 2794 dx = xx[col]; 2795 } 2796 if (dx == 0.0) dx = 1.0; 2797 #if !defined(PETSC_USE_COMPLEX) 2798 if (dx < umin && dx >= 0.0) dx = umin; 2799 else if (dx < 0.0 && dx > -umin) dx = -umin; 2800 #else 2801 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2802 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2803 #endif 2804 dx *= epsilon; 2805 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2806 w3_array[col] += dx; 2807 } 2808 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2809 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2810 2811 /* 2812 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2813 w2 = F(x1 + dx) - F(x1) 2814 */ 2815 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2816 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2817 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2818 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2819 2820 /* 2821 Loop over rows of vector, putting results into Jacobian matrix 2822 */ 2823 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2824 for (l=0; l<coloring->nrows[k]; l++) { 2825 row = bs*coloring->rows[k][l]; /* local row index */ 2826 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2827 for (j=0; j<bs; j++) { 2828 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2829 srows[j] = row + start + j; 2830 } 2831 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2832 } 2833 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2834 } 2835 } /* endof for each color */ 2836 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2837 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2838 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2839 ierr = PetscFree(srows);CHKERRQ(ierr); 2840 2841 coloring->currentcolor = -1; 2842 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2843 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2844 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2845 PetscFunctionReturn(0); 2846 } 2847 2848 /* -------------------------------------------------------------------*/ 2849 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2850 MatGetRow_SeqBAIJ, 2851 MatRestoreRow_SeqBAIJ, 2852 MatMult_SeqBAIJ_N, 2853 /* 4*/ MatMultAdd_SeqBAIJ_N, 2854 MatMultTranspose_SeqBAIJ, 2855 MatMultTransposeAdd_SeqBAIJ, 2856 0, 2857 0, 2858 0, 2859 /*10*/ 0, 2860 MatLUFactor_SeqBAIJ, 2861 0, 2862 0, 2863 MatTranspose_SeqBAIJ, 2864 /*15*/ MatGetInfo_SeqBAIJ, 2865 MatEqual_SeqBAIJ, 2866 MatGetDiagonal_SeqBAIJ, 2867 MatDiagonalScale_SeqBAIJ, 2868 MatNorm_SeqBAIJ, 2869 /*20*/ 0, 2870 MatAssemblyEnd_SeqBAIJ, 2871 MatSetOption_SeqBAIJ, 2872 MatZeroEntries_SeqBAIJ, 2873 /*24*/ MatZeroRows_SeqBAIJ, 2874 0, 2875 0, 2876 0, 2877 0, 2878 /*29*/ MatSetUpPreallocation_SeqBAIJ, 2879 0, 2880 0, 2881 MatGetArray_SeqBAIJ, 2882 MatRestoreArray_SeqBAIJ, 2883 /*34*/ MatDuplicate_SeqBAIJ, 2884 0, 2885 0, 2886 MatILUFactor_SeqBAIJ, 2887 0, 2888 /*39*/ MatAXPY_SeqBAIJ, 2889 MatGetSubMatrices_SeqBAIJ, 2890 MatIncreaseOverlap_SeqBAIJ, 2891 MatGetValues_SeqBAIJ, 2892 MatCopy_SeqBAIJ, 2893 /*44*/ 0, 2894 MatScale_SeqBAIJ, 2895 0, 2896 0, 2897 0, 2898 /*49*/ MatSetBlockSize_SeqBAIJ, 2899 MatGetRowIJ_SeqBAIJ, 2900 MatRestoreRowIJ_SeqBAIJ, 2901 MatGetColumnIJ_SeqBAIJ, 2902 MatRestoreColumnIJ_SeqBAIJ, 2903 /*54*/ MatFDColoringCreate_SeqAIJ, 2904 0, 2905 0, 2906 0, 2907 MatSetValuesBlocked_SeqBAIJ, 2908 /*59*/ MatGetSubMatrix_SeqBAIJ, 2909 MatDestroy_SeqBAIJ, 2910 MatView_SeqBAIJ, 2911 0, 2912 0, 2913 /*64*/ 0, 2914 0, 2915 0, 2916 0, 2917 0, 2918 /*69*/ MatGetRowMaxAbs_SeqBAIJ, 2919 0, 2920 MatConvert_Basic, 2921 0, 2922 0, 2923 /*74*/ 0, 2924 MatFDColoringApply_BAIJ, 2925 0, 2926 0, 2927 0, 2928 /*79*/ 0, 2929 0, 2930 0, 2931 0, 2932 MatLoad_SeqBAIJ, 2933 /*84*/ 0, 2934 0, 2935 0, 2936 0, 2937 0, 2938 /*89*/ 0, 2939 0, 2940 0, 2941 0, 2942 0, 2943 /*94*/ 0, 2944 0, 2945 0, 2946 0, 2947 0, 2948 /*99*/0, 2949 0, 2950 0, 2951 0, 2952 0, 2953 /*104*/0, 2954 MatRealPart_SeqBAIJ, 2955 MatImaginaryPart_SeqBAIJ, 2956 0, 2957 0, 2958 /*109*/0, 2959 0, 2960 0, 2961 0, 2962 MatMissingDiagonal_SeqBAIJ, 2963 /*114*/0, 2964 0, 2965 0, 2966 0, 2967 0, 2968 /*119*/0, 2969 0, 2970 MatMultHermitianTranspose_SeqBAIJ, 2971 MatMultHermitianTransposeAdd_SeqBAIJ, 2972 0, 2973 }; 2974 2975 EXTERN_C_BEGIN 2976 #undef __FUNCT__ 2977 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2978 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2979 { 2980 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2981 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2982 PetscErrorCode ierr; 2983 2984 PetscFunctionBegin; 2985 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2986 2987 /* allocate space for values if not already there */ 2988 if (!aij->saved_values) { 2989 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2990 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2991 } 2992 2993 /* copy values over */ 2994 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2995 PetscFunctionReturn(0); 2996 } 2997 EXTERN_C_END 2998 2999 EXTERN_C_BEGIN 3000 #undef __FUNCT__ 3001 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 3002 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 3003 { 3004 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3005 PetscErrorCode ierr; 3006 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 3007 3008 PetscFunctionBegin; 3009 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3010 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 3011 3012 /* copy values over */ 3013 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3014 PetscFunctionReturn(0); 3015 } 3016 EXTERN_C_END 3017 3018 EXTERN_C_BEGIN 3019 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 3020 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 3021 EXTERN_C_END 3022 3023 EXTERN_C_BEGIN 3024 #undef __FUNCT__ 3025 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 3026 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 3027 { 3028 Mat_SeqBAIJ *b; 3029 PetscErrorCode ierr; 3030 PetscInt i,mbs,nbs,bs2,newbs = PetscAbs(bs); 3031 PetscTruth flg,skipallocation = PETSC_FALSE; 3032 3033 PetscFunctionBegin; 3034 3035 if (nz == MAT_SKIP_ALLOCATION) { 3036 skipallocation = PETSC_TRUE; 3037 nz = 0; 3038 } 3039 3040 if (bs < 0) { 3041 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 3042 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 3043 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3044 bs = PetscAbs(bs); 3045 } 3046 if (nnz && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 3047 bs = newbs; 3048 3049 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3050 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3051 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3052 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3053 3054 B->preallocated = PETSC_TRUE; 3055 3056 mbs = B->rmap->n/bs; 3057 nbs = B->cmap->n/bs; 3058 bs2 = bs*bs; 3059 3060 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 3061 3062 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3063 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 3064 if (nnz) { 3065 for (i=0; i<mbs; i++) { 3066 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 3067 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 3068 } 3069 } 3070 3071 b = (Mat_SeqBAIJ*)B->data; 3072 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 3073 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3074 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3075 3076 if (!flg) { 3077 switch (bs) { 3078 case 1: 3079 B->ops->mult = MatMult_SeqBAIJ_1; 3080 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3081 B->ops->sor = MatSOR_SeqBAIJ_1; 3082 break; 3083 case 2: 3084 B->ops->mult = MatMult_SeqBAIJ_2; 3085 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3086 B->ops->sor = MatSOR_SeqBAIJ_2; 3087 break; 3088 case 3: 3089 B->ops->mult = MatMult_SeqBAIJ_3; 3090 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3091 B->ops->sor = MatSOR_SeqBAIJ_3; 3092 break; 3093 case 4: 3094 B->ops->mult = MatMult_SeqBAIJ_4; 3095 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3096 B->ops->sor = MatSOR_SeqBAIJ_4; 3097 break; 3098 case 5: 3099 B->ops->mult = MatMult_SeqBAIJ_5; 3100 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3101 B->ops->sor = MatSOR_SeqBAIJ_5; 3102 break; 3103 case 6: 3104 B->ops->mult = MatMult_SeqBAIJ_6; 3105 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3106 B->ops->sor = MatSOR_SeqBAIJ_6; 3107 break; 3108 case 7: 3109 B->ops->mult = MatMult_SeqBAIJ_7; 3110 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3111 B->ops->sor = MatSOR_SeqBAIJ_7; 3112 break; 3113 case 15: 3114 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3115 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3116 B->ops->sor = MatSOR_SeqBAIJ_N; 3117 break; 3118 default: 3119 B->ops->mult = MatMult_SeqBAIJ_N; 3120 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3121 B->ops->sor = MatSOR_SeqBAIJ_N; 3122 break; 3123 } 3124 } 3125 B->rmap->bs = bs; 3126 b->mbs = mbs; 3127 b->nbs = nbs; 3128 if (!skipallocation) { 3129 if (!b->imax) { 3130 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 3131 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt)); 3132 b->free_imax_ilen = PETSC_TRUE; 3133 } 3134 /* b->ilen will count nonzeros in each block row so far. */ 3135 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 3136 if (!nnz) { 3137 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3138 else if (nz < 0) nz = 1; 3139 for (i=0; i<mbs; i++) b->imax[i] = nz; 3140 nz = nz*mbs; 3141 } else { 3142 nz = 0; 3143 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 3144 } 3145 3146 /* allocate the matrix space */ 3147 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 3148 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 3149 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 3150 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 3151 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3152 b->singlemalloc = PETSC_TRUE; 3153 b->i[0] = 0; 3154 for (i=1; i<mbs+1; i++) { 3155 b->i[i] = b->i[i-1] + b->imax[i-1]; 3156 } 3157 b->free_a = PETSC_TRUE; 3158 b->free_ij = PETSC_TRUE; 3159 } else { 3160 b->free_a = PETSC_FALSE; 3161 b->free_ij = PETSC_FALSE; 3162 } 3163 3164 B->rmap->bs = bs; 3165 b->bs2 = bs2; 3166 b->mbs = mbs; 3167 b->nz = 0; 3168 b->maxnz = nz; 3169 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3170 PetscFunctionReturn(0); 3171 } 3172 EXTERN_C_END 3173 3174 EXTERN_C_BEGIN 3175 #undef __FUNCT__ 3176 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3177 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3178 { 3179 PetscInt i,m,nz,nz_max=0,*nnz; 3180 PetscScalar *values=0; 3181 PetscErrorCode ierr; 3182 3183 PetscFunctionBegin; 3184 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3185 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3186 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3187 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3188 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3189 m = B->rmap->n/bs; 3190 3191 if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 3192 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3193 for(i=0; i<m; i++) { 3194 nz = ii[i+1]- ii[i]; 3195 if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 3196 nz_max = PetscMax(nz_max, nz); 3197 nnz[i] = nz; 3198 } 3199 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3200 ierr = PetscFree(nnz);CHKERRQ(ierr); 3201 3202 values = (PetscScalar*)V; 3203 if (!values) { 3204 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3205 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3206 } 3207 for (i=0; i<m; i++) { 3208 PetscInt ncols = ii[i+1] - ii[i]; 3209 const PetscInt *icols = jj + ii[i]; 3210 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3211 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3212 } 3213 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3214 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3215 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3216 3217 PetscFunctionReturn(0); 3218 } 3219 EXTERN_C_END 3220 3221 3222 EXTERN_C_BEGIN 3223 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3224 #if defined(PETSC_HAVE_MUMPS) 3225 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3226 #endif 3227 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 3228 EXTERN_C_END 3229 3230 /*MC 3231 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3232 block sparse compressed row format. 3233 3234 Options Database Keys: 3235 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3236 3237 Level: beginner 3238 3239 .seealso: MatCreateSeqBAIJ() 3240 M*/ 3241 3242 3243 EXTERN_C_BEGIN 3244 #undef __FUNCT__ 3245 #define __FUNCT__ "MatCreate_SeqBAIJ" 3246 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 3247 { 3248 PetscErrorCode ierr; 3249 PetscMPIInt size; 3250 Mat_SeqBAIJ *b; 3251 3252 PetscFunctionBegin; 3253 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 3254 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3255 3256 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3257 B->data = (void*)b; 3258 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3259 B->mapping = 0; 3260 b->row = 0; 3261 b->col = 0; 3262 b->icol = 0; 3263 b->reallocs = 0; 3264 b->saved_values = 0; 3265 3266 b->roworiented = PETSC_TRUE; 3267 b->nonew = 0; 3268 b->diag = 0; 3269 b->solve_work = 0; 3270 b->mult_work = 0; 3271 B->spptr = 0; 3272 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3273 b->keepnonzeropattern = PETSC_FALSE; 3274 b->xtoy = 0; 3275 b->XtoY = 0; 3276 b->compressedrow.use = PETSC_FALSE; 3277 b->compressedrow.nrows = 0; 3278 b->compressedrow.i = PETSC_NULL; 3279 b->compressedrow.rindex = PETSC_NULL; 3280 b->compressedrow.checked = PETSC_FALSE; 3281 B->same_nonzero = PETSC_FALSE; 3282 3283 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C", 3284 "MatGetFactorAvailable_seqbaij_petsc", 3285 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3286 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C", 3287 "MatGetFactor_seqbaij_petsc", 3288 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3289 #if defined(PETSC_HAVE_MUMPS) 3290 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr); 3291 #endif 3292 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 3293 "MatInvertBlockDiagonal_SeqBAIJ", 3294 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3295 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3296 "MatStoreValues_SeqBAIJ", 3297 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3298 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3299 "MatRetrieveValues_SeqBAIJ", 3300 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3301 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 3302 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 3303 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3304 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 3305 "MatConvert_SeqBAIJ_SeqAIJ", 3306 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3307 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 3308 "MatConvert_SeqBAIJ_SeqSBAIJ", 3309 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3310 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 3311 "MatSeqBAIJSetPreallocation_SeqBAIJ", 3312 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3313 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 3314 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 3315 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3316 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3317 PetscFunctionReturn(0); 3318 } 3319 EXTERN_C_END 3320 3321 #undef __FUNCT__ 3322 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3323 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscTruth mallocmatspace) 3324 { 3325 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3326 PetscErrorCode ierr; 3327 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3328 3329 PetscFunctionBegin; 3330 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3331 3332 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3333 c->imax = a->imax; 3334 c->ilen = a->ilen; 3335 c->free_imax_ilen = PETSC_FALSE; 3336 } else { 3337 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3338 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3339 for (i=0; i<mbs; i++) { 3340 c->imax[i] = a->imax[i]; 3341 c->ilen[i] = a->ilen[i]; 3342 } 3343 c->free_imax_ilen = PETSC_TRUE; 3344 } 3345 3346 /* allocate the matrix space */ 3347 if (mallocmatspace){ 3348 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3349 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3350 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3351 c->singlemalloc = PETSC_FALSE; 3352 c->free_ij = PETSC_FALSE; 3353 c->i = a->i; 3354 c->j = a->j; 3355 c->parent = A; 3356 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3357 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3358 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3359 } else { 3360 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3361 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3362 c->singlemalloc = PETSC_TRUE; 3363 c->free_ij = PETSC_TRUE; 3364 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3365 if (mbs > 0) { 3366 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3367 if (cpvalues == MAT_COPY_VALUES) { 3368 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3369 } else { 3370 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3371 } 3372 } 3373 } 3374 } 3375 3376 c->roworiented = a->roworiented; 3377 c->nonew = a->nonew; 3378 ierr = PetscLayoutCopy(A->rmap,&C->rmap);CHKERRQ(ierr); 3379 ierr = PetscLayoutCopy(A->cmap,&C->cmap);CHKERRQ(ierr); 3380 c->bs2 = a->bs2; 3381 c->mbs = a->mbs; 3382 c->nbs = a->nbs; 3383 3384 if (a->diag) { 3385 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3386 c->diag = a->diag; 3387 c->free_diag = PETSC_FALSE; 3388 } else { 3389 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3390 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3391 for (i=0; i<mbs; i++) { 3392 c->diag[i] = a->diag[i]; 3393 } 3394 c->free_diag = PETSC_TRUE; 3395 } 3396 } else c->diag = 0; 3397 c->nz = a->nz; 3398 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3399 c->solve_work = 0; 3400 c->mult_work = 0; 3401 c->free_a = PETSC_TRUE; 3402 c->free_ij = PETSC_TRUE; 3403 C->preallocated = PETSC_TRUE; 3404 C->assembled = PETSC_TRUE; 3405 3406 c->compressedrow.use = a->compressedrow.use; 3407 c->compressedrow.nrows = a->compressedrow.nrows; 3408 c->compressedrow.checked = a->compressedrow.checked; 3409 if (a->compressedrow.checked && a->compressedrow.use){ 3410 i = a->compressedrow.nrows; 3411 ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 3412 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3413 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3414 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3415 } else { 3416 c->compressedrow.use = PETSC_FALSE; 3417 c->compressedrow.i = PETSC_NULL; 3418 c->compressedrow.rindex = PETSC_NULL; 3419 } 3420 C->same_nonzero = A->same_nonzero; 3421 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3422 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3423 PetscFunctionReturn(0); 3424 } 3425 3426 #undef __FUNCT__ 3427 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3428 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3429 { 3430 PetscErrorCode ierr; 3431 3432 PetscFunctionBegin; 3433 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 3434 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3435 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3436 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE); 3437 PetscFunctionReturn(0); 3438 } 3439 3440 #undef __FUNCT__ 3441 #define __FUNCT__ "MatLoad_SeqBAIJ" 3442 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3443 { 3444 Mat_SeqBAIJ *a; 3445 PetscErrorCode ierr; 3446 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3447 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3448 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3449 PetscInt *masked,nmask,tmp,bs2,ishift; 3450 PetscMPIInt size; 3451 int fd; 3452 PetscScalar *aa; 3453 MPI_Comm comm = ((PetscObject)viewer)->comm; 3454 3455 PetscFunctionBegin; 3456 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3457 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3458 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3459 bs2 = bs*bs; 3460 3461 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3462 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3463 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3464 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3465 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3466 M = header[1]; N = header[2]; nz = header[3]; 3467 3468 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3469 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3470 3471 /* 3472 This code adds extra rows to make sure the number of rows is 3473 divisible by the blocksize 3474 */ 3475 mbs = M/bs; 3476 extra_rows = bs - M + bs*(mbs); 3477 if (extra_rows == bs) extra_rows = 0; 3478 else mbs++; 3479 if (extra_rows) { 3480 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3481 } 3482 3483 /* Set global sizes if not already set */ 3484 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3485 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3486 } else { /* Check if the matrix global sizes are correct */ 3487 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3488 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3489 } 3490 3491 /* read in row lengths */ 3492 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3493 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3494 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3495 3496 /* read in column indices */ 3497 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3498 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3499 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3500 3501 /* loop over row lengths determining block row lengths */ 3502 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3503 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3504 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3505 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3506 rowcount = 0; 3507 nzcount = 0; 3508 for (i=0; i<mbs; i++) { 3509 nmask = 0; 3510 for (j=0; j<bs; j++) { 3511 kmax = rowlengths[rowcount]; 3512 for (k=0; k<kmax; k++) { 3513 tmp = jj[nzcount++]/bs; 3514 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3515 } 3516 rowcount++; 3517 } 3518 browlengths[i] += nmask; 3519 /* zero out the mask elements we set */ 3520 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3521 } 3522 3523 /* Do preallocation */ 3524 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3525 a = (Mat_SeqBAIJ*)newmat->data; 3526 3527 /* set matrix "i" values */ 3528 a->i[0] = 0; 3529 for (i=1; i<= mbs; i++) { 3530 a->i[i] = a->i[i-1] + browlengths[i-1]; 3531 a->ilen[i-1] = browlengths[i-1]; 3532 } 3533 a->nz = 0; 3534 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3535 3536 /* read in nonzero values */ 3537 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3538 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3539 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3540 3541 /* set "a" and "j" values into matrix */ 3542 nzcount = 0; jcount = 0; 3543 for (i=0; i<mbs; i++) { 3544 nzcountb = nzcount; 3545 nmask = 0; 3546 for (j=0; j<bs; j++) { 3547 kmax = rowlengths[i*bs+j]; 3548 for (k=0; k<kmax; k++) { 3549 tmp = jj[nzcount++]/bs; 3550 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3551 } 3552 } 3553 /* sort the masked values */ 3554 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3555 3556 /* set "j" values into matrix */ 3557 maskcount = 1; 3558 for (j=0; j<nmask; j++) { 3559 a->j[jcount++] = masked[j]; 3560 mask[masked[j]] = maskcount++; 3561 } 3562 /* set "a" values into matrix */ 3563 ishift = bs2*a->i[i]; 3564 for (j=0; j<bs; j++) { 3565 kmax = rowlengths[i*bs+j]; 3566 for (k=0; k<kmax; k++) { 3567 tmp = jj[nzcountb]/bs ; 3568 block = mask[tmp] - 1; 3569 point = jj[nzcountb] - bs*tmp; 3570 idx = ishift + bs2*block + j + bs*point; 3571 a->a[idx] = (MatScalar)aa[nzcountb++]; 3572 } 3573 } 3574 /* zero out the mask elements we set */ 3575 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3576 } 3577 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3578 3579 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3580 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3581 ierr = PetscFree(aa);CHKERRQ(ierr); 3582 ierr = PetscFree(jj);CHKERRQ(ierr); 3583 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3584 3585 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3586 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3587 ierr = MatView_Private(newmat);CHKERRQ(ierr); 3588 PetscFunctionReturn(0); 3589 } 3590 3591 #undef __FUNCT__ 3592 #define __FUNCT__ "MatCreateSeqBAIJ" 3593 /*@C 3594 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3595 compressed row) format. For good matrix assembly performance the 3596 user should preallocate the matrix storage by setting the parameter nz 3597 (or the array nnz). By setting these parameters accurately, performance 3598 during matrix assembly can be increased by more than a factor of 50. 3599 3600 Collective on MPI_Comm 3601 3602 Input Parameters: 3603 + comm - MPI communicator, set to PETSC_COMM_SELF 3604 . bs - size of block 3605 . m - number of rows 3606 . n - number of columns 3607 . nz - number of nonzero blocks per block row (same for all rows) 3608 - nnz - array containing the number of nonzero blocks in the various block rows 3609 (possibly different for each block row) or PETSC_NULL 3610 3611 Output Parameter: 3612 . A - the matrix 3613 3614 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3615 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3616 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3617 3618 Options Database Keys: 3619 . -mat_no_unroll - uses code that does not unroll the loops in the 3620 block calculations (much slower) 3621 . -mat_block_size - size of the blocks to use 3622 3623 Level: intermediate 3624 3625 Notes: 3626 The number of rows and columns must be divisible by blocksize. 3627 3628 If the nnz parameter is given then the nz parameter is ignored 3629 3630 A nonzero block is any block that as 1 or more nonzeros in it 3631 3632 The block AIJ format is fully compatible with standard Fortran 77 3633 storage. That is, the stored row and column indices can begin at 3634 either one (as in Fortran) or zero. See the users' manual for details. 3635 3636 Specify the preallocated storage with either nz or nnz (not both). 3637 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3638 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3639 matrices. 3640 3641 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3642 @*/ 3643 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3644 { 3645 PetscErrorCode ierr; 3646 3647 PetscFunctionBegin; 3648 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3649 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3650 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3651 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3652 PetscFunctionReturn(0); 3653 } 3654 3655 #undef __FUNCT__ 3656 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3657 /*@C 3658 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3659 per row in the matrix. For good matrix assembly performance the 3660 user should preallocate the matrix storage by setting the parameter nz 3661 (or the array nnz). By setting these parameters accurately, performance 3662 during matrix assembly can be increased by more than a factor of 50. 3663 3664 Collective on MPI_Comm 3665 3666 Input Parameters: 3667 + A - the matrix 3668 . bs - size of block 3669 . nz - number of block nonzeros per block row (same for all rows) 3670 - nnz - array containing the number of block nonzeros in the various block rows 3671 (possibly different for each block row) or PETSC_NULL 3672 3673 Options Database Keys: 3674 . -mat_no_unroll - uses code that does not unroll the loops in the 3675 block calculations (much slower) 3676 . -mat_block_size - size of the blocks to use 3677 3678 Level: intermediate 3679 3680 Notes: 3681 If the nnz parameter is given then the nz parameter is ignored 3682 3683 You can call MatGetInfo() to get information on how effective the preallocation was; 3684 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3685 You can also run with the option -info and look for messages with the string 3686 malloc in them to see if additional memory allocation was needed. 3687 3688 The block AIJ format is fully compatible with standard Fortran 77 3689 storage. That is, the stored row and column indices can begin at 3690 either one (as in Fortran) or zero. See the users' manual for details. 3691 3692 Specify the preallocated storage with either nz or nnz (not both). 3693 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3694 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3695 3696 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3697 @*/ 3698 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3699 { 3700 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 3701 3702 PetscFunctionBegin; 3703 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3704 if (f) { 3705 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 3706 } 3707 PetscFunctionReturn(0); 3708 } 3709 3710 #undef __FUNCT__ 3711 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3712 /*@C 3713 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3714 (the default sequential PETSc format). 3715 3716 Collective on MPI_Comm 3717 3718 Input Parameters: 3719 + A - the matrix 3720 . i - the indices into j for the start of each local row (starts with zero) 3721 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3722 - v - optional values in the matrix 3723 3724 Level: developer 3725 3726 .keywords: matrix, aij, compressed row, sparse 3727 3728 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3729 @*/ 3730 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3731 { 3732 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 3733 3734 PetscFunctionBegin; 3735 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3736 if (f) { 3737 ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 3738 } 3739 PetscFunctionReturn(0); 3740 } 3741 3742 3743 #undef __FUNCT__ 3744 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3745 /*@ 3746 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3747 3748 Collective on MPI_Comm 3749 3750 Input Parameters: 3751 + comm - must be an MPI communicator of size 1 3752 . bs - size of block 3753 . m - number of rows 3754 . n - number of columns 3755 . i - row indices 3756 . j - column indices 3757 - a - matrix values 3758 3759 Output Parameter: 3760 . mat - the matrix 3761 3762 Level: advanced 3763 3764 Notes: 3765 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3766 once the matrix is destroyed 3767 3768 You cannot set new nonzero locations into this matrix, that will generate an error. 3769 3770 The i and j indices are 0 based 3771 3772 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3773 3774 3775 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3776 3777 @*/ 3778 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3779 { 3780 PetscErrorCode ierr; 3781 PetscInt ii; 3782 Mat_SeqBAIJ *baij; 3783 3784 PetscFunctionBegin; 3785 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3786 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3787 3788 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3789 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3790 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3791 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3792 baij = (Mat_SeqBAIJ*)(*mat)->data; 3793 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3794 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3795 3796 baij->i = i; 3797 baij->j = j; 3798 baij->a = a; 3799 baij->singlemalloc = PETSC_FALSE; 3800 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3801 baij->free_a = PETSC_FALSE; 3802 baij->free_ij = PETSC_FALSE; 3803 3804 for (ii=0; ii<m; ii++) { 3805 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3806 #if defined(PETSC_USE_DEBUG) 3807 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3808 #endif 3809 } 3810 #if defined(PETSC_USE_DEBUG) 3811 for (ii=0; ii<baij->i[m]; ii++) { 3812 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3813 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3814 } 3815 #endif 3816 3817 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3818 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3819 PetscFunctionReturn(0); 3820 } 3821