1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "../src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "petscblaslapack.h" 9 #include "../src/mat/blockinvert.h" 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 13 /*@ 14 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 15 16 Collective on Mat 17 18 Input Parameters: 19 . mat - the matrix 20 21 Level: advanced 22 @*/ 23 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 24 { 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 29 if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 30 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 31 ierr = PetscUseMethod(mat,"MatSeqBAIJInvertBlockDiagonal_C",(Mat),(mat));CHKERRQ(ierr); 32 PetscFunctionReturn(0); 33 } 34 35 EXTERN_C_BEGIN 36 #undef __FUNCT__ 37 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 38 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 39 { 40 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 41 PetscErrorCode ierr; 42 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 43 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 44 PetscReal shift = 0.0; 45 46 PetscFunctionBegin; 47 if (a->idiagvalid) PetscFunctionReturn(0); 48 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 49 diag_offset = a->diag; 50 if (!a->idiag) { 51 ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 52 ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 53 } 54 diag = a->idiag; 55 mdiag = a->idiag+bs2*mbs; 56 /* factor and invert each block */ 57 switch (bs){ 58 case 1: 59 for (i=0; i<mbs; i++) { 60 odiag = v + 1*diag_offset[i]; 61 diag[0] = odiag[0]; 62 mdiag[0] = odiag[0]; 63 diag[0] = 1.0 / (diag[0] + shift); 64 diag += 1; 65 mdiag += 1; 66 } 67 break; 68 case 2: 69 for (i=0; i<mbs; i++) { 70 odiag = v + 4*diag_offset[i]; 71 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 72 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 73 ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 74 diag += 4; 75 mdiag += 4; 76 } 77 break; 78 case 3: 79 for (i=0; i<mbs; i++) { 80 odiag = v + 9*diag_offset[i]; 81 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 82 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 83 diag[8] = odiag[8]; 84 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 85 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 86 mdiag[8] = odiag[8]; 87 ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 88 diag += 9; 89 mdiag += 9; 90 } 91 break; 92 case 4: 93 for (i=0; i<mbs; i++) { 94 odiag = v + 16*diag_offset[i]; 95 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 97 ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 98 diag += 16; 99 mdiag += 16; 100 } 101 break; 102 case 5: 103 for (i=0; i<mbs; i++) { 104 odiag = v + 25*diag_offset[i]; 105 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 107 ierr = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 108 diag += 25; 109 mdiag += 25; 110 } 111 break; 112 case 6: 113 for (i=0; i<mbs; i++) { 114 odiag = v + 36*diag_offset[i]; 115 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 117 ierr = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 118 diag += 36; 119 mdiag += 36; 120 } 121 break; 122 case 7: 123 for (i=0; i<mbs; i++) { 124 odiag = v + 49*diag_offset[i]; 125 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 126 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 127 ierr = Kernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 128 diag += 49; 129 mdiag += 49; 130 } 131 break; 132 default: 133 ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr); 134 for (i=0; i<mbs; i++) { 135 odiag = v + bs2*diag_offset[i]; 136 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 137 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 138 ierr = Kernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 139 diag += bs2; 140 mdiag += bs2; 141 } 142 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 143 } 144 a->idiagvalid = PETSC_TRUE; 145 PetscFunctionReturn(0); 146 } 147 EXTERN_C_END 148 149 #undef __FUNCT__ 150 #define __FUNCT__ "MatSOR_SeqBAIJ_1" 151 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 152 { 153 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 154 PetscScalar *x,x1,s1; 155 const PetscScalar *b; 156 const MatScalar *aa = a->a, *idiag,*mdiag,*v; 157 PetscErrorCode ierr; 158 PetscInt m = a->mbs,i,i2,nz,j; 159 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 160 161 PetscFunctionBegin; 162 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 163 its = its*lits; 164 if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 165 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 166 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 167 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 168 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 169 170 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 171 172 diag = a->diag; 173 idiag = a->idiag; 174 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 175 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 176 177 if (flag & SOR_ZERO_INITIAL_GUESS) { 178 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 179 x[0] = b[0]*idiag[0]; 180 i2 = 1; 181 idiag += 1; 182 for (i=1; i<m; i++) { 183 v = aa + ai[i]; 184 vi = aj + ai[i]; 185 nz = diag[i] - ai[i]; 186 s1 = b[i2]; 187 for (j=0; j<nz; j++) { 188 s1 -= v[j]*x[vi[j]]; 189 } 190 x[i2] = idiag[0]*s1; 191 idiag += 1; 192 i2 += 1; 193 } 194 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 195 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 196 } 197 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 198 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 199 i2 = 0; 200 mdiag = a->idiag+a->mbs; 201 for (i=0; i<m; i++) { 202 x1 = x[i2]; 203 x[i2] = mdiag[0]*x1; 204 mdiag += 1; 205 i2 += 1; 206 } 207 ierr = PetscLogFlops(m);CHKERRQ(ierr); 208 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 209 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 210 } 211 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 212 idiag = a->idiag+a->mbs - 1; 213 i2 = m - 1; 214 x1 = x[i2]; 215 x[i2] = idiag[0]*x1; 216 idiag -= 1; 217 i2 -= 1; 218 for (i=m-2; i>=0; i--) { 219 v = aa + (diag[i]+1); 220 vi = aj + diag[i] + 1; 221 nz = ai[i+1] - diag[i] - 1; 222 s1 = x[i2]; 223 for (j=0; j<nz; j++) { 224 s1 -= v[j]*x[vi[j]]; 225 } 226 x[i2] = idiag[0]*s1; 227 idiag -= 1; 228 i2 -= 1; 229 } 230 ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 231 } 232 } else { 233 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 234 } 235 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 236 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 237 PetscFunctionReturn(0); 238 } 239 240 #undef __FUNCT__ 241 #define __FUNCT__ "MatSOR_SeqBAIJ_2" 242 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 243 { 244 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 245 PetscScalar *x,x1,x2,s1,s2; 246 const PetscScalar *b; 247 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 248 PetscErrorCode ierr; 249 PetscInt m = a->mbs,i,i2,nz,idx,j,it; 250 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 251 252 PetscFunctionBegin; 253 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 254 its = its*lits; 255 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 256 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 257 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 258 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 259 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 260 261 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 262 263 diag = a->diag; 264 idiag = a->idiag; 265 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 266 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 267 268 if (flag & SOR_ZERO_INITIAL_GUESS) { 269 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 270 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 271 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 272 i2 = 2; 273 idiag += 4; 274 for (i=1; i<m; i++) { 275 v = aa + 4*ai[i]; 276 vi = aj + ai[i]; 277 nz = diag[i] - ai[i]; 278 s1 = b[i2]; s2 = b[i2+1]; 279 for (j=0; j<nz; j++) { 280 idx = 2*vi[j]; 281 it = 4*j; 282 x1 = x[idx]; x2 = x[1+idx]; 283 s1 -= v[it]*x1 + v[it+2]*x2; 284 s2 -= v[it+1]*x1 + v[it+3]*x2; 285 } 286 x[i2] = idiag[0]*s1 + idiag[2]*s2; 287 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 288 idiag += 4; 289 i2 += 2; 290 } 291 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 292 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 293 } 294 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 295 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 296 i2 = 0; 297 mdiag = a->idiag+4*a->mbs; 298 for (i=0; i<m; i++) { 299 x1 = x[i2]; x2 = x[i2+1]; 300 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 301 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 302 mdiag += 4; 303 i2 += 2; 304 } 305 ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 306 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 307 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 308 } 309 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 310 idiag = a->idiag+4*a->mbs - 4; 311 i2 = 2*m - 2; 312 x1 = x[i2]; x2 = x[i2+1]; 313 x[i2] = idiag[0]*x1 + idiag[2]*x2; 314 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 315 idiag -= 4; 316 i2 -= 2; 317 for (i=m-2; i>=0; i--) { 318 v = aa + 4*(diag[i]+1); 319 vi = aj + diag[i] + 1; 320 nz = ai[i+1] - diag[i] - 1; 321 s1 = x[i2]; s2 = x[i2+1]; 322 for (j=0; j<nz; j++) { 323 idx = 2*vi[j]; 324 it = 4*j; 325 x1 = x[idx]; x2 = x[1+idx]; 326 s1 -= v[it]*x1 + v[it+2]*x2; 327 s2 -= v[it+1]*x1 + v[it+3]*x2; 328 } 329 x[i2] = idiag[0]*s1 + idiag[2]*s2; 330 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 331 idiag -= 4; 332 i2 -= 2; 333 } 334 ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 335 } 336 } else { 337 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 338 } 339 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 340 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 341 PetscFunctionReturn(0); 342 } 343 344 #undef __FUNCT__ 345 #define __FUNCT__ "MatSOR_SeqBAIJ_3" 346 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 347 { 348 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 349 PetscScalar *x,x1,x2,x3,s1,s2,s3; 350 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 351 const PetscScalar *b; 352 PetscErrorCode ierr; 353 PetscInt m = a->mbs,i,i2,nz,idx; 354 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 355 356 PetscFunctionBegin; 357 its = its*lits; 358 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 359 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 360 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 361 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 362 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 363 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 364 365 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 366 367 diag = a->diag; 368 idiag = a->idiag; 369 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 370 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 371 372 if (flag & SOR_ZERO_INITIAL_GUESS) { 373 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 374 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 375 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 376 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 377 i2 = 3; 378 idiag += 9; 379 for (i=1; i<m; i++) { 380 v = aa + 9*ai[i]; 381 vi = aj + ai[i]; 382 nz = diag[i] - ai[i]; 383 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 384 while (nz--) { 385 idx = 3*(*vi++); 386 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 387 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 388 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 389 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 390 v += 9; 391 } 392 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 393 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 394 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 395 idiag += 9; 396 i2 += 3; 397 } 398 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 399 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 400 } 401 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 402 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 403 i2 = 0; 404 mdiag = a->idiag+9*a->mbs; 405 for (i=0; i<m; i++) { 406 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 407 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 408 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 409 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 410 mdiag += 9; 411 i2 += 3; 412 } 413 ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 414 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 415 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 416 } 417 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 418 idiag = a->idiag+9*a->mbs - 9; 419 i2 = 3*m - 3; 420 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 421 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 422 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 423 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 424 idiag -= 9; 425 i2 -= 3; 426 for (i=m-2; i>=0; i--) { 427 v = aa + 9*(diag[i]+1); 428 vi = aj + diag[i] + 1; 429 nz = ai[i+1] - diag[i] - 1; 430 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 431 while (nz--) { 432 idx = 3*(*vi++); 433 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 434 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 435 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 436 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 437 v += 9; 438 } 439 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 440 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 441 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 442 idiag -= 9; 443 i2 -= 3; 444 } 445 ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 446 } 447 } else { 448 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 449 } 450 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 451 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 452 PetscFunctionReturn(0); 453 } 454 455 #undef __FUNCT__ 456 #define __FUNCT__ "MatSOR_SeqBAIJ_4" 457 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 458 { 459 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 460 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 461 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 462 const PetscScalar *b; 463 PetscErrorCode ierr; 464 PetscInt m = a->mbs,i,i2,nz,idx; 465 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 466 467 PetscFunctionBegin; 468 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 469 its = its*lits; 470 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 471 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 472 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 473 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 474 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 475 476 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 477 478 diag = a->diag; 479 idiag = a->idiag; 480 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 481 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 482 483 if (flag & SOR_ZERO_INITIAL_GUESS) { 484 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 485 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 486 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 487 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 488 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 489 i2 = 4; 490 idiag += 16; 491 for (i=1; i<m; i++) { 492 v = aa + 16*ai[i]; 493 vi = aj + ai[i]; 494 nz = diag[i] - ai[i]; 495 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 496 while (nz--) { 497 idx = 4*(*vi++); 498 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 499 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 500 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 501 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 502 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 503 v += 16; 504 } 505 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 506 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 507 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 508 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 509 idiag += 16; 510 i2 += 4; 511 } 512 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 513 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 514 } 515 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 516 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 517 i2 = 0; 518 mdiag = a->idiag+16*a->mbs; 519 for (i=0; i<m; i++) { 520 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 521 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 522 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 523 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 524 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 525 mdiag += 16; 526 i2 += 4; 527 } 528 ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 529 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 530 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 531 } 532 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 533 idiag = a->idiag+16*a->mbs - 16; 534 i2 = 4*m - 4; 535 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 536 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 537 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 538 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 539 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 540 idiag -= 16; 541 i2 -= 4; 542 for (i=m-2; i>=0; i--) { 543 v = aa + 16*(diag[i]+1); 544 vi = aj + diag[i] + 1; 545 nz = ai[i+1] - diag[i] - 1; 546 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 547 while (nz--) { 548 idx = 4*(*vi++); 549 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 550 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 551 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 552 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 553 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 554 v += 16; 555 } 556 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 557 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 558 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 559 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 560 idiag -= 16; 561 i2 -= 4; 562 } 563 ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 564 } 565 } else { 566 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 567 } 568 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 569 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 570 PetscFunctionReturn(0); 571 } 572 573 #undef __FUNCT__ 574 #define __FUNCT__ "MatSOR_SeqBAIJ_5" 575 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 576 { 577 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 578 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 579 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 580 const PetscScalar *b; 581 PetscErrorCode ierr; 582 PetscInt m = a->mbs,i,i2,nz,idx; 583 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 584 585 PetscFunctionBegin; 586 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 587 its = its*lits; 588 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 589 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 590 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 591 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 592 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 593 594 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 595 596 diag = a->diag; 597 idiag = a->idiag; 598 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 599 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 600 601 if (flag & SOR_ZERO_INITIAL_GUESS) { 602 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 603 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 604 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 605 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 606 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 607 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 608 i2 = 5; 609 idiag += 25; 610 for (i=1; i<m; i++) { 611 v = aa + 25*ai[i]; 612 vi = aj + ai[i]; 613 nz = diag[i] - ai[i]; 614 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 615 while (nz--) { 616 idx = 5*(*vi++); 617 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 618 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 619 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 620 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 621 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 622 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 623 v += 25; 624 } 625 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 626 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 627 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 628 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 629 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 630 idiag += 25; 631 i2 += 5; 632 } 633 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 634 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 635 } 636 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 637 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 638 i2 = 0; 639 mdiag = a->idiag+25*a->mbs; 640 for (i=0; i<m; i++) { 641 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 642 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 643 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 644 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 645 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 646 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 647 mdiag += 25; 648 i2 += 5; 649 } 650 ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 651 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 652 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 653 } 654 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 655 idiag = a->idiag+25*a->mbs - 25; 656 i2 = 5*m - 5; 657 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 658 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 659 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 660 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 661 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 662 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 663 idiag -= 25; 664 i2 -= 5; 665 for (i=m-2; i>=0; i--) { 666 v = aa + 25*(diag[i]+1); 667 vi = aj + diag[i] + 1; 668 nz = ai[i+1] - diag[i] - 1; 669 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 670 while (nz--) { 671 idx = 5*(*vi++); 672 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 673 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 674 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 675 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 676 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 677 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 678 v += 25; 679 } 680 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 681 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 682 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 683 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 684 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 685 idiag -= 25; 686 i2 -= 5; 687 } 688 ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 689 } 690 } else { 691 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 692 } 693 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 694 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 695 PetscFunctionReturn(0); 696 } 697 698 #undef __FUNCT__ 699 #define __FUNCT__ "MatSOR_SeqBAIJ_6" 700 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 701 { 702 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 703 PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 704 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 705 const PetscScalar *b; 706 PetscErrorCode ierr; 707 PetscInt m = a->mbs,i,i2,nz,idx; 708 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 709 710 PetscFunctionBegin; 711 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 712 its = its*lits; 713 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 714 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 715 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 716 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 717 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 718 719 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 720 721 diag = a->diag; 722 idiag = a->idiag; 723 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 724 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 725 726 if (flag & SOR_ZERO_INITIAL_GUESS) { 727 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 728 x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 729 x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 730 x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 731 x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 732 x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 733 x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 734 i2 = 6; 735 idiag += 36; 736 for (i=1; i<m; i++) { 737 v = aa + 36*ai[i]; 738 vi = aj + ai[i]; 739 nz = diag[i] - ai[i]; 740 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 741 while (nz--) { 742 idx = 6*(*vi++); 743 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 744 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 745 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 746 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 747 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 748 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 749 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 750 v += 36; 751 } 752 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 753 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 754 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 755 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 756 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 757 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 758 idiag += 36; 759 i2 += 6; 760 } 761 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 762 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 763 } 764 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 765 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 766 i2 = 0; 767 mdiag = a->idiag+36*a->mbs; 768 for (i=0; i<m; i++) { 769 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 770 x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 771 x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 772 x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 773 x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 774 x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 775 x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6; 776 mdiag += 36; 777 i2 += 6; 778 } 779 ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 780 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 781 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 782 } 783 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 784 idiag = a->idiag+36*a->mbs - 36; 785 i2 = 6*m - 6; 786 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 787 x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 788 x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 789 x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 790 x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 791 x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 792 x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 793 idiag -= 36; 794 i2 -= 6; 795 for (i=m-2; i>=0; i--) { 796 v = aa + 36*(diag[i]+1); 797 vi = aj + diag[i] + 1; 798 nz = ai[i+1] - diag[i] - 1; 799 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 800 while (nz--) { 801 idx = 6*(*vi++); 802 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 803 s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 804 s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 805 s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 806 s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 807 s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 808 s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 809 v += 36; 810 } 811 x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 812 x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 813 x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 814 x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 815 x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 816 x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 817 idiag -= 36; 818 i2 -= 6; 819 } 820 ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 821 } 822 } else { 823 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 824 } 825 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 826 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 827 PetscFunctionReturn(0); 828 } 829 830 #undef __FUNCT__ 831 #define __FUNCT__ "MatSOR_SeqBAIJ_7" 832 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 833 { 834 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 835 PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 836 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 837 const PetscScalar *b; 838 PetscErrorCode ierr; 839 PetscInt m = a->mbs,i,i2,nz,idx; 840 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 841 842 PetscFunctionBegin; 843 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 844 its = its*lits; 845 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 846 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 847 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 848 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 849 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 850 851 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 852 853 diag = a->diag; 854 idiag = a->idiag; 855 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 856 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 857 858 if (flag & SOR_ZERO_INITIAL_GUESS) { 859 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 860 x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 861 x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 862 x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 863 x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 864 x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 865 x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 866 x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 867 i2 = 7; 868 idiag += 49; 869 for (i=1; i<m; i++) { 870 v = aa + 49*ai[i]; 871 vi = aj + ai[i]; 872 nz = diag[i] - ai[i]; 873 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 874 while (nz--) { 875 idx = 7*(*vi++); 876 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 877 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 878 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 879 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 880 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 881 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 882 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 883 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 884 v += 49; 885 } 886 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 887 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 888 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 889 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 890 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 891 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 892 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 893 idiag += 49; 894 i2 += 7; 895 } 896 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 897 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 898 } 899 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 900 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 901 i2 = 0; 902 mdiag = a->idiag+49*a->mbs; 903 for (i=0; i<m; i++) { 904 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 905 x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7; 906 x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7; 907 x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7; 908 x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7; 909 x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7; 910 x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7; 911 x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7; 912 mdiag += 49; 913 i2 += 7; 914 } 915 ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 916 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 917 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 918 } 919 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 920 idiag = a->idiag+49*a->mbs - 49; 921 i2 = 7*m - 7; 922 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 923 x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 924 x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 925 x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 926 x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 927 x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 928 x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 929 x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 930 idiag -= 49; 931 i2 -= 7; 932 for (i=m-2; i>=0; i--) { 933 v = aa + 49*(diag[i]+1); 934 vi = aj + diag[i] + 1; 935 nz = ai[i+1] - diag[i] - 1; 936 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 937 while (nz--) { 938 idx = 7*(*vi++); 939 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 940 s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 941 s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 942 s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 943 s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 944 s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 945 s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 946 s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 947 v += 49; 948 } 949 x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 950 x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 951 x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 952 x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 953 x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 954 x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 955 x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 956 idiag -= 49; 957 i2 -= 7; 958 } 959 ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 960 } 961 } else { 962 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 963 } 964 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 965 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 966 PetscFunctionReturn(0); 967 } 968 969 #undef __FUNCT__ 970 #define __FUNCT__ "MatSOR_SeqBAIJ_N" 971 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 972 { 973 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 974 PetscScalar *x,*work,*w,*workt; 975 const MatScalar *v,*aa = a->a, *idiag,*mdiag; 976 const PetscScalar *b; 977 PetscErrorCode ierr; 978 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j; 979 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 980 981 PetscFunctionBegin; 982 its = its*lits; 983 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 984 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 985 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 986 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 987 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 988 if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 989 990 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 991 992 diag = a->diag; 993 idiag = a->idiag; 994 if (!a->mult_work) { 995 k = PetscMax(A->rmap->n,A->cmap->n); 996 ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 997 } 998 work = a->mult_work; 999 if (!a->sor_work) { 1000 ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr); 1001 } 1002 w = a->sor_work; 1003 1004 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1005 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1006 1007 if (flag & SOR_ZERO_INITIAL_GUESS) { 1008 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1009 Kernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 1010 /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 1011 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 1012 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/ 1013 i2 = bs; 1014 idiag += bs2; 1015 for (i=1; i<m; i++) { 1016 v = aa + bs2*ai[i]; 1017 vi = aj + ai[i]; 1018 nz = diag[i] - ai[i]; 1019 1020 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1021 /* copy all rows of x that are needed into contiguous space */ 1022 workt = work; 1023 for (j=0; j<nz; j++) { 1024 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1025 workt += bs; 1026 } 1027 Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1028 /*s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 1029 while (nz--) { 1030 idx = N*(*vi++); 1031 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 1032 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1033 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1034 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1035 v += N2; 1036 } */ 1037 1038 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1039 /* x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1040 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1041 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/ 1042 1043 idiag += bs2; 1044 i2 += bs; 1045 } 1046 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 1047 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1048 } 1049 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 1050 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1051 i2 = 0; 1052 mdiag = a->idiag+bs2*a->mbs; 1053 ierr = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr); 1054 for (i=0; i<m; i++) { 1055 Kernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2); 1056 /* x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1057 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 1058 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 1059 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */ 1060 1061 mdiag += bs2; 1062 i2 += bs; 1063 } 1064 ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr); 1065 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1066 ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 1067 } 1068 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1069 idiag = a->idiag+bs2*a->mbs - bs2; 1070 i2 = bs*m - bs; 1071 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1072 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1073 /*x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 1074 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 1075 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 1076 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/ 1077 idiag -= bs2; 1078 i2 -= bs; 1079 for (i=m-2; i>=0; i--) { 1080 v = aa + bs2*(diag[i]+1); 1081 vi = aj + diag[i] + 1; 1082 nz = ai[i+1] - diag[i] - 1; 1083 1084 ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1085 /* copy all rows of x that are needed into contiguous space */ 1086 workt = work; 1087 for (j=0; j<nz; j++) { 1088 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1089 workt += bs; 1090 } 1091 Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 1092 /* s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 1093 while (nz--) { 1094 idx = N*(*vi++); 1095 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 1096 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 1097 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 1098 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 1099 v += N2; 1100 } */ 1101 1102 Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 1103 /*x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 1104 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 1105 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */ 1106 idiag -= bs2; 1107 i2 -= bs; 1108 } 1109 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1110 } 1111 } else { 1112 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 1113 } 1114 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1115 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1116 PetscFunctionReturn(0); 1117 } 1118 1119 /* 1120 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1121 */ 1122 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1123 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1124 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1125 #define matsetvaluesblocked4_ matsetvaluesblocked4 1126 #endif 1127 1128 EXTERN_C_BEGIN 1129 #undef __FUNCT__ 1130 #define __FUNCT__ "matsetvaluesblocked4_" 1131 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 1132 { 1133 Mat A = *AA; 1134 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1135 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 1136 PetscInt *ai=a->i,*ailen=a->ilen; 1137 PetscInt *aj=a->j,stepval,lastcol = -1; 1138 const PetscScalar *value = v; 1139 MatScalar *ap,*aa = a->a,*bap; 1140 1141 PetscFunctionBegin; 1142 if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 1143 stepval = (n-1)*4; 1144 for (k=0; k<m; k++) { /* loop over added rows */ 1145 row = im[k]; 1146 rp = aj + ai[row]; 1147 ap = aa + 16*ai[row]; 1148 nrow = ailen[row]; 1149 low = 0; 1150 high = nrow; 1151 for (l=0; l<n; l++) { /* loop over added columns */ 1152 col = in[l]; 1153 if (col <= lastcol) low = 0; else high = nrow; 1154 lastcol = col; 1155 value = v + k*(stepval+4 + l)*4; 1156 while (high-low > 7) { 1157 t = (low+high)/2; 1158 if (rp[t] > col) high = t; 1159 else low = t; 1160 } 1161 for (i=low; i<high; i++) { 1162 if (rp[i] > col) break; 1163 if (rp[i] == col) { 1164 bap = ap + 16*i; 1165 for (ii=0; ii<4; ii++,value+=stepval) { 1166 for (jj=ii; jj<16; jj+=4) { 1167 bap[jj] += *value++; 1168 } 1169 } 1170 goto noinsert2; 1171 } 1172 } 1173 N = nrow++ - 1; 1174 high++; /* added new column index thus must search to one higher than before */ 1175 /* shift up all the later entries in this row */ 1176 for (ii=N; ii>=i; ii--) { 1177 rp[ii+1] = rp[ii]; 1178 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1179 } 1180 if (N >= i) { 1181 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1182 } 1183 rp[i] = col; 1184 bap = ap + 16*i; 1185 for (ii=0; ii<4; ii++,value+=stepval) { 1186 for (jj=ii; jj<16; jj+=4) { 1187 bap[jj] = *value++; 1188 } 1189 } 1190 noinsert2:; 1191 low = i; 1192 } 1193 ailen[row] = nrow; 1194 } 1195 PetscFunctionReturnVoid(); 1196 } 1197 EXTERN_C_END 1198 1199 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1200 #define matsetvalues4_ MATSETVALUES4 1201 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1202 #define matsetvalues4_ matsetvalues4 1203 #endif 1204 1205 EXTERN_C_BEGIN 1206 #undef __FUNCT__ 1207 #define __FUNCT__ "MatSetValues4_" 1208 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1209 { 1210 Mat A = *AA; 1211 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1212 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1213 PetscInt *ai=a->i,*ailen=a->ilen; 1214 PetscInt *aj=a->j,brow,bcol; 1215 PetscInt ridx,cidx,lastcol = -1; 1216 MatScalar *ap,value,*aa=a->a,*bap; 1217 1218 PetscFunctionBegin; 1219 for (k=0; k<m; k++) { /* loop over added rows */ 1220 row = im[k]; brow = row/4; 1221 rp = aj + ai[brow]; 1222 ap = aa + 16*ai[brow]; 1223 nrow = ailen[brow]; 1224 low = 0; 1225 high = nrow; 1226 for (l=0; l<n; l++) { /* loop over added columns */ 1227 col = in[l]; bcol = col/4; 1228 ridx = row % 4; cidx = col % 4; 1229 value = v[l + k*n]; 1230 if (col <= lastcol) low = 0; else high = nrow; 1231 lastcol = col; 1232 while (high-low > 7) { 1233 t = (low+high)/2; 1234 if (rp[t] > bcol) high = t; 1235 else low = t; 1236 } 1237 for (i=low; i<high; i++) { 1238 if (rp[i] > bcol) break; 1239 if (rp[i] == bcol) { 1240 bap = ap + 16*i + 4*cidx + ridx; 1241 *bap += value; 1242 goto noinsert1; 1243 } 1244 } 1245 N = nrow++ - 1; 1246 high++; /* added new column thus must search to one higher than before */ 1247 /* shift up all the later entries in this row */ 1248 for (ii=N; ii>=i; ii--) { 1249 rp[ii+1] = rp[ii]; 1250 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1251 } 1252 if (N>=i) { 1253 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1254 } 1255 rp[i] = bcol; 1256 ap[16*i + 4*cidx + ridx] = value; 1257 noinsert1:; 1258 low = i; 1259 } 1260 ailen[brow] = nrow; 1261 } 1262 PetscFunctionReturnVoid(); 1263 } 1264 EXTERN_C_END 1265 1266 /* 1267 Checks for missing diagonals 1268 */ 1269 #undef __FUNCT__ 1270 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1271 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1272 { 1273 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1274 PetscErrorCode ierr; 1275 PetscInt *diag,*jj = a->j,i; 1276 1277 PetscFunctionBegin; 1278 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1279 *missing = PETSC_FALSE; 1280 if (A->rmap->n > 0 && !jj) { 1281 *missing = PETSC_TRUE; 1282 if (d) *d = 0; 1283 PetscInfo(A,"Matrix has no entries therefor is missing diagonal"); 1284 } else { 1285 diag = a->diag; 1286 for (i=0; i<a->mbs; i++) { 1287 if (jj[diag[i]] != i) { 1288 *missing = PETSC_TRUE; 1289 if (d) *d = i; 1290 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1291 } 1292 } 1293 } 1294 PetscFunctionReturn(0); 1295 } 1296 1297 #undef __FUNCT__ 1298 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1299 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1300 { 1301 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1302 PetscErrorCode ierr; 1303 PetscInt i,j,m = a->mbs; 1304 1305 PetscFunctionBegin; 1306 if (!a->diag) { 1307 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1308 ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr); 1309 a->free_diag = PETSC_TRUE; 1310 } 1311 for (i=0; i<m; i++) { 1312 a->diag[i] = a->i[i+1]; 1313 for (j=a->i[i]; j<a->i[i+1]; j++) { 1314 if (a->j[j] == i) { 1315 a->diag[i] = j; 1316 break; 1317 } 1318 } 1319 } 1320 PetscFunctionReturn(0); 1321 } 1322 1323 1324 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 1325 1326 #undef __FUNCT__ 1327 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1328 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 1329 { 1330 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1331 PetscErrorCode ierr; 1332 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt; 1333 PetscInt *tia, *tja; 1334 1335 PetscFunctionBegin; 1336 *nn = n; 1337 if (!ia) PetscFunctionReturn(0); 1338 if (symmetric) { 1339 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1340 nz = tia[n]; 1341 } else { 1342 tia = a->i; tja = a->j; 1343 } 1344 1345 if (!blockcompressed && bs > 1) { 1346 (*nn) *= bs; 1347 /* malloc & create the natural set of indices */ 1348 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1349 if (n) { 1350 (*ia)[0] = 0; 1351 for (j=1; j<bs; j++) { 1352 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1353 } 1354 } 1355 1356 for (i=1; i<n; i++) { 1357 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1358 for (j=1; j<bs; j++) { 1359 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1360 } 1361 } 1362 if (n) { 1363 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1364 } 1365 1366 if (ja) { 1367 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1368 cnt = 0; 1369 for (i=0; i<n; i++) { 1370 for (j=0; j<bs; j++) { 1371 for (k=tia[i]; k<tia[i+1]; k++) { 1372 for (l=0; l<bs; l++) { 1373 (*ja)[cnt++] = bs*tja[k] + l; 1374 } 1375 } 1376 } 1377 } 1378 } 1379 1380 n *= bs; 1381 nz *= bs*bs; 1382 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1383 ierr = PetscFree(tia);CHKERRQ(ierr); 1384 ierr = PetscFree(tja);CHKERRQ(ierr); 1385 } 1386 } else if (oshift == 1) { 1387 if (symmetric) { 1388 PetscInt nz = tia[A->rmap->n/bs]; 1389 /* add 1 to i and j indices */ 1390 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1391 *ia = tia; 1392 if (ja) { 1393 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1394 *ja = tja; 1395 } 1396 } else { 1397 PetscInt nz = a->i[A->rmap->n/bs]; 1398 /* malloc space and add 1 to i and j indices */ 1399 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1400 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1401 if (ja) { 1402 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1403 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1404 } 1405 } 1406 } else { 1407 *ia = tia; 1408 if (ja) *ja = tja; 1409 } 1410 1411 PetscFunctionReturn(0); 1412 } 1413 1414 #undef __FUNCT__ 1415 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1416 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 1417 { 1418 PetscErrorCode ierr; 1419 1420 PetscFunctionBegin; 1421 if (!ia) PetscFunctionReturn(0); 1422 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1423 ierr = PetscFree(*ia);CHKERRQ(ierr); 1424 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1425 } 1426 PetscFunctionReturn(0); 1427 } 1428 1429 #undef __FUNCT__ 1430 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1431 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1432 { 1433 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1434 PetscErrorCode ierr; 1435 1436 PetscFunctionBegin; 1437 #if defined(PETSC_USE_LOG) 1438 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1439 #endif 1440 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1441 if (a->row) { 1442 ierr = ISDestroy(a->row);CHKERRQ(ierr); 1443 } 1444 if (a->col) { 1445 ierr = ISDestroy(a->col);CHKERRQ(ierr); 1446 } 1447 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1448 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1449 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1450 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1451 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1452 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1453 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 1454 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1455 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1456 if (a->compressedrow.use){ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);} 1457 1458 if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);} 1459 if (a->parent) {ierr = MatDestroy(a->parent);CHKERRQ(ierr);} 1460 ierr = PetscFree(a);CHKERRQ(ierr); 1461 1462 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1463 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1464 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1465 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1466 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1467 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1468 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1469 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1470 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1471 PetscFunctionReturn(0); 1472 } 1473 1474 #undef __FUNCT__ 1475 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1476 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1477 { 1478 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1479 PetscErrorCode ierr; 1480 1481 PetscFunctionBegin; 1482 switch (op) { 1483 case MAT_ROW_ORIENTED: 1484 a->roworiented = flg; 1485 break; 1486 case MAT_KEEP_NONZERO_PATTERN: 1487 a->keepnonzeropattern = flg; 1488 break; 1489 case MAT_NEW_NONZERO_LOCATIONS: 1490 a->nonew = (flg ? 0 : 1); 1491 break; 1492 case MAT_NEW_NONZERO_LOCATION_ERR: 1493 a->nonew = (flg ? -1 : 0); 1494 break; 1495 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1496 a->nonew = (flg ? -2 : 0); 1497 break; 1498 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1499 a->nounused = (flg ? -1 : 0); 1500 break; 1501 case MAT_NEW_DIAGONALS: 1502 case MAT_IGNORE_OFF_PROC_ENTRIES: 1503 case MAT_USE_HASH_TABLE: 1504 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1505 break; 1506 case MAT_SYMMETRIC: 1507 case MAT_STRUCTURALLY_SYMMETRIC: 1508 case MAT_HERMITIAN: 1509 case MAT_SYMMETRY_ETERNAL: 1510 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1511 break; 1512 default: 1513 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1514 } 1515 PetscFunctionReturn(0); 1516 } 1517 1518 #undef __FUNCT__ 1519 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1520 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1521 { 1522 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1523 PetscErrorCode ierr; 1524 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1525 MatScalar *aa,*aa_i; 1526 PetscScalar *v_i; 1527 1528 PetscFunctionBegin; 1529 bs = A->rmap->bs; 1530 ai = a->i; 1531 aj = a->j; 1532 aa = a->a; 1533 bs2 = a->bs2; 1534 1535 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1536 1537 bn = row/bs; /* Block number */ 1538 bp = row % bs; /* Block Position */ 1539 M = ai[bn+1] - ai[bn]; 1540 *nz = bs*M; 1541 1542 if (v) { 1543 *v = 0; 1544 if (*nz) { 1545 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1546 for (i=0; i<M; i++) { /* for each block in the block row */ 1547 v_i = *v + i*bs; 1548 aa_i = aa + bs2*(ai[bn] + i); 1549 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 1550 } 1551 } 1552 } 1553 1554 if (idx) { 1555 *idx = 0; 1556 if (*nz) { 1557 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1558 for (i=0; i<M; i++) { /* for each block in the block row */ 1559 idx_i = *idx + i*bs; 1560 itmp = bs*aj[ai[bn] + i]; 1561 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1562 } 1563 } 1564 } 1565 PetscFunctionReturn(0); 1566 } 1567 1568 #undef __FUNCT__ 1569 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1570 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1571 { 1572 PetscErrorCode ierr; 1573 1574 PetscFunctionBegin; 1575 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1576 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1577 PetscFunctionReturn(0); 1578 } 1579 1580 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1581 1582 #undef __FUNCT__ 1583 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1584 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1585 { 1586 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1587 Mat C; 1588 PetscErrorCode ierr; 1589 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1590 PetscInt *rows,*cols,bs2=a->bs2; 1591 MatScalar *array; 1592 1593 PetscFunctionBegin; 1594 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1595 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1596 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1597 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1598 1599 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1600 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1601 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1602 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1603 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1604 ierr = PetscFree(col);CHKERRQ(ierr); 1605 } else { 1606 C = *B; 1607 } 1608 1609 array = a->a; 1610 ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 1611 for (i=0; i<mbs; i++) { 1612 cols[0] = i*bs; 1613 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1614 len = ai[i+1] - ai[i]; 1615 for (j=0; j<len; j++) { 1616 rows[0] = (*aj++)*bs; 1617 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1618 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1619 array += bs2; 1620 } 1621 } 1622 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1623 1624 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1625 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1626 1627 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1628 *B = C; 1629 } else { 1630 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1631 } 1632 PetscFunctionReturn(0); 1633 } 1634 1635 #undef __FUNCT__ 1636 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1637 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1638 { 1639 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1640 PetscErrorCode ierr; 1641 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1642 int fd; 1643 PetscScalar *aa; 1644 FILE *file; 1645 1646 PetscFunctionBegin; 1647 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1648 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1649 col_lens[0] = MAT_FILE_CLASSID; 1650 1651 col_lens[1] = A->rmap->N; 1652 col_lens[2] = A->cmap->n; 1653 col_lens[3] = a->nz*bs2; 1654 1655 /* store lengths of each row and write (including header) to file */ 1656 count = 0; 1657 for (i=0; i<a->mbs; i++) { 1658 for (j=0; j<bs; j++) { 1659 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1660 } 1661 } 1662 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1663 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1664 1665 /* store column indices (zero start index) */ 1666 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1667 count = 0; 1668 for (i=0; i<a->mbs; i++) { 1669 for (j=0; j<bs; j++) { 1670 for (k=a->i[i]; k<a->i[i+1]; k++) { 1671 for (l=0; l<bs; l++) { 1672 jj[count++] = bs*a->j[k] + l; 1673 } 1674 } 1675 } 1676 } 1677 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1678 ierr = PetscFree(jj);CHKERRQ(ierr); 1679 1680 /* store nonzero values */ 1681 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1682 count = 0; 1683 for (i=0; i<a->mbs; i++) { 1684 for (j=0; j<bs; j++) { 1685 for (k=a->i[i]; k<a->i[i+1]; k++) { 1686 for (l=0; l<bs; l++) { 1687 aa[count++] = a->a[bs2*k + l*bs + j]; 1688 } 1689 } 1690 } 1691 } 1692 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1693 ierr = PetscFree(aa);CHKERRQ(ierr); 1694 1695 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1696 if (file) { 1697 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1698 } 1699 PetscFunctionReturn(0); 1700 } 1701 1702 #undef __FUNCT__ 1703 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1704 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1705 { 1706 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1707 PetscErrorCode ierr; 1708 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1709 PetscViewerFormat format; 1710 1711 PetscFunctionBegin; 1712 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1713 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1714 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1715 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1716 Mat aij; 1717 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1718 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1719 ierr = MatDestroy(aij);CHKERRQ(ierr); 1720 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1721 PetscFunctionReturn(0); 1722 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1723 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1724 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1725 for (i=0; i<a->mbs; i++) { 1726 for (j=0; j<bs; j++) { 1727 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1728 for (k=a->i[i]; k<a->i[i+1]; k++) { 1729 for (l=0; l<bs; l++) { 1730 #if defined(PETSC_USE_COMPLEX) 1731 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1732 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1733 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1734 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1735 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1736 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1737 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1738 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1739 } 1740 #else 1741 if (a->a[bs2*k + l*bs + j] != 0.0) { 1742 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1743 } 1744 #endif 1745 } 1746 } 1747 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1748 } 1749 } 1750 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1751 } else { 1752 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1753 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr); 1754 for (i=0; i<a->mbs; i++) { 1755 for (j=0; j<bs; j++) { 1756 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1757 for (k=a->i[i]; k<a->i[i+1]; k++) { 1758 for (l=0; l<bs; l++) { 1759 #if defined(PETSC_USE_COMPLEX) 1760 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1761 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1762 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1763 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1764 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1765 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1766 } else { 1767 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1768 } 1769 #else 1770 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1771 #endif 1772 } 1773 } 1774 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1775 } 1776 } 1777 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1778 } 1779 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1780 PetscFunctionReturn(0); 1781 } 1782 1783 #undef __FUNCT__ 1784 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1785 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1786 { 1787 Mat A = (Mat) Aa; 1788 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1789 PetscErrorCode ierr; 1790 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1791 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1792 MatScalar *aa; 1793 PetscViewer viewer; 1794 1795 PetscFunctionBegin; 1796 1797 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1798 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1799 1800 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1801 1802 /* loop over matrix elements drawing boxes */ 1803 color = PETSC_DRAW_BLUE; 1804 for (i=0,row=0; i<mbs; i++,row+=bs) { 1805 for (j=a->i[i]; j<a->i[i+1]; j++) { 1806 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1807 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1808 aa = a->a + j*bs2; 1809 for (k=0; k<bs; k++) { 1810 for (l=0; l<bs; l++) { 1811 if (PetscRealPart(*aa++) >= 0.) continue; 1812 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1813 } 1814 } 1815 } 1816 } 1817 color = PETSC_DRAW_CYAN; 1818 for (i=0,row=0; i<mbs; i++,row+=bs) { 1819 for (j=a->i[i]; j<a->i[i+1]; j++) { 1820 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1821 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1822 aa = a->a + j*bs2; 1823 for (k=0; k<bs; k++) { 1824 for (l=0; l<bs; l++) { 1825 if (PetscRealPart(*aa++) != 0.) continue; 1826 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1827 } 1828 } 1829 } 1830 } 1831 1832 color = PETSC_DRAW_RED; 1833 for (i=0,row=0; i<mbs; i++,row+=bs) { 1834 for (j=a->i[i]; j<a->i[i+1]; j++) { 1835 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1836 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1837 aa = a->a + j*bs2; 1838 for (k=0; k<bs; k++) { 1839 for (l=0; l<bs; l++) { 1840 if (PetscRealPart(*aa++) <= 0.) continue; 1841 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1842 } 1843 } 1844 } 1845 } 1846 PetscFunctionReturn(0); 1847 } 1848 1849 #undef __FUNCT__ 1850 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1851 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1852 { 1853 PetscErrorCode ierr; 1854 PetscReal xl,yl,xr,yr,w,h; 1855 PetscDraw draw; 1856 PetscBool isnull; 1857 1858 PetscFunctionBegin; 1859 1860 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1861 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1862 1863 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1864 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1865 xr += w; yr += h; xl = -w; yl = -h; 1866 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1867 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1868 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1869 PetscFunctionReturn(0); 1870 } 1871 1872 #undef __FUNCT__ 1873 #define __FUNCT__ "MatView_SeqBAIJ" 1874 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1875 { 1876 PetscErrorCode ierr; 1877 PetscBool iascii,isbinary,isdraw; 1878 1879 PetscFunctionBegin; 1880 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1881 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1882 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1883 if (iascii){ 1884 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1885 } else if (isbinary) { 1886 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1887 } else if (isdraw) { 1888 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1889 } else { 1890 Mat B; 1891 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1892 ierr = MatView(B,viewer);CHKERRQ(ierr); 1893 ierr = MatDestroy(B);CHKERRQ(ierr); 1894 } 1895 PetscFunctionReturn(0); 1896 } 1897 1898 1899 #undef __FUNCT__ 1900 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1901 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1902 { 1903 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1904 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1905 PetscInt *ai = a->i,*ailen = a->ilen; 1906 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1907 MatScalar *ap,*aa = a->a; 1908 1909 PetscFunctionBegin; 1910 for (k=0; k<m; k++) { /* loop over rows */ 1911 row = im[k]; brow = row/bs; 1912 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1913 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1914 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1915 nrow = ailen[brow]; 1916 for (l=0; l<n; l++) { /* loop over columns */ 1917 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1918 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1919 col = in[l] ; 1920 bcol = col/bs; 1921 cidx = col%bs; 1922 ridx = row%bs; 1923 high = nrow; 1924 low = 0; /* assume unsorted */ 1925 while (high-low > 5) { 1926 t = (low+high)/2; 1927 if (rp[t] > bcol) high = t; 1928 else low = t; 1929 } 1930 for (i=low; i<high; i++) { 1931 if (rp[i] > bcol) break; 1932 if (rp[i] == bcol) { 1933 *v++ = ap[bs2*i+bs*cidx+ridx]; 1934 goto finished; 1935 } 1936 } 1937 *v++ = 0.0; 1938 finished:; 1939 } 1940 } 1941 PetscFunctionReturn(0); 1942 } 1943 1944 #undef __FUNCT__ 1945 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1946 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1947 { 1948 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1949 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1950 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1951 PetscErrorCode ierr; 1952 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1953 PetscBool roworiented=a->roworiented; 1954 const PetscScalar *value = v; 1955 MatScalar *ap,*aa = a->a,*bap; 1956 1957 PetscFunctionBegin; 1958 if (roworiented) { 1959 stepval = (n-1)*bs; 1960 } else { 1961 stepval = (m-1)*bs; 1962 } 1963 for (k=0; k<m; k++) { /* loop over added rows */ 1964 row = im[k]; 1965 if (row < 0) continue; 1966 #if defined(PETSC_USE_DEBUG) 1967 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1968 #endif 1969 rp = aj + ai[row]; 1970 ap = aa + bs2*ai[row]; 1971 rmax = imax[row]; 1972 nrow = ailen[row]; 1973 low = 0; 1974 high = nrow; 1975 for (l=0; l<n; l++) { /* loop over added columns */ 1976 if (in[l] < 0) continue; 1977 #if defined(PETSC_USE_DEBUG) 1978 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1979 #endif 1980 col = in[l]; 1981 if (roworiented) { 1982 value = v + (k*(stepval+bs) + l)*bs; 1983 } else { 1984 value = v + (l*(stepval+bs) + k)*bs; 1985 } 1986 if (col <= lastcol) low = 0; else high = nrow; 1987 lastcol = col; 1988 while (high-low > 7) { 1989 t = (low+high)/2; 1990 if (rp[t] > col) high = t; 1991 else low = t; 1992 } 1993 for (i=low; i<high; i++) { 1994 if (rp[i] > col) break; 1995 if (rp[i] == col) { 1996 bap = ap + bs2*i; 1997 if (roworiented) { 1998 if (is == ADD_VALUES) { 1999 for (ii=0; ii<bs; ii++,value+=stepval) { 2000 for (jj=ii; jj<bs2; jj+=bs) { 2001 bap[jj] += *value++; 2002 } 2003 } 2004 } else { 2005 for (ii=0; ii<bs; ii++,value+=stepval) { 2006 for (jj=ii; jj<bs2; jj+=bs) { 2007 bap[jj] = *value++; 2008 } 2009 } 2010 } 2011 } else { 2012 if (is == ADD_VALUES) { 2013 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2014 for (jj=0; jj<bs; jj++) { 2015 bap[jj] += value[jj]; 2016 } 2017 bap += bs; 2018 } 2019 } else { 2020 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 2021 for (jj=0; jj<bs; jj++) { 2022 bap[jj] = value[jj]; 2023 } 2024 bap += bs; 2025 } 2026 } 2027 } 2028 goto noinsert2; 2029 } 2030 } 2031 if (nonew == 1) goto noinsert2; 2032 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2033 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2034 N = nrow++ - 1; high++; 2035 /* shift up all the later entries in this row */ 2036 for (ii=N; ii>=i; ii--) { 2037 rp[ii+1] = rp[ii]; 2038 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2039 } 2040 if (N >= i) { 2041 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2042 } 2043 rp[i] = col; 2044 bap = ap + bs2*i; 2045 if (roworiented) { 2046 for (ii=0; ii<bs; ii++,value+=stepval) { 2047 for (jj=ii; jj<bs2; jj+=bs) { 2048 bap[jj] = *value++; 2049 } 2050 } 2051 } else { 2052 for (ii=0; ii<bs; ii++,value+=stepval) { 2053 for (jj=0; jj<bs; jj++) { 2054 *bap++ = *value++; 2055 } 2056 } 2057 } 2058 noinsert2:; 2059 low = i; 2060 } 2061 ailen[row] = nrow; 2062 } 2063 PetscFunctionReturn(0); 2064 } 2065 2066 #undef __FUNCT__ 2067 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 2068 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 2069 { 2070 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2071 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 2072 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 2073 PetscErrorCode ierr; 2074 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 2075 MatScalar *aa = a->a,*ap; 2076 PetscReal ratio=0.6; 2077 2078 PetscFunctionBegin; 2079 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 2080 2081 if (m) rmax = ailen[0]; 2082 for (i=1; i<mbs; i++) { 2083 /* move each row back by the amount of empty slots (fshift) before it*/ 2084 fshift += imax[i-1] - ailen[i-1]; 2085 rmax = PetscMax(rmax,ailen[i]); 2086 if (fshift) { 2087 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 2088 N = ailen[i]; 2089 for (j=0; j<N; j++) { 2090 ip[j-fshift] = ip[j]; 2091 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2092 } 2093 } 2094 ai[i] = ai[i-1] + ailen[i-1]; 2095 } 2096 if (mbs) { 2097 fshift += imax[mbs-1] - ailen[mbs-1]; 2098 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 2099 } 2100 /* reset ilen and imax for each row */ 2101 for (i=0; i<mbs; i++) { 2102 ailen[i] = imax[i] = ai[i+1] - ai[i]; 2103 } 2104 a->nz = ai[mbs]; 2105 2106 /* diagonals may have moved, so kill the diagonal pointers */ 2107 a->idiagvalid = PETSC_FALSE; 2108 if (fshift && a->diag) { 2109 ierr = PetscFree(a->diag);CHKERRQ(ierr); 2110 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2111 a->diag = 0; 2112 } 2113 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 2114 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 2115 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 2116 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 2117 A->info.mallocs += a->reallocs; 2118 a->reallocs = 0; 2119 A->info.nz_unneeded = (PetscReal)fshift*bs2; 2120 2121 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 2122 if (a->compressedrow.use){ 2123 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 2124 } 2125 2126 A->same_nonzero = PETSC_TRUE; 2127 PetscFunctionReturn(0); 2128 } 2129 2130 /* 2131 This function returns an array of flags which indicate the locations of contiguous 2132 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2133 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2134 Assume: sizes should be long enough to hold all the values. 2135 */ 2136 #undef __FUNCT__ 2137 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 2138 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2139 { 2140 PetscInt i,j,k,row; 2141 PetscBool flg; 2142 2143 PetscFunctionBegin; 2144 for (i=0,j=0; i<n; j++) { 2145 row = idx[i]; 2146 if (row%bs!=0) { /* Not the begining of a block */ 2147 sizes[j] = 1; 2148 i++; 2149 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2150 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 2151 i++; 2152 } else { /* Begining of the block, so check if the complete block exists */ 2153 flg = PETSC_TRUE; 2154 for (k=1; k<bs; k++) { 2155 if (row+k != idx[i+k]) { /* break in the block */ 2156 flg = PETSC_FALSE; 2157 break; 2158 } 2159 } 2160 if (flg) { /* No break in the bs */ 2161 sizes[j] = bs; 2162 i+= bs; 2163 } else { 2164 sizes[j] = 1; 2165 i++; 2166 } 2167 } 2168 } 2169 *bs_max = j; 2170 PetscFunctionReturn(0); 2171 } 2172 2173 #undef __FUNCT__ 2174 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2175 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 2176 { 2177 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2178 PetscErrorCode ierr; 2179 PetscInt i,j,k,count,*rows; 2180 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2181 PetscScalar zero = 0.0; 2182 MatScalar *aa; 2183 2184 PetscFunctionBegin; 2185 /* Make a copy of the IS and sort it */ 2186 /* allocate memory for rows,sizes */ 2187 ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2188 2189 /* copy IS values to rows, and sort them */ 2190 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2191 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2192 if (baij->keepnonzeropattern) { 2193 for (i=0; i<is_n; i++) { sizes[i] = 1; } 2194 bs_max = is_n; 2195 A->same_nonzero = PETSC_TRUE; 2196 } else { 2197 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2198 A->same_nonzero = PETSC_FALSE; 2199 } 2200 2201 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2202 row = rows[j]; 2203 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2204 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2205 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2206 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2207 if (diag != 0.0) { 2208 if (baij->ilen[row/bs] > 0) { 2209 baij->ilen[row/bs] = 1; 2210 baij->j[baij->i[row/bs]] = row/bs; 2211 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2212 } 2213 /* Now insert all the diagonal values for this bs */ 2214 for (k=0; k<bs; k++) { 2215 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2216 } 2217 } else { /* (diag == 0.0) */ 2218 baij->ilen[row/bs] = 0; 2219 } /* end (diag == 0.0) */ 2220 } else { /* (sizes[i] != bs) */ 2221 #if defined (PETSC_USE_DEBUG) 2222 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2223 #endif 2224 for (k=0; k<count; k++) { 2225 aa[0] = zero; 2226 aa += bs; 2227 } 2228 if (diag != 0.0) { 2229 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2230 } 2231 } 2232 } 2233 2234 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2235 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2236 PetscFunctionReturn(0); 2237 } 2238 2239 #undef __FUNCT__ 2240 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2241 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2242 { 2243 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2244 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2245 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2246 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2247 PetscErrorCode ierr; 2248 PetscInt ridx,cidx,bs2=a->bs2; 2249 PetscBool roworiented=a->roworiented; 2250 MatScalar *ap,value,*aa=a->a,*bap; 2251 2252 PetscFunctionBegin; 2253 if (v) PetscValidScalarPointer(v,6); 2254 for (k=0; k<m; k++) { /* loop over added rows */ 2255 row = im[k]; 2256 brow = row/bs; 2257 if (row < 0) continue; 2258 #if defined(PETSC_USE_DEBUG) 2259 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2260 #endif 2261 rp = aj + ai[brow]; 2262 ap = aa + bs2*ai[brow]; 2263 rmax = imax[brow]; 2264 nrow = ailen[brow]; 2265 low = 0; 2266 high = nrow; 2267 for (l=0; l<n; l++) { /* loop over added columns */ 2268 if (in[l] < 0) continue; 2269 #if defined(PETSC_USE_DEBUG) 2270 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2271 #endif 2272 col = in[l]; bcol = col/bs; 2273 ridx = row % bs; cidx = col % bs; 2274 if (roworiented) { 2275 value = v[l + k*n]; 2276 } else { 2277 value = v[k + l*m]; 2278 } 2279 if (col <= lastcol) low = 0; else high = nrow; 2280 lastcol = col; 2281 while (high-low > 7) { 2282 t = (low+high)/2; 2283 if (rp[t] > bcol) high = t; 2284 else low = t; 2285 } 2286 for (i=low; i<high; i++) { 2287 if (rp[i] > bcol) break; 2288 if (rp[i] == bcol) { 2289 bap = ap + bs2*i + bs*cidx + ridx; 2290 if (is == ADD_VALUES) *bap += value; 2291 else *bap = value; 2292 goto noinsert1; 2293 } 2294 } 2295 if (nonew == 1) goto noinsert1; 2296 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2297 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2298 N = nrow++ - 1; high++; 2299 /* shift up all the later entries in this row */ 2300 for (ii=N; ii>=i; ii--) { 2301 rp[ii+1] = rp[ii]; 2302 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2303 } 2304 if (N>=i) { 2305 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2306 } 2307 rp[i] = bcol; 2308 ap[bs2*i + bs*cidx + ridx] = value; 2309 a->nz++; 2310 noinsert1:; 2311 low = i; 2312 } 2313 ailen[brow] = nrow; 2314 } 2315 A->same_nonzero = PETSC_FALSE; 2316 PetscFunctionReturn(0); 2317 } 2318 2319 #undef __FUNCT__ 2320 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2321 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2322 { 2323 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2324 Mat outA; 2325 PetscErrorCode ierr; 2326 PetscBool row_identity,col_identity; 2327 2328 PetscFunctionBegin; 2329 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2330 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2331 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2332 if (!row_identity || !col_identity) { 2333 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2334 } 2335 2336 outA = inA; 2337 inA->factortype = MAT_FACTOR_LU; 2338 2339 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2340 2341 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2342 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 2343 a->row = row; 2344 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2345 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 2346 a->col = col; 2347 2348 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2349 if (a->icol) { 2350 ierr = ISDestroy(a->icol);CHKERRQ(ierr); 2351 } 2352 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2353 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2354 2355 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2356 if (!a->solve_work) { 2357 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2358 ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2359 } 2360 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2361 2362 PetscFunctionReturn(0); 2363 } 2364 2365 EXTERN_C_BEGIN 2366 #undef __FUNCT__ 2367 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2368 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2369 { 2370 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2371 PetscInt i,nz,mbs; 2372 2373 PetscFunctionBegin; 2374 nz = baij->maxnz; 2375 mbs = baij->mbs; 2376 for (i=0; i<nz; i++) { 2377 baij->j[i] = indices[i]; 2378 } 2379 baij->nz = nz; 2380 for (i=0; i<mbs; i++) { 2381 baij->ilen[i] = baij->imax[i]; 2382 } 2383 PetscFunctionReturn(0); 2384 } 2385 EXTERN_C_END 2386 2387 #undef __FUNCT__ 2388 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2389 /*@ 2390 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2391 in the matrix. 2392 2393 Input Parameters: 2394 + mat - the SeqBAIJ matrix 2395 - indices - the column indices 2396 2397 Level: advanced 2398 2399 Notes: 2400 This can be called if you have precomputed the nonzero structure of the 2401 matrix and want to provide it to the matrix object to improve the performance 2402 of the MatSetValues() operation. 2403 2404 You MUST have set the correct numbers of nonzeros per row in the call to 2405 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2406 2407 MUST be called before any calls to MatSetValues(); 2408 2409 @*/ 2410 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2411 { 2412 PetscErrorCode ierr; 2413 2414 PetscFunctionBegin; 2415 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2416 PetscValidPointer(indices,2); 2417 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr); 2418 PetscFunctionReturn(0); 2419 } 2420 2421 #undef __FUNCT__ 2422 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2423 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2424 { 2425 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2426 PetscErrorCode ierr; 2427 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2428 PetscReal atmp; 2429 PetscScalar *x,zero = 0.0; 2430 MatScalar *aa; 2431 PetscInt ncols,brow,krow,kcol; 2432 2433 PetscFunctionBegin; 2434 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2435 bs = A->rmap->bs; 2436 aa = a->a; 2437 ai = a->i; 2438 aj = a->j; 2439 mbs = a->mbs; 2440 2441 ierr = VecSet(v,zero);CHKERRQ(ierr); 2442 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2443 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2444 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2445 for (i=0; i<mbs; i++) { 2446 ncols = ai[1] - ai[0]; ai++; 2447 brow = bs*i; 2448 for (j=0; j<ncols; j++){ 2449 for (kcol=0; kcol<bs; kcol++){ 2450 for (krow=0; krow<bs; krow++){ 2451 atmp = PetscAbsScalar(*aa);aa++; 2452 row = brow + krow; /* row index */ 2453 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2454 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2455 } 2456 } 2457 aj++; 2458 } 2459 } 2460 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2461 PetscFunctionReturn(0); 2462 } 2463 2464 #undef __FUNCT__ 2465 #define __FUNCT__ "MatCopy_SeqBAIJ" 2466 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2467 { 2468 PetscErrorCode ierr; 2469 2470 PetscFunctionBegin; 2471 /* If the two matrices have the same copy implementation, use fast copy. */ 2472 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2473 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2474 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2475 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2476 2477 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2478 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2479 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2480 } else { 2481 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2482 } 2483 PetscFunctionReturn(0); 2484 } 2485 2486 #undef __FUNCT__ 2487 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2488 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2489 { 2490 PetscErrorCode ierr; 2491 2492 PetscFunctionBegin; 2493 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2494 PetscFunctionReturn(0); 2495 } 2496 2497 #undef __FUNCT__ 2498 #define __FUNCT__ "MatGetArray_SeqBAIJ" 2499 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2500 { 2501 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2502 PetscFunctionBegin; 2503 *array = a->a; 2504 PetscFunctionReturn(0); 2505 } 2506 2507 #undef __FUNCT__ 2508 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2509 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2510 { 2511 PetscFunctionBegin; 2512 PetscFunctionReturn(0); 2513 } 2514 2515 #undef __FUNCT__ 2516 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2517 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2518 { 2519 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2520 PetscErrorCode ierr; 2521 PetscInt i,bs=Y->rmap->bs,j,bs2; 2522 PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz); 2523 2524 PetscFunctionBegin; 2525 if (str == SAME_NONZERO_PATTERN) { 2526 PetscScalar alpha = a; 2527 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2528 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2529 if (y->xtoy && y->XtoY != X) { 2530 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2531 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2532 } 2533 if (!y->xtoy) { /* get xtoy */ 2534 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2535 y->XtoY = X; 2536 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2537 } 2538 bs2 = bs*bs; 2539 for (i=0; i<x->nz; i++) { 2540 j = 0; 2541 while (j < bs2){ 2542 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2543 j++; 2544 } 2545 } 2546 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2547 } else { 2548 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2549 } 2550 PetscFunctionReturn(0); 2551 } 2552 2553 #undef __FUNCT__ 2554 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ" 2555 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs) 2556 { 2557 PetscInt rbs,cbs; 2558 PetscErrorCode ierr; 2559 2560 PetscFunctionBegin; 2561 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 2562 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 2563 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 2564 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 2565 PetscFunctionReturn(0); 2566 } 2567 2568 #undef __FUNCT__ 2569 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2570 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2571 { 2572 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2573 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2574 MatScalar *aa = a->a; 2575 2576 PetscFunctionBegin; 2577 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2578 PetscFunctionReturn(0); 2579 } 2580 2581 #undef __FUNCT__ 2582 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2583 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2584 { 2585 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2586 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2587 MatScalar *aa = a->a; 2588 2589 PetscFunctionBegin; 2590 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2591 PetscFunctionReturn(0); 2592 } 2593 2594 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2595 2596 #undef __FUNCT__ 2597 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2598 /* 2599 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2600 */ 2601 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 2602 { 2603 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2604 PetscErrorCode ierr; 2605 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2606 PetscInt nz = a->i[m],row,*jj,mr,col; 2607 2608 PetscFunctionBegin; 2609 *nn = n; 2610 if (!ia) PetscFunctionReturn(0); 2611 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2612 else { 2613 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2614 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2615 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2616 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2617 jj = a->j; 2618 for (i=0; i<nz; i++) { 2619 collengths[jj[i]]++; 2620 } 2621 cia[0] = oshift; 2622 for (i=0; i<n; i++) { 2623 cia[i+1] = cia[i] + collengths[i]; 2624 } 2625 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2626 jj = a->j; 2627 for (row=0; row<m; row++) { 2628 mr = a->i[row+1] - a->i[row]; 2629 for (i=0; i<mr; i++) { 2630 col = *jj++; 2631 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2632 } 2633 } 2634 ierr = PetscFree(collengths);CHKERRQ(ierr); 2635 *ia = cia; *ja = cja; 2636 } 2637 PetscFunctionReturn(0); 2638 } 2639 2640 #undef __FUNCT__ 2641 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2642 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscBool *done) 2643 { 2644 PetscErrorCode ierr; 2645 2646 PetscFunctionBegin; 2647 if (!ia) PetscFunctionReturn(0); 2648 ierr = PetscFree(*ia);CHKERRQ(ierr); 2649 ierr = PetscFree(*ja);CHKERRQ(ierr); 2650 PetscFunctionReturn(0); 2651 } 2652 2653 #undef __FUNCT__ 2654 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2655 PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2656 { 2657 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f; 2658 PetscErrorCode ierr; 2659 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2; 2660 PetscScalar dx,*y,*xx,*w3_array; 2661 PetscScalar *vscale_array; 2662 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2663 Vec w1=coloring->w1,w2=coloring->w2,w3; 2664 void *fctx = coloring->fctx; 2665 PetscBool flg = PETSC_FALSE; 2666 PetscInt ctype=coloring->ctype,N,col_start=0,col_end=0; 2667 Vec x1_tmp; 2668 2669 PetscFunctionBegin; 2670 PetscValidHeaderSpecific(J,MAT_CLASSID,1); 2671 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2); 2672 PetscValidHeaderSpecific(x1,VEC_CLASSID,3); 2673 if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2674 2675 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2676 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2677 ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr); 2678 if (flg) { 2679 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2680 } else { 2681 PetscBool assembled; 2682 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2683 if (assembled) { 2684 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2685 } 2686 } 2687 2688 x1_tmp = x1; 2689 if (!coloring->vscale){ 2690 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2691 } 2692 2693 /* 2694 This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets 2695 coloring->F for the coarser grids from the finest 2696 */ 2697 if (coloring->F) { 2698 ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr); 2699 ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr); 2700 if (m1 != m2) { 2701 coloring->F = 0; 2702 } 2703 } 2704 2705 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2706 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2707 } 2708 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2709 2710 /* Set w1 = F(x1) */ 2711 if (coloring->F) { 2712 w1 = coloring->F; /* use already computed value of function */ 2713 coloring->F = 0; 2714 } else { 2715 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2716 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2717 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2718 } 2719 2720 if (!coloring->w3) { 2721 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2722 ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2723 } 2724 w3 = coloring->w3; 2725 2726 CHKMEMQ; 2727 /* Compute all the local scale factors, including ghost points */ 2728 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2729 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2730 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2731 if (ctype == IS_COLORING_GHOSTED){ 2732 col_start = 0; col_end = N; 2733 } else if (ctype == IS_COLORING_GLOBAL){ 2734 xx = xx - start; 2735 vscale_array = vscale_array - start; 2736 col_start = start; col_end = N + start; 2737 } CHKMEMQ; 2738 for (col=col_start; col<col_end; col++){ 2739 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2740 if (coloring->htype[0] == 'w') { 2741 dx = 1.0 + unorm; 2742 } else { 2743 dx = xx[col]; 2744 } 2745 if (dx == 0.0) dx = 1.0; 2746 #if !defined(PETSC_USE_COMPLEX) 2747 if (dx < umin && dx >= 0.0) dx = umin; 2748 else if (dx < 0.0 && dx > -umin) dx = -umin; 2749 #else 2750 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2751 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2752 #endif 2753 dx *= epsilon; 2754 vscale_array[col] = 1.0/dx; 2755 } CHKMEMQ; 2756 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2757 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2758 if (ctype == IS_COLORING_GLOBAL){ 2759 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2760 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2761 } 2762 CHKMEMQ; 2763 if (coloring->vscaleforrow) { 2764 vscaleforrow = coloring->vscaleforrow; 2765 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2766 2767 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2768 /* 2769 Loop over each color 2770 */ 2771 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2772 for (k=0; k<coloring->ncolors; k++) { 2773 coloring->currentcolor = k; 2774 for (i=0; i<bs; i++) { 2775 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2776 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2777 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2778 /* 2779 Loop over each column associated with color 2780 adding the perturbation to the vector w3. 2781 */ 2782 for (l=0; l<coloring->ncolumns[k]; l++) { 2783 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2784 if (coloring->htype[0] == 'w') { 2785 dx = 1.0 + unorm; 2786 } else { 2787 dx = xx[col]; 2788 } 2789 if (dx == 0.0) dx = 1.0; 2790 #if !defined(PETSC_USE_COMPLEX) 2791 if (dx < umin && dx >= 0.0) dx = umin; 2792 else if (dx < 0.0 && dx > -umin) dx = -umin; 2793 #else 2794 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2795 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2796 #endif 2797 dx *= epsilon; 2798 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2799 w3_array[col] += dx; 2800 } 2801 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2802 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2803 2804 /* 2805 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2806 w2 = F(x1 + dx) - F(x1) 2807 */ 2808 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2809 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2810 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2811 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2812 2813 /* 2814 Loop over rows of vector, putting results into Jacobian matrix 2815 */ 2816 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2817 for (l=0; l<coloring->nrows[k]; l++) { 2818 row = bs*coloring->rows[k][l]; /* local row index */ 2819 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2820 for (j=0; j<bs; j++) { 2821 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2822 srows[j] = row + start + j; 2823 } 2824 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2825 } 2826 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2827 } 2828 } /* endof for each color */ 2829 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2830 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2831 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2832 ierr = PetscFree(srows);CHKERRQ(ierr); 2833 2834 coloring->currentcolor = -1; 2835 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2836 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2837 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2838 PetscFunctionReturn(0); 2839 } 2840 2841 /* -------------------------------------------------------------------*/ 2842 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2843 MatGetRow_SeqBAIJ, 2844 MatRestoreRow_SeqBAIJ, 2845 MatMult_SeqBAIJ_N, 2846 /* 4*/ MatMultAdd_SeqBAIJ_N, 2847 MatMultTranspose_SeqBAIJ, 2848 MatMultTransposeAdd_SeqBAIJ, 2849 0, 2850 0, 2851 0, 2852 /*10*/ 0, 2853 MatLUFactor_SeqBAIJ, 2854 0, 2855 0, 2856 MatTranspose_SeqBAIJ, 2857 /*15*/ MatGetInfo_SeqBAIJ, 2858 MatEqual_SeqBAIJ, 2859 MatGetDiagonal_SeqBAIJ, 2860 MatDiagonalScale_SeqBAIJ, 2861 MatNorm_SeqBAIJ, 2862 /*20*/ 0, 2863 MatAssemblyEnd_SeqBAIJ, 2864 MatSetOption_SeqBAIJ, 2865 MatZeroEntries_SeqBAIJ, 2866 /*24*/ MatZeroRows_SeqBAIJ, 2867 0, 2868 0, 2869 0, 2870 0, 2871 /*29*/ MatSetUpPreallocation_SeqBAIJ, 2872 0, 2873 0, 2874 MatGetArray_SeqBAIJ, 2875 MatRestoreArray_SeqBAIJ, 2876 /*34*/ MatDuplicate_SeqBAIJ, 2877 0, 2878 0, 2879 MatILUFactor_SeqBAIJ, 2880 0, 2881 /*39*/ MatAXPY_SeqBAIJ, 2882 MatGetSubMatrices_SeqBAIJ, 2883 MatIncreaseOverlap_SeqBAIJ, 2884 MatGetValues_SeqBAIJ, 2885 MatCopy_SeqBAIJ, 2886 /*44*/ 0, 2887 MatScale_SeqBAIJ, 2888 0, 2889 0, 2890 0, 2891 /*49*/ MatSetBlockSize_SeqBAIJ, 2892 MatGetRowIJ_SeqBAIJ, 2893 MatRestoreRowIJ_SeqBAIJ, 2894 MatGetColumnIJ_SeqBAIJ, 2895 MatRestoreColumnIJ_SeqBAIJ, 2896 /*54*/ MatFDColoringCreate_SeqAIJ, 2897 0, 2898 0, 2899 0, 2900 MatSetValuesBlocked_SeqBAIJ, 2901 /*59*/ MatGetSubMatrix_SeqBAIJ, 2902 MatDestroy_SeqBAIJ, 2903 MatView_SeqBAIJ, 2904 0, 2905 0, 2906 /*64*/ 0, 2907 0, 2908 0, 2909 0, 2910 0, 2911 /*69*/ MatGetRowMaxAbs_SeqBAIJ, 2912 0, 2913 MatConvert_Basic, 2914 0, 2915 0, 2916 /*74*/ 0, 2917 MatFDColoringApply_BAIJ, 2918 0, 2919 0, 2920 0, 2921 /*79*/ 0, 2922 0, 2923 0, 2924 0, 2925 MatLoad_SeqBAIJ, 2926 /*84*/ 0, 2927 0, 2928 0, 2929 0, 2930 0, 2931 /*89*/ 0, 2932 0, 2933 0, 2934 0, 2935 0, 2936 /*94*/ 0, 2937 0, 2938 0, 2939 0, 2940 0, 2941 /*99*/0, 2942 0, 2943 0, 2944 0, 2945 0, 2946 /*104*/0, 2947 MatRealPart_SeqBAIJ, 2948 MatImaginaryPart_SeqBAIJ, 2949 0, 2950 0, 2951 /*109*/0, 2952 0, 2953 0, 2954 0, 2955 MatMissingDiagonal_SeqBAIJ, 2956 /*114*/0, 2957 0, 2958 0, 2959 0, 2960 0, 2961 /*119*/0, 2962 0, 2963 MatMultHermitianTranspose_SeqBAIJ, 2964 MatMultHermitianTransposeAdd_SeqBAIJ, 2965 0, 2966 }; 2967 2968 EXTERN_C_BEGIN 2969 #undef __FUNCT__ 2970 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2971 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2972 { 2973 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2974 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2975 PetscErrorCode ierr; 2976 2977 PetscFunctionBegin; 2978 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2979 2980 /* allocate space for values if not already there */ 2981 if (!aij->saved_values) { 2982 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2983 ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2984 } 2985 2986 /* copy values over */ 2987 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2988 PetscFunctionReturn(0); 2989 } 2990 EXTERN_C_END 2991 2992 EXTERN_C_BEGIN 2993 #undef __FUNCT__ 2994 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2995 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2996 { 2997 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2998 PetscErrorCode ierr; 2999 PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 3000 3001 PetscFunctionBegin; 3002 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3003 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 3004 3005 /* copy values over */ 3006 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 3007 PetscFunctionReturn(0); 3008 } 3009 EXTERN_C_END 3010 3011 EXTERN_C_BEGIN 3012 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 3013 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 3014 EXTERN_C_END 3015 3016 EXTERN_C_BEGIN 3017 #undef __FUNCT__ 3018 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 3019 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 3020 { 3021 Mat_SeqBAIJ *b; 3022 PetscErrorCode ierr; 3023 PetscInt i,mbs,nbs,bs2,newbs = PetscAbs(bs); 3024 PetscBool flg,skipallocation = PETSC_FALSE; 3025 3026 PetscFunctionBegin; 3027 3028 if (nz == MAT_SKIP_ALLOCATION) { 3029 skipallocation = PETSC_TRUE; 3030 nz = 0; 3031 } 3032 3033 if (bs < 0) { 3034 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 3035 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 3036 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3037 bs = PetscAbs(bs); 3038 } 3039 if (nnz && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 3040 bs = newbs; 3041 3042 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3043 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3044 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3045 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3046 3047 B->preallocated = PETSC_TRUE; 3048 3049 mbs = B->rmap->n/bs; 3050 nbs = B->cmap->n/bs; 3051 bs2 = bs*bs; 3052 3053 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 3054 3055 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3056 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 3057 if (nnz) { 3058 for (i=0; i<mbs; i++) { 3059 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 3060 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 3061 } 3062 } 3063 3064 b = (Mat_SeqBAIJ*)B->data; 3065 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 3066 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3067 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3068 3069 if (!flg) { 3070 switch (bs) { 3071 case 1: 3072 B->ops->mult = MatMult_SeqBAIJ_1; 3073 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3074 B->ops->sor = MatSOR_SeqBAIJ_1; 3075 break; 3076 case 2: 3077 B->ops->mult = MatMult_SeqBAIJ_2; 3078 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3079 B->ops->sor = MatSOR_SeqBAIJ_2; 3080 break; 3081 case 3: 3082 B->ops->mult = MatMult_SeqBAIJ_3; 3083 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3084 B->ops->sor = MatSOR_SeqBAIJ_3; 3085 break; 3086 case 4: 3087 B->ops->mult = MatMult_SeqBAIJ_4; 3088 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3089 B->ops->sor = MatSOR_SeqBAIJ_4; 3090 break; 3091 case 5: 3092 B->ops->mult = MatMult_SeqBAIJ_5; 3093 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3094 B->ops->sor = MatSOR_SeqBAIJ_5; 3095 break; 3096 case 6: 3097 B->ops->mult = MatMult_SeqBAIJ_6; 3098 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3099 B->ops->sor = MatSOR_SeqBAIJ_6; 3100 break; 3101 case 7: 3102 B->ops->mult = MatMult_SeqBAIJ_7; 3103 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3104 B->ops->sor = MatSOR_SeqBAIJ_7; 3105 break; 3106 case 15: 3107 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3108 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3109 B->ops->sor = MatSOR_SeqBAIJ_N; 3110 break; 3111 default: 3112 B->ops->mult = MatMult_SeqBAIJ_N; 3113 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3114 B->ops->sor = MatSOR_SeqBAIJ_N; 3115 break; 3116 } 3117 } 3118 B->rmap->bs = bs; 3119 b->mbs = mbs; 3120 b->nbs = nbs; 3121 if (!skipallocation) { 3122 if (!b->imax) { 3123 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 3124 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt)); 3125 b->free_imax_ilen = PETSC_TRUE; 3126 } 3127 /* b->ilen will count nonzeros in each block row so far. */ 3128 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 3129 if (!nnz) { 3130 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3131 else if (nz < 0) nz = 1; 3132 for (i=0; i<mbs; i++) b->imax[i] = nz; 3133 nz = nz*mbs; 3134 } else { 3135 nz = 0; 3136 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 3137 } 3138 3139 /* allocate the matrix space */ 3140 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 3141 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 3142 ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 3143 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 3144 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3145 b->singlemalloc = PETSC_TRUE; 3146 b->i[0] = 0; 3147 for (i=1; i<mbs+1; i++) { 3148 b->i[i] = b->i[i-1] + b->imax[i-1]; 3149 } 3150 b->free_a = PETSC_TRUE; 3151 b->free_ij = PETSC_TRUE; 3152 } else { 3153 b->free_a = PETSC_FALSE; 3154 b->free_ij = PETSC_FALSE; 3155 } 3156 3157 B->rmap->bs = bs; 3158 b->bs2 = bs2; 3159 b->mbs = mbs; 3160 b->nz = 0; 3161 b->maxnz = nz; 3162 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3163 PetscFunctionReturn(0); 3164 } 3165 EXTERN_C_END 3166 3167 EXTERN_C_BEGIN 3168 #undef __FUNCT__ 3169 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3170 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3171 { 3172 PetscInt i,m,nz,nz_max=0,*nnz; 3173 PetscScalar *values=0; 3174 PetscErrorCode ierr; 3175 3176 PetscFunctionBegin; 3177 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3178 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3179 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3180 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3181 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3182 m = B->rmap->n/bs; 3183 3184 if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 3185 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3186 for(i=0; i<m; i++) { 3187 nz = ii[i+1]- ii[i]; 3188 if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 3189 nz_max = PetscMax(nz_max, nz); 3190 nnz[i] = nz; 3191 } 3192 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3193 ierr = PetscFree(nnz);CHKERRQ(ierr); 3194 3195 values = (PetscScalar*)V; 3196 if (!values) { 3197 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3198 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3199 } 3200 for (i=0; i<m; i++) { 3201 PetscInt ncols = ii[i+1] - ii[i]; 3202 const PetscInt *icols = jj + ii[i]; 3203 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3204 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3205 } 3206 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3207 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3208 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3209 3210 PetscFunctionReturn(0); 3211 } 3212 EXTERN_C_END 3213 3214 3215 EXTERN_C_BEGIN 3216 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3217 #if defined(PETSC_HAVE_MUMPS) 3218 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3219 #endif 3220 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 3221 EXTERN_C_END 3222 3223 /*MC 3224 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3225 block sparse compressed row format. 3226 3227 Options Database Keys: 3228 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3229 3230 Level: beginner 3231 3232 .seealso: MatCreateSeqBAIJ() 3233 M*/ 3234 3235 3236 EXTERN_C_BEGIN 3237 #undef __FUNCT__ 3238 #define __FUNCT__ "MatCreate_SeqBAIJ" 3239 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 3240 { 3241 PetscErrorCode ierr; 3242 PetscMPIInt size; 3243 Mat_SeqBAIJ *b; 3244 3245 PetscFunctionBegin; 3246 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 3247 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3248 3249 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3250 B->data = (void*)b; 3251 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3252 B->mapping = 0; 3253 b->row = 0; 3254 b->col = 0; 3255 b->icol = 0; 3256 b->reallocs = 0; 3257 b->saved_values = 0; 3258 3259 b->roworiented = PETSC_TRUE; 3260 b->nonew = 0; 3261 b->diag = 0; 3262 b->solve_work = 0; 3263 b->mult_work = 0; 3264 B->spptr = 0; 3265 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3266 b->keepnonzeropattern = PETSC_FALSE; 3267 b->xtoy = 0; 3268 b->XtoY = 0; 3269 b->compressedrow.use = PETSC_FALSE; 3270 b->compressedrow.nrows = 0; 3271 b->compressedrow.i = PETSC_NULL; 3272 b->compressedrow.rindex = PETSC_NULL; 3273 b->compressedrow.checked = PETSC_FALSE; 3274 B->same_nonzero = PETSC_FALSE; 3275 3276 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C", 3277 "MatGetFactorAvailable_seqbaij_petsc", 3278 MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3279 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C", 3280 "MatGetFactor_seqbaij_petsc", 3281 MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3282 #if defined(PETSC_HAVE_MUMPS) 3283 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr); 3284 #endif 3285 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 3286 "MatInvertBlockDiagonal_SeqBAIJ", 3287 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3288 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3289 "MatStoreValues_SeqBAIJ", 3290 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3291 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3292 "MatRetrieveValues_SeqBAIJ", 3293 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3294 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 3295 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 3296 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3297 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 3298 "MatConvert_SeqBAIJ_SeqAIJ", 3299 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3300 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 3301 "MatConvert_SeqBAIJ_SeqSBAIJ", 3302 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3303 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 3304 "MatSeqBAIJSetPreallocation_SeqBAIJ", 3305 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3306 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 3307 "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 3308 MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3309 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3310 PetscFunctionReturn(0); 3311 } 3312 EXTERN_C_END 3313 3314 #undef __FUNCT__ 3315 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3316 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3317 { 3318 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3319 PetscErrorCode ierr; 3320 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3321 3322 PetscFunctionBegin; 3323 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3324 3325 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3326 c->imax = a->imax; 3327 c->ilen = a->ilen; 3328 c->free_imax_ilen = PETSC_FALSE; 3329 } else { 3330 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3331 ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3332 for (i=0; i<mbs; i++) { 3333 c->imax[i] = a->imax[i]; 3334 c->ilen[i] = a->ilen[i]; 3335 } 3336 c->free_imax_ilen = PETSC_TRUE; 3337 } 3338 3339 /* allocate the matrix space */ 3340 if (mallocmatspace){ 3341 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3342 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3343 ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3344 c->singlemalloc = PETSC_FALSE; 3345 c->free_ij = PETSC_FALSE; 3346 c->i = a->i; 3347 c->j = a->j; 3348 c->parent = A; 3349 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3350 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3351 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3352 } else { 3353 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3354 ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3355 c->singlemalloc = PETSC_TRUE; 3356 c->free_ij = PETSC_TRUE; 3357 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3358 if (mbs > 0) { 3359 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3360 if (cpvalues == MAT_COPY_VALUES) { 3361 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3362 } else { 3363 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3364 } 3365 } 3366 } 3367 } 3368 3369 c->roworiented = a->roworiented; 3370 c->nonew = a->nonew; 3371 ierr = PetscLayoutCopy(A->rmap,&C->rmap);CHKERRQ(ierr); 3372 ierr = PetscLayoutCopy(A->cmap,&C->cmap);CHKERRQ(ierr); 3373 c->bs2 = a->bs2; 3374 c->mbs = a->mbs; 3375 c->nbs = a->nbs; 3376 3377 if (a->diag) { 3378 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3379 c->diag = a->diag; 3380 c->free_diag = PETSC_FALSE; 3381 } else { 3382 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3383 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3384 for (i=0; i<mbs; i++) { 3385 c->diag[i] = a->diag[i]; 3386 } 3387 c->free_diag = PETSC_TRUE; 3388 } 3389 } else c->diag = 0; 3390 c->nz = a->nz; 3391 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3392 c->solve_work = 0; 3393 c->mult_work = 0; 3394 c->free_a = PETSC_TRUE; 3395 c->free_ij = PETSC_TRUE; 3396 C->preallocated = PETSC_TRUE; 3397 C->assembled = PETSC_TRUE; 3398 3399 c->compressedrow.use = a->compressedrow.use; 3400 c->compressedrow.nrows = a->compressedrow.nrows; 3401 c->compressedrow.checked = a->compressedrow.checked; 3402 if (a->compressedrow.checked && a->compressedrow.use){ 3403 i = a->compressedrow.nrows; 3404 ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 3405 ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3406 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3407 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3408 } else { 3409 c->compressedrow.use = PETSC_FALSE; 3410 c->compressedrow.i = PETSC_NULL; 3411 c->compressedrow.rindex = PETSC_NULL; 3412 } 3413 C->same_nonzero = A->same_nonzero; 3414 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3415 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3416 PetscFunctionReturn(0); 3417 } 3418 3419 #undef __FUNCT__ 3420 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3421 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3422 { 3423 PetscErrorCode ierr; 3424 3425 PetscFunctionBegin; 3426 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 3427 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3428 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3429 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE); 3430 PetscFunctionReturn(0); 3431 } 3432 3433 #undef __FUNCT__ 3434 #define __FUNCT__ "MatLoad_SeqBAIJ" 3435 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3436 { 3437 Mat_SeqBAIJ *a; 3438 PetscErrorCode ierr; 3439 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3440 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3441 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3442 PetscInt *masked,nmask,tmp,bs2,ishift; 3443 PetscMPIInt size; 3444 int fd; 3445 PetscScalar *aa; 3446 MPI_Comm comm = ((PetscObject)viewer)->comm; 3447 3448 PetscFunctionBegin; 3449 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3450 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3451 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3452 bs2 = bs*bs; 3453 3454 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3455 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3456 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3457 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3458 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3459 M = header[1]; N = header[2]; nz = header[3]; 3460 3461 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3462 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3463 3464 /* 3465 This code adds extra rows to make sure the number of rows is 3466 divisible by the blocksize 3467 */ 3468 mbs = M/bs; 3469 extra_rows = bs - M + bs*(mbs); 3470 if (extra_rows == bs) extra_rows = 0; 3471 else mbs++; 3472 if (extra_rows) { 3473 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3474 } 3475 3476 /* Set global sizes if not already set */ 3477 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3478 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3479 } else { /* Check if the matrix global sizes are correct */ 3480 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3481 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3482 } 3483 3484 /* read in row lengths */ 3485 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3486 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3487 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3488 3489 /* read in column indices */ 3490 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3491 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3492 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3493 3494 /* loop over row lengths determining block row lengths */ 3495 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3496 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3497 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3498 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3499 rowcount = 0; 3500 nzcount = 0; 3501 for (i=0; i<mbs; i++) { 3502 nmask = 0; 3503 for (j=0; j<bs; j++) { 3504 kmax = rowlengths[rowcount]; 3505 for (k=0; k<kmax; k++) { 3506 tmp = jj[nzcount++]/bs; 3507 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3508 } 3509 rowcount++; 3510 } 3511 browlengths[i] += nmask; 3512 /* zero out the mask elements we set */ 3513 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3514 } 3515 3516 /* Do preallocation */ 3517 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3518 a = (Mat_SeqBAIJ*)newmat->data; 3519 3520 /* set matrix "i" values */ 3521 a->i[0] = 0; 3522 for (i=1; i<= mbs; i++) { 3523 a->i[i] = a->i[i-1] + browlengths[i-1]; 3524 a->ilen[i-1] = browlengths[i-1]; 3525 } 3526 a->nz = 0; 3527 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3528 3529 /* read in nonzero values */ 3530 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3531 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3532 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3533 3534 /* set "a" and "j" values into matrix */ 3535 nzcount = 0; jcount = 0; 3536 for (i=0; i<mbs; i++) { 3537 nzcountb = nzcount; 3538 nmask = 0; 3539 for (j=0; j<bs; j++) { 3540 kmax = rowlengths[i*bs+j]; 3541 for (k=0; k<kmax; k++) { 3542 tmp = jj[nzcount++]/bs; 3543 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3544 } 3545 } 3546 /* sort the masked values */ 3547 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3548 3549 /* set "j" values into matrix */ 3550 maskcount = 1; 3551 for (j=0; j<nmask; j++) { 3552 a->j[jcount++] = masked[j]; 3553 mask[masked[j]] = maskcount++; 3554 } 3555 /* set "a" values into matrix */ 3556 ishift = bs2*a->i[i]; 3557 for (j=0; j<bs; j++) { 3558 kmax = rowlengths[i*bs+j]; 3559 for (k=0; k<kmax; k++) { 3560 tmp = jj[nzcountb]/bs ; 3561 block = mask[tmp] - 1; 3562 point = jj[nzcountb] - bs*tmp; 3563 idx = ishift + bs2*block + j + bs*point; 3564 a->a[idx] = (MatScalar)aa[nzcountb++]; 3565 } 3566 } 3567 /* zero out the mask elements we set */ 3568 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3569 } 3570 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3571 3572 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3573 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3574 ierr = PetscFree(aa);CHKERRQ(ierr); 3575 ierr = PetscFree(jj);CHKERRQ(ierr); 3576 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3577 3578 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3579 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3580 ierr = MatView_Private(newmat);CHKERRQ(ierr); 3581 PetscFunctionReturn(0); 3582 } 3583 3584 #undef __FUNCT__ 3585 #define __FUNCT__ "MatCreateSeqBAIJ" 3586 /*@C 3587 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3588 compressed row) format. For good matrix assembly performance the 3589 user should preallocate the matrix storage by setting the parameter nz 3590 (or the array nnz). By setting these parameters accurately, performance 3591 during matrix assembly can be increased by more than a factor of 50. 3592 3593 Collective on MPI_Comm 3594 3595 Input Parameters: 3596 + comm - MPI communicator, set to PETSC_COMM_SELF 3597 . bs - size of block 3598 . m - number of rows 3599 . n - number of columns 3600 . nz - number of nonzero blocks per block row (same for all rows) 3601 - nnz - array containing the number of nonzero blocks in the various block rows 3602 (possibly different for each block row) or PETSC_NULL 3603 3604 Output Parameter: 3605 . A - the matrix 3606 3607 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3608 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3609 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3610 3611 Options Database Keys: 3612 . -mat_no_unroll - uses code that does not unroll the loops in the 3613 block calculations (much slower) 3614 . -mat_block_size - size of the blocks to use 3615 3616 Level: intermediate 3617 3618 Notes: 3619 The number of rows and columns must be divisible by blocksize. 3620 3621 If the nnz parameter is given then the nz parameter is ignored 3622 3623 A nonzero block is any block that as 1 or more nonzeros in it 3624 3625 The block AIJ format is fully compatible with standard Fortran 77 3626 storage. That is, the stored row and column indices can begin at 3627 either one (as in Fortran) or zero. See the users' manual for details. 3628 3629 Specify the preallocated storage with either nz or nnz (not both). 3630 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3631 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3632 matrices. 3633 3634 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3635 @*/ 3636 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3637 { 3638 PetscErrorCode ierr; 3639 3640 PetscFunctionBegin; 3641 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3642 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3643 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3644 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3645 PetscFunctionReturn(0); 3646 } 3647 3648 #undef __FUNCT__ 3649 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3650 /*@C 3651 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3652 per row in the matrix. For good matrix assembly performance the 3653 user should preallocate the matrix storage by setting the parameter nz 3654 (or the array nnz). By setting these parameters accurately, performance 3655 during matrix assembly can be increased by more than a factor of 50. 3656 3657 Collective on MPI_Comm 3658 3659 Input Parameters: 3660 + A - the matrix 3661 . bs - size of block 3662 . nz - number of block nonzeros per block row (same for all rows) 3663 - nnz - array containing the number of block nonzeros in the various block rows 3664 (possibly different for each block row) or PETSC_NULL 3665 3666 Options Database Keys: 3667 . -mat_no_unroll - uses code that does not unroll the loops in the 3668 block calculations (much slower) 3669 . -mat_block_size - size of the blocks to use 3670 3671 Level: intermediate 3672 3673 Notes: 3674 If the nnz parameter is given then the nz parameter is ignored 3675 3676 You can call MatGetInfo() to get information on how effective the preallocation was; 3677 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3678 You can also run with the option -info and look for messages with the string 3679 malloc in them to see if additional memory allocation was needed. 3680 3681 The block AIJ format is fully compatible with standard Fortran 77 3682 storage. That is, the stored row and column indices can begin at 3683 either one (as in Fortran) or zero. See the users' manual for details. 3684 3685 Specify the preallocated storage with either nz or nnz (not both). 3686 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3687 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3688 3689 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3690 @*/ 3691 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3692 { 3693 PetscErrorCode ierr; 3694 3695 PetscFunctionBegin; 3696 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3697 PetscFunctionReturn(0); 3698 } 3699 3700 #undef __FUNCT__ 3701 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3702 /*@C 3703 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3704 (the default sequential PETSc format). 3705 3706 Collective on MPI_Comm 3707 3708 Input Parameters: 3709 + A - the matrix 3710 . i - the indices into j for the start of each local row (starts with zero) 3711 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3712 - v - optional values in the matrix 3713 3714 Level: developer 3715 3716 .keywords: matrix, aij, compressed row, sparse 3717 3718 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3719 @*/ 3720 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3721 { 3722 PetscErrorCode ierr; 3723 3724 PetscFunctionBegin; 3725 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3726 PetscFunctionReturn(0); 3727 } 3728 3729 3730 #undef __FUNCT__ 3731 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3732 /*@ 3733 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3734 3735 Collective on MPI_Comm 3736 3737 Input Parameters: 3738 + comm - must be an MPI communicator of size 1 3739 . bs - size of block 3740 . m - number of rows 3741 . n - number of columns 3742 . i - row indices 3743 . j - column indices 3744 - a - matrix values 3745 3746 Output Parameter: 3747 . mat - the matrix 3748 3749 Level: advanced 3750 3751 Notes: 3752 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3753 once the matrix is destroyed 3754 3755 You cannot set new nonzero locations into this matrix, that will generate an error. 3756 3757 The i and j indices are 0 based 3758 3759 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3760 3761 3762 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3763 3764 @*/ 3765 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3766 { 3767 PetscErrorCode ierr; 3768 PetscInt ii; 3769 Mat_SeqBAIJ *baij; 3770 3771 PetscFunctionBegin; 3772 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3773 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3774 3775 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3776 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3777 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3778 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3779 baij = (Mat_SeqBAIJ*)(*mat)->data; 3780 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3781 ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3782 3783 baij->i = i; 3784 baij->j = j; 3785 baij->a = a; 3786 baij->singlemalloc = PETSC_FALSE; 3787 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3788 baij->free_a = PETSC_FALSE; 3789 baij->free_ij = PETSC_FALSE; 3790 3791 for (ii=0; ii<m; ii++) { 3792 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3793 #if defined(PETSC_USE_DEBUG) 3794 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3795 #endif 3796 } 3797 #if defined(PETSC_USE_DEBUG) 3798 for (ii=0; ii<baij->i[m]; ii++) { 3799 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3800 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3801 } 3802 #endif 3803 3804 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3805 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3806 PetscFunctionReturn(0); 3807 } 3808