1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 15 /*@ 16 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 17 18 Collective on Mat 19 20 Input Parameters: 21 . mat - the matrix 22 23 Level: advanced 24 @*/ 25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 26 { 27 PetscErrorCode ierr,(*f)(Mat); 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 31 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 32 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 33 34 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 35 if (f) { 36 ierr = (*f)(mat);CHKERRQ(ierr); 37 } else { 38 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 39 } 40 PetscFunctionReturn(0); 41 } 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 47 { 48 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 49 PetscErrorCode ierr; 50 PetscInt *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs; 51 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 52 53 PetscFunctionBegin; 54 if (a->idiagvalid) PetscFunctionReturn(0); 55 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56 diag_offset = a->diag; 57 if (!a->idiag) { 58 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59 } 60 diag = a->idiag; 61 mdiag = a->idiag+bs*bs*mbs; 62 /* factor and invert each block */ 63 switch (bs){ 64 case 2: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 4*diag_offset[i]; 67 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 68 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 69 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 70 diag += 4; 71 mdiag += 4; 72 } 73 break; 74 case 3: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 9*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79 diag[8] = odiag[8]; 80 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 81 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 82 mdiag[8] = odiag[8]; 83 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 84 diag += 9; 85 mdiag += 9; 86 } 87 break; 88 case 4: 89 for (i=0; i<mbs; i++) { 90 odiag = v + 16*diag_offset[i]; 91 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 92 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 94 diag += 16; 95 mdiag += 16; 96 } 97 break; 98 case 5: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 25*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 104 diag += 25; 105 mdiag += 25; 106 } 107 break; 108 default: 109 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 110 } 111 a->idiagvalid = PETSC_TRUE; 112 PetscFunctionReturn(0); 113 } 114 EXTERN_C_END 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 119 { 120 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 121 PetscScalar *x,x1,x2,s1,s2; 122 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 123 PetscErrorCode ierr; 124 PetscInt m = a->mbs,i,i2,nz,idx; 125 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 126 127 PetscFunctionBegin; 128 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 129 its = its*lits; 130 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 131 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 132 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 133 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 134 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 135 136 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 137 138 diag = a->diag; 139 idiag = a->idiag; 140 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 141 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 142 143 if (flag & SOR_ZERO_INITIAL_GUESS) { 144 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 145 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 146 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 147 i2 = 2; 148 idiag += 4; 149 for (i=1; i<m; i++) { 150 v = aa + 4*ai[i]; 151 vi = aj + ai[i]; 152 nz = diag[i] - ai[i]; 153 s1 = b[i2]; s2 = b[i2+1]; 154 while (nz--) { 155 idx = 2*(*vi++); 156 x1 = x[idx]; x2 = x[1+idx]; 157 s1 -= v[0]*x1 + v[2]*x2; 158 s2 -= v[1]*x1 + v[3]*x2; 159 v += 4; 160 } 161 x[i2] = idiag[0]*s1 + idiag[2]*s2; 162 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 163 idiag += 4; 164 i2 += 2; 165 } 166 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 167 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 168 } 169 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 170 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 171 i2 = 0; 172 mdiag = a->idiag+4*a->mbs; 173 for (i=0; i<m; i++) { 174 x1 = x[i2]; x2 = x[i2+1]; 175 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 176 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 177 mdiag += 4; 178 i2 += 2; 179 } 180 ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 181 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 182 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 183 } 184 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 185 idiag = a->idiag+4*a->mbs - 4; 186 i2 = 2*m - 2; 187 x1 = x[i2]; x2 = x[i2+1]; 188 x[i2] = idiag[0]*x1 + idiag[2]*x2; 189 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 190 idiag -= 4; 191 i2 -= 2; 192 for (i=m-2; i>=0; i--) { 193 v = aa + 4*(diag[i]+1); 194 vi = aj + diag[i] + 1; 195 nz = ai[i+1] - diag[i] - 1; 196 s1 = x[i2]; s2 = x[i2+1]; 197 while (nz--) { 198 idx = 2*(*vi++); 199 x1 = x[idx]; x2 = x[1+idx]; 200 s1 -= v[0]*x1 + v[2]*x2; 201 s2 -= v[1]*x1 + v[3]*x2; 202 v += 4; 203 } 204 x[i2] = idiag[0]*s1 + idiag[2]*s2; 205 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 206 idiag -= 4; 207 i2 -= 2; 208 } 209 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 210 } 211 } else { 212 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 213 } 214 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 215 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 216 PetscFunctionReturn(0); 217 } 218 219 #undef __FUNCT__ 220 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 221 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 222 { 223 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 224 PetscScalar *x,x1,x2,x3,s1,s2,s3; 225 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 226 PetscErrorCode ierr; 227 PetscInt m = a->mbs,i,i2,nz,idx; 228 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 229 230 PetscFunctionBegin; 231 its = its*lits; 232 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 233 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 234 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 235 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 236 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 237 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 238 239 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 240 241 diag = a->diag; 242 idiag = a->idiag; 243 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 244 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 245 246 if (flag & SOR_ZERO_INITIAL_GUESS) { 247 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 248 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 249 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 250 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 251 i2 = 3; 252 idiag += 9; 253 for (i=1; i<m; i++) { 254 v = aa + 9*ai[i]; 255 vi = aj + ai[i]; 256 nz = diag[i] - ai[i]; 257 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 258 while (nz--) { 259 idx = 3*(*vi++); 260 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 261 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 262 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 263 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 264 v += 9; 265 } 266 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 267 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 268 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 269 idiag += 9; 270 i2 += 3; 271 } 272 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 273 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 274 } 275 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 276 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 277 i2 = 0; 278 mdiag = a->idiag+9*a->mbs; 279 for (i=0; i<m; i++) { 280 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 281 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 282 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 283 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 284 mdiag += 9; 285 i2 += 3; 286 } 287 ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 288 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 289 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 290 } 291 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 292 idiag = a->idiag+9*a->mbs - 9; 293 i2 = 3*m - 3; 294 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 295 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 296 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 297 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 298 idiag -= 9; 299 i2 -= 3; 300 for (i=m-2; i>=0; i--) { 301 v = aa + 9*(diag[i]+1); 302 vi = aj + diag[i] + 1; 303 nz = ai[i+1] - diag[i] - 1; 304 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 305 while (nz--) { 306 idx = 3*(*vi++); 307 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 308 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 309 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 310 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 311 v += 9; 312 } 313 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 314 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 315 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 316 idiag -= 9; 317 i2 -= 3; 318 } 319 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 320 } 321 } else { 322 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 323 } 324 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 325 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 326 PetscFunctionReturn(0); 327 } 328 329 #undef __FUNCT__ 330 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 331 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 332 { 333 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 334 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 335 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 336 PetscErrorCode ierr; 337 PetscInt m = a->mbs,i,i2,nz,idx; 338 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 339 340 PetscFunctionBegin; 341 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 342 its = its*lits; 343 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 344 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 345 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 346 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 347 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 348 349 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 350 351 diag = a->diag; 352 idiag = a->idiag; 353 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 354 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 355 356 if (flag & SOR_ZERO_INITIAL_GUESS) { 357 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 358 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 359 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 360 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 361 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 362 i2 = 4; 363 idiag += 16; 364 for (i=1; i<m; i++) { 365 v = aa + 16*ai[i]; 366 vi = aj + ai[i]; 367 nz = diag[i] - ai[i]; 368 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 369 while (nz--) { 370 idx = 4*(*vi++); 371 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 372 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 373 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 374 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 375 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 376 v += 16; 377 } 378 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 379 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 380 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 381 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 382 idiag += 16; 383 i2 += 4; 384 } 385 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 386 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 387 } 388 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 389 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 390 i2 = 0; 391 mdiag = a->idiag+16*a->mbs; 392 for (i=0; i<m; i++) { 393 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 394 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 395 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 396 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 397 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 398 mdiag += 16; 399 i2 += 4; 400 } 401 ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 402 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 403 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 404 } 405 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 406 idiag = a->idiag+16*a->mbs - 16; 407 i2 = 4*m - 4; 408 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 409 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 410 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 411 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 412 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 413 idiag -= 16; 414 i2 -= 4; 415 for (i=m-2; i>=0; i--) { 416 v = aa + 16*(diag[i]+1); 417 vi = aj + diag[i] + 1; 418 nz = ai[i+1] - diag[i] - 1; 419 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 420 while (nz--) { 421 idx = 4*(*vi++); 422 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 423 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 424 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 425 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 426 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 427 v += 16; 428 } 429 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 430 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 431 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 432 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 433 idiag -= 16; 434 i2 -= 4; 435 } 436 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 437 } 438 } else { 439 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 440 } 441 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 442 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 443 PetscFunctionReturn(0); 444 } 445 446 #undef __FUNCT__ 447 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 448 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 449 { 450 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 451 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 452 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 453 PetscErrorCode ierr; 454 PetscInt m = a->mbs,i,i2,nz,idx; 455 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 456 457 PetscFunctionBegin; 458 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 459 its = its*lits; 460 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 461 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 462 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 463 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 464 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 465 466 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 467 468 diag = a->diag; 469 idiag = a->idiag; 470 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 471 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 472 473 if (flag & SOR_ZERO_INITIAL_GUESS) { 474 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 475 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 476 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 477 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 478 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 479 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 480 i2 = 5; 481 idiag += 25; 482 for (i=1; i<m; i++) { 483 v = aa + 25*ai[i]; 484 vi = aj + ai[i]; 485 nz = diag[i] - ai[i]; 486 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 487 while (nz--) { 488 idx = 5*(*vi++); 489 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 490 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 491 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 492 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 493 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 494 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 495 v += 25; 496 } 497 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 498 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 499 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 500 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 501 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 502 idiag += 25; 503 i2 += 5; 504 } 505 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 506 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 507 } 508 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 509 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 510 i2 = 0; 511 mdiag = a->idiag+25*a->mbs; 512 for (i=0; i<m; i++) { 513 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 514 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 515 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 516 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 517 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 518 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 519 mdiag += 25; 520 i2 += 5; 521 } 522 ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 523 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 524 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 525 } 526 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 527 idiag = a->idiag+25*a->mbs - 25; 528 i2 = 5*m - 5; 529 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 530 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 531 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 532 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 533 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 534 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 535 idiag -= 25; 536 i2 -= 5; 537 for (i=m-2; i>=0; i--) { 538 v = aa + 25*(diag[i]+1); 539 vi = aj + diag[i] + 1; 540 nz = ai[i+1] - diag[i] - 1; 541 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 542 while (nz--) { 543 idx = 5*(*vi++); 544 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 545 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 546 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 547 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 548 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 549 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 550 v += 25; 551 } 552 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 553 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 554 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 555 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 556 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 557 idiag -= 25; 558 i2 -= 5; 559 } 560 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 561 } 562 } else { 563 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 564 } 565 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 566 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 567 PetscFunctionReturn(0); 568 } 569 570 /* 571 Special version for direct calls from Fortran (Used in PETSc-fun3d) 572 */ 573 #if defined(PETSC_HAVE_FORTRAN_CAPS) 574 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 575 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 576 #define matsetvaluesblocked4_ matsetvaluesblocked4 577 #endif 578 579 EXTERN_C_BEGIN 580 #undef __FUNCT__ 581 #define __FUNCT__ "matsetvaluesblocked4_" 582 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 583 { 584 Mat A = *AA; 585 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 586 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 587 PetscInt *ai=a->i,*ailen=a->ilen; 588 PetscInt *aj=a->j,stepval,lastcol = -1; 589 const PetscScalar *value = v; 590 MatScalar *ap,*aa = a->a,*bap; 591 592 PetscFunctionBegin; 593 if (A->rmap.bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 594 stepval = (n-1)*4; 595 for (k=0; k<m; k++) { /* loop over added rows */ 596 row = im[k]; 597 rp = aj + ai[row]; 598 ap = aa + 16*ai[row]; 599 nrow = ailen[row]; 600 low = 0; 601 high = nrow; 602 for (l=0; l<n; l++) { /* loop over added columns */ 603 col = in[l]; 604 if (col <= lastcol) low = 0; else high = nrow; 605 lastcol = col; 606 value = v + k*(stepval+4 + l)*4; 607 while (high-low > 7) { 608 t = (low+high)/2; 609 if (rp[t] > col) high = t; 610 else low = t; 611 } 612 for (i=low; i<high; i++) { 613 if (rp[i] > col) break; 614 if (rp[i] == col) { 615 bap = ap + 16*i; 616 for (ii=0; ii<4; ii++,value+=stepval) { 617 for (jj=ii; jj<16; jj+=4) { 618 bap[jj] += *value++; 619 } 620 } 621 goto noinsert2; 622 } 623 } 624 N = nrow++ - 1; 625 high++; /* added new column index thus must search to one higher than before */ 626 /* shift up all the later entries in this row */ 627 for (ii=N; ii>=i; ii--) { 628 rp[ii+1] = rp[ii]; 629 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 630 } 631 if (N >= i) { 632 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 633 } 634 rp[i] = col; 635 bap = ap + 16*i; 636 for (ii=0; ii<4; ii++,value+=stepval) { 637 for (jj=ii; jj<16; jj+=4) { 638 bap[jj] = *value++; 639 } 640 } 641 noinsert2:; 642 low = i; 643 } 644 ailen[row] = nrow; 645 } 646 PetscFunctionReturnVoid(); 647 } 648 EXTERN_C_END 649 650 #if defined(PETSC_HAVE_FORTRAN_CAPS) 651 #define matsetvalues4_ MATSETVALUES4 652 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 653 #define matsetvalues4_ matsetvalues4 654 #endif 655 656 EXTERN_C_BEGIN 657 #undef __FUNCT__ 658 #define __FUNCT__ "MatSetValues4_" 659 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 660 { 661 Mat A = *AA; 662 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 663 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 664 PetscInt *ai=a->i,*ailen=a->ilen; 665 PetscInt *aj=a->j,brow,bcol; 666 PetscInt ridx,cidx,lastcol = -1; 667 MatScalar *ap,value,*aa=a->a,*bap; 668 669 PetscFunctionBegin; 670 for (k=0; k<m; k++) { /* loop over added rows */ 671 row = im[k]; brow = row/4; 672 rp = aj + ai[brow]; 673 ap = aa + 16*ai[brow]; 674 nrow = ailen[brow]; 675 low = 0; 676 high = nrow; 677 for (l=0; l<n; l++) { /* loop over added columns */ 678 col = in[l]; bcol = col/4; 679 ridx = row % 4; cidx = col % 4; 680 value = v[l + k*n]; 681 if (col <= lastcol) low = 0; else high = nrow; 682 lastcol = col; 683 while (high-low > 7) { 684 t = (low+high)/2; 685 if (rp[t] > bcol) high = t; 686 else low = t; 687 } 688 for (i=low; i<high; i++) { 689 if (rp[i] > bcol) break; 690 if (rp[i] == bcol) { 691 bap = ap + 16*i + 4*cidx + ridx; 692 *bap += value; 693 goto noinsert1; 694 } 695 } 696 N = nrow++ - 1; 697 high++; /* added new column thus must search to one higher than before */ 698 /* shift up all the later entries in this row */ 699 for (ii=N; ii>=i; ii--) { 700 rp[ii+1] = rp[ii]; 701 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 702 } 703 if (N>=i) { 704 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 705 } 706 rp[i] = bcol; 707 ap[16*i + 4*cidx + ridx] = value; 708 noinsert1:; 709 low = i; 710 } 711 ailen[brow] = nrow; 712 } 713 PetscFunctionReturnVoid(); 714 } 715 EXTERN_C_END 716 717 /* UGLY, ugly, ugly 718 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 719 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 720 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 721 converts the entries into single precision and then calls ..._MatScalar() to put them 722 into the single precision data structures. 723 */ 724 #if defined(PETSC_USE_MAT_SINGLE) 725 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 726 #else 727 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 728 #endif 729 730 #define CHUNKSIZE 10 731 732 /* 733 Checks for missing diagonals 734 */ 735 #undef __FUNCT__ 736 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 737 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d) 738 { 739 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 740 PetscErrorCode ierr; 741 PetscInt *diag,*jj = a->j,i; 742 743 PetscFunctionBegin; 744 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 745 *missing = PETSC_FALSE; 746 diag = a->diag; 747 for (i=0; i<a->mbs; i++) { 748 if (jj[diag[i]] != i) { 749 *missing = PETSC_TRUE; 750 if (d) *d = i; 751 } 752 } 753 PetscFunctionReturn(0); 754 } 755 756 #undef __FUNCT__ 757 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 758 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 759 { 760 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 761 PetscErrorCode ierr; 762 PetscInt i,j,m = a->mbs; 763 764 PetscFunctionBegin; 765 if (!a->diag) { 766 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 767 } 768 for (i=0; i<m; i++) { 769 a->diag[i] = a->i[i+1]; 770 for (j=a->i[i]; j<a->i[i+1]; j++) { 771 if (a->j[j] == i) { 772 a->diag[i] = j; 773 break; 774 } 775 } 776 } 777 PetscFunctionReturn(0); 778 } 779 780 781 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 782 783 #undef __FUNCT__ 784 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 785 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 786 { 787 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 788 PetscErrorCode ierr; 789 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs,nbs = 1,k,l,cnt; 790 PetscInt *tia, *tja; 791 792 PetscFunctionBegin; 793 *nn = n; 794 if (!ia) PetscFunctionReturn(0); 795 if (symmetric) { 796 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 797 } else { 798 tia = a->i; tja = a->j; 799 } 800 801 if (!blockcompressed && bs > 1) { 802 (*nn) *= bs; 803 nbs = bs; 804 /* malloc & create the natural set of indices */ 805 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 806 if (n) { 807 (*ia)[0] = 0; 808 for (j=1; j<bs; j++) { 809 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 810 } 811 } 812 813 for (i=1; i<n; i++) { 814 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 815 for (j=1; j<bs; j++) { 816 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 817 } 818 } 819 if (n) { 820 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 821 } 822 823 if (ja) { 824 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 825 cnt = 0; 826 for (i=0; i<n; i++) { 827 for (j=0; j<bs; j++) { 828 for (k=tia[i]; k<tia[i+1]; k++) { 829 for (l=0; l<bs; l++) { 830 (*ja)[cnt++] = bs*tja[k] + l; 831 } 832 } 833 } 834 } 835 } 836 837 n *= bs; 838 nz *= bs*bs; 839 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 840 ierr = PetscFree(tia);CHKERRQ(ierr); 841 ierr = PetscFree(tja);CHKERRQ(ierr); 842 } 843 } else { 844 *ia = tia; 845 if (ja) *ja = tja; 846 } 847 if (oshift == 1) { 848 for (i=0; i<n+nbs; i++) (*ia)[i]++; 849 if (ja) for (i=0; i<nz; i++) (*ja)[i]++; 850 } 851 PetscFunctionReturn(0); 852 } 853 854 #undef __FUNCT__ 855 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 856 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 857 { 858 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 859 PetscErrorCode ierr; 860 PetscInt i,n = a->mbs,nz = a->i[n]; 861 862 PetscFunctionBegin; 863 if (!ia) PetscFunctionReturn(0); 864 if (!blockcompressed && A->rmap.bs > 1) { 865 ierr = PetscFree(*ia);CHKERRQ(ierr); 866 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 867 } else if (symmetric) { 868 ierr = PetscFree(*ia);CHKERRQ(ierr); 869 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 870 } else if (oshift == 1) { /* blockcompressed */ 871 for (i=0; i<n+1; i++) a->i[i]--; 872 if (ja) {for (i=0; i<nz; i++) a->j[i]--;} 873 } 874 PetscFunctionReturn(0); 875 } 876 877 #undef __FUNCT__ 878 #define __FUNCT__ "MatDestroy_SeqBAIJ" 879 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 880 { 881 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 882 PetscErrorCode ierr; 883 884 PetscFunctionBegin; 885 #if defined(PETSC_USE_LOG) 886 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz); 887 #endif 888 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 889 if (a->row) { 890 ierr = ISDestroy(a->row);CHKERRQ(ierr); 891 } 892 if (a->col) { 893 ierr = ISDestroy(a->col);CHKERRQ(ierr); 894 } 895 ierr = PetscFree(a->diag);CHKERRQ(ierr); 896 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 897 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 898 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 899 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 900 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 901 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 902 #if defined(PETSC_USE_MAT_SINGLE) 903 ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr); 904 #endif 905 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 906 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 907 908 ierr = PetscFree(a);CHKERRQ(ierr); 909 910 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 911 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 912 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 913 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 914 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 915 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 916 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 917 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 918 PetscFunctionReturn(0); 919 } 920 921 #undef __FUNCT__ 922 #define __FUNCT__ "MatSetOption_SeqBAIJ" 923 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg) 924 { 925 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 926 PetscErrorCode ierr; 927 928 PetscFunctionBegin; 929 switch (op) { 930 case MAT_ROW_ORIENTED: 931 a->roworiented = flg; 932 break; 933 case MAT_KEEP_ZEROED_ROWS: 934 a->keepzeroedrows = flg; 935 break; 936 case MAT_NEW_NONZERO_LOCATIONS: 937 a->nonew = (flg ? 0 : 1); 938 break; 939 case MAT_NEW_NONZERO_LOCATION_ERR: 940 a->nonew = (flg ? -1 : 0); 941 break; 942 case MAT_NEW_NONZERO_ALLOCATION_ERR: 943 a->nonew = (flg ? -2 : 0); 944 break; 945 case MAT_NEW_DIAGONALS: 946 case MAT_IGNORE_OFF_PROC_ENTRIES: 947 case MAT_USE_HASH_TABLE: 948 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 949 break; 950 case MAT_SYMMETRIC: 951 case MAT_STRUCTURALLY_SYMMETRIC: 952 case MAT_HERMITIAN: 953 case MAT_SYMMETRY_ETERNAL: 954 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 955 break; 956 default: 957 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 958 } 959 PetscFunctionReturn(0); 960 } 961 962 #undef __FUNCT__ 963 #define __FUNCT__ "MatGetRow_SeqBAIJ" 964 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 965 { 966 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 967 PetscErrorCode ierr; 968 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 969 MatScalar *aa,*aa_i; 970 PetscScalar *v_i; 971 972 PetscFunctionBegin; 973 bs = A->rmap.bs; 974 ai = a->i; 975 aj = a->j; 976 aa = a->a; 977 bs2 = a->bs2; 978 979 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 980 981 bn = row/bs; /* Block number */ 982 bp = row % bs; /* Block Position */ 983 M = ai[bn+1] - ai[bn]; 984 *nz = bs*M; 985 986 if (v) { 987 *v = 0; 988 if (*nz) { 989 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 990 for (i=0; i<M; i++) { /* for each block in the block row */ 991 v_i = *v + i*bs; 992 aa_i = aa + bs2*(ai[bn] + i); 993 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 994 } 995 } 996 } 997 998 if (idx) { 999 *idx = 0; 1000 if (*nz) { 1001 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1002 for (i=0; i<M; i++) { /* for each block in the block row */ 1003 idx_i = *idx + i*bs; 1004 itmp = bs*aj[ai[bn] + i]; 1005 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 1006 } 1007 } 1008 } 1009 PetscFunctionReturn(0); 1010 } 1011 1012 #undef __FUNCT__ 1013 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1014 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1015 { 1016 PetscErrorCode ierr; 1017 1018 PetscFunctionBegin; 1019 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1020 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1021 PetscFunctionReturn(0); 1022 } 1023 1024 #undef __FUNCT__ 1025 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1026 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 1027 { 1028 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1029 Mat C; 1030 PetscErrorCode ierr; 1031 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1032 PetscInt *rows,*cols,bs2=a->bs2; 1033 PetscScalar *array; 1034 1035 PetscFunctionBegin; 1036 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 1037 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1038 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1039 1040 #if defined(PETSC_USE_MAT_SINGLE) 1041 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 1042 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 1043 #else 1044 array = a->a; 1045 #endif 1046 1047 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1048 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1049 ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr); 1050 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1051 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1052 ierr = PetscFree(col);CHKERRQ(ierr); 1053 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1054 cols = rows + bs; 1055 for (i=0; i<mbs; i++) { 1056 cols[0] = i*bs; 1057 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1058 len = ai[i+1] - ai[i]; 1059 for (j=0; j<len; j++) { 1060 rows[0] = (*aj++)*bs; 1061 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1062 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1063 array += bs2; 1064 } 1065 } 1066 ierr = PetscFree(rows);CHKERRQ(ierr); 1067 #if defined(PETSC_USE_MAT_SINGLE) 1068 ierr = PetscFree(array); 1069 #endif 1070 1071 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1072 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1073 1074 if (B) { 1075 *B = C; 1076 } else { 1077 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1078 } 1079 PetscFunctionReturn(0); 1080 } 1081 1082 #undef __FUNCT__ 1083 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1084 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1085 { 1086 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1087 PetscErrorCode ierr; 1088 PetscInt i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2; 1089 int fd; 1090 PetscScalar *aa; 1091 FILE *file; 1092 1093 PetscFunctionBegin; 1094 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1095 ierr = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1096 col_lens[0] = MAT_FILE_COOKIE; 1097 1098 col_lens[1] = A->rmap.N; 1099 col_lens[2] = A->cmap.n; 1100 col_lens[3] = a->nz*bs2; 1101 1102 /* store lengths of each row and write (including header) to file */ 1103 count = 0; 1104 for (i=0; i<a->mbs; i++) { 1105 for (j=0; j<bs; j++) { 1106 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1107 } 1108 } 1109 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1110 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1111 1112 /* store column indices (zero start index) */ 1113 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1114 count = 0; 1115 for (i=0; i<a->mbs; i++) { 1116 for (j=0; j<bs; j++) { 1117 for (k=a->i[i]; k<a->i[i+1]; k++) { 1118 for (l=0; l<bs; l++) { 1119 jj[count++] = bs*a->j[k] + l; 1120 } 1121 } 1122 } 1123 } 1124 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1125 ierr = PetscFree(jj);CHKERRQ(ierr); 1126 1127 /* store nonzero values */ 1128 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1129 count = 0; 1130 for (i=0; i<a->mbs; i++) { 1131 for (j=0; j<bs; j++) { 1132 for (k=a->i[i]; k<a->i[i+1]; k++) { 1133 for (l=0; l<bs; l++) { 1134 aa[count++] = a->a[bs2*k + l*bs + j]; 1135 } 1136 } 1137 } 1138 } 1139 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1140 ierr = PetscFree(aa);CHKERRQ(ierr); 1141 1142 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1143 if (file) { 1144 fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs); 1145 } 1146 PetscFunctionReturn(0); 1147 } 1148 1149 #undef __FUNCT__ 1150 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1151 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1152 { 1153 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1154 PetscErrorCode ierr; 1155 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 1156 PetscViewerFormat format; 1157 1158 PetscFunctionBegin; 1159 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1160 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1161 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1162 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1163 Mat aij; 1164 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1165 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1166 ierr = MatDestroy(aij);CHKERRQ(ierr); 1167 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1168 PetscFunctionReturn(0); 1169 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1170 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1171 for (i=0; i<a->mbs; i++) { 1172 for (j=0; j<bs; j++) { 1173 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1174 for (k=a->i[i]; k<a->i[i+1]; k++) { 1175 for (l=0; l<bs; l++) { 1176 #if defined(PETSC_USE_COMPLEX) 1177 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1178 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1179 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1180 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1181 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1182 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1183 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1184 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1185 } 1186 #else 1187 if (a->a[bs2*k + l*bs + j] != 0.0) { 1188 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1189 } 1190 #endif 1191 } 1192 } 1193 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1194 } 1195 } 1196 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1197 } else { 1198 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1199 for (i=0; i<a->mbs; i++) { 1200 for (j=0; j<bs; j++) { 1201 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1202 for (k=a->i[i]; k<a->i[i+1]; k++) { 1203 for (l=0; l<bs; l++) { 1204 #if defined(PETSC_USE_COMPLEX) 1205 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1206 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1207 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1208 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1209 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1210 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1211 } else { 1212 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1213 } 1214 #else 1215 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1216 #endif 1217 } 1218 } 1219 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1220 } 1221 } 1222 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1223 } 1224 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1225 PetscFunctionReturn(0); 1226 } 1227 1228 #undef __FUNCT__ 1229 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1230 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1231 { 1232 Mat A = (Mat) Aa; 1233 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1234 PetscErrorCode ierr; 1235 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 1236 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1237 MatScalar *aa; 1238 PetscViewer viewer; 1239 1240 PetscFunctionBegin; 1241 1242 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1243 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1244 1245 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1246 1247 /* loop over matrix elements drawing boxes */ 1248 color = PETSC_DRAW_BLUE; 1249 for (i=0,row=0; i<mbs; i++,row+=bs) { 1250 for (j=a->i[i]; j<a->i[i+1]; j++) { 1251 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1252 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1253 aa = a->a + j*bs2; 1254 for (k=0; k<bs; k++) { 1255 for (l=0; l<bs; l++) { 1256 if (PetscRealPart(*aa++) >= 0.) continue; 1257 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1258 } 1259 } 1260 } 1261 } 1262 color = PETSC_DRAW_CYAN; 1263 for (i=0,row=0; i<mbs; i++,row+=bs) { 1264 for (j=a->i[i]; j<a->i[i+1]; j++) { 1265 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1266 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1267 aa = a->a + j*bs2; 1268 for (k=0; k<bs; k++) { 1269 for (l=0; l<bs; l++) { 1270 if (PetscRealPart(*aa++) != 0.) continue; 1271 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1272 } 1273 } 1274 } 1275 } 1276 1277 color = PETSC_DRAW_RED; 1278 for (i=0,row=0; i<mbs; i++,row+=bs) { 1279 for (j=a->i[i]; j<a->i[i+1]; j++) { 1280 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1281 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1282 aa = a->a + j*bs2; 1283 for (k=0; k<bs; k++) { 1284 for (l=0; l<bs; l++) { 1285 if (PetscRealPart(*aa++) <= 0.) continue; 1286 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1287 } 1288 } 1289 } 1290 } 1291 PetscFunctionReturn(0); 1292 } 1293 1294 #undef __FUNCT__ 1295 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1296 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1297 { 1298 PetscErrorCode ierr; 1299 PetscReal xl,yl,xr,yr,w,h; 1300 PetscDraw draw; 1301 PetscTruth isnull; 1302 1303 PetscFunctionBegin; 1304 1305 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1306 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1307 1308 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1309 xr = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 1310 xr += w; yr += h; xl = -w; yl = -h; 1311 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1312 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1313 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1314 PetscFunctionReturn(0); 1315 } 1316 1317 #undef __FUNCT__ 1318 #define __FUNCT__ "MatView_SeqBAIJ" 1319 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1320 { 1321 PetscErrorCode ierr; 1322 PetscTruth iascii,isbinary,isdraw; 1323 1324 PetscFunctionBegin; 1325 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1326 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1327 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1328 if (iascii){ 1329 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1330 } else if (isbinary) { 1331 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1332 } else if (isdraw) { 1333 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1334 } else { 1335 Mat B; 1336 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1337 ierr = MatView(B,viewer);CHKERRQ(ierr); 1338 ierr = MatDestroy(B);CHKERRQ(ierr); 1339 } 1340 PetscFunctionReturn(0); 1341 } 1342 1343 1344 #undef __FUNCT__ 1345 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1346 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1347 { 1348 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1349 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1350 PetscInt *ai = a->i,*ailen = a->ilen; 1351 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 1352 MatScalar *ap,*aa = a->a; 1353 1354 PetscFunctionBegin; 1355 for (k=0; k<m; k++) { /* loop over rows */ 1356 row = im[k]; brow = row/bs; 1357 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1358 if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1359 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1360 nrow = ailen[brow]; 1361 for (l=0; l<n; l++) { /* loop over columns */ 1362 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1363 if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1364 col = in[l] ; 1365 bcol = col/bs; 1366 cidx = col%bs; 1367 ridx = row%bs; 1368 high = nrow; 1369 low = 0; /* assume unsorted */ 1370 while (high-low > 5) { 1371 t = (low+high)/2; 1372 if (rp[t] > bcol) high = t; 1373 else low = t; 1374 } 1375 for (i=low; i<high; i++) { 1376 if (rp[i] > bcol) break; 1377 if (rp[i] == bcol) { 1378 *v++ = ap[bs2*i+bs*cidx+ridx]; 1379 goto finished; 1380 } 1381 } 1382 *v++ = 0.0; 1383 finished:; 1384 } 1385 } 1386 PetscFunctionReturn(0); 1387 } 1388 1389 #if defined(PETSC_USE_MAT_SINGLE) 1390 #undef __FUNCT__ 1391 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1392 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 1393 { 1394 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1395 PetscErrorCode ierr; 1396 PetscInt i,N = m*n*b->bs2; 1397 MatScalar *vsingle; 1398 1399 PetscFunctionBegin; 1400 if (N > b->setvalueslen) { 1401 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 1402 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1403 b->setvalueslen = N; 1404 } 1405 vsingle = b->setvaluescopy; 1406 for (i=0; i<N; i++) { 1407 vsingle[i] = v[i]; 1408 } 1409 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1410 PetscFunctionReturn(0); 1411 } 1412 #endif 1413 1414 1415 #undef __FUNCT__ 1416 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1417 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is) 1418 { 1419 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1420 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1421 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1422 PetscErrorCode ierr; 1423 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 1424 PetscTruth roworiented=a->roworiented; 1425 const MatScalar *value = v; 1426 MatScalar *ap,*aa = a->a,*bap; 1427 1428 PetscFunctionBegin; 1429 if (roworiented) { 1430 stepval = (n-1)*bs; 1431 } else { 1432 stepval = (m-1)*bs; 1433 } 1434 for (k=0; k<m; k++) { /* loop over added rows */ 1435 row = im[k]; 1436 if (row < 0) continue; 1437 #if defined(PETSC_USE_DEBUG) 1438 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1439 #endif 1440 rp = aj + ai[row]; 1441 ap = aa + bs2*ai[row]; 1442 rmax = imax[row]; 1443 nrow = ailen[row]; 1444 low = 0; 1445 high = nrow; 1446 for (l=0; l<n; l++) { /* loop over added columns */ 1447 if (in[l] < 0) continue; 1448 #if defined(PETSC_USE_DEBUG) 1449 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1450 #endif 1451 col = in[l]; 1452 if (roworiented) { 1453 value = v + k*(stepval+bs)*bs + l*bs; 1454 } else { 1455 value = v + l*(stepval+bs)*bs + k*bs; 1456 } 1457 if (col <= lastcol) low = 0; else high = nrow; 1458 lastcol = col; 1459 while (high-low > 7) { 1460 t = (low+high)/2; 1461 if (rp[t] > col) high = t; 1462 else low = t; 1463 } 1464 for (i=low; i<high; i++) { 1465 if (rp[i] > col) break; 1466 if (rp[i] == col) { 1467 bap = ap + bs2*i; 1468 if (roworiented) { 1469 if (is == ADD_VALUES) { 1470 for (ii=0; ii<bs; ii++,value+=stepval) { 1471 for (jj=ii; jj<bs2; jj+=bs) { 1472 bap[jj] += *value++; 1473 } 1474 } 1475 } else { 1476 for (ii=0; ii<bs; ii++,value+=stepval) { 1477 for (jj=ii; jj<bs2; jj+=bs) { 1478 bap[jj] = *value++; 1479 } 1480 } 1481 } 1482 } else { 1483 if (is == ADD_VALUES) { 1484 for (ii=0; ii<bs; ii++,value+=stepval) { 1485 for (jj=0; jj<bs; jj++) { 1486 *bap++ += *value++; 1487 } 1488 } 1489 } else { 1490 for (ii=0; ii<bs; ii++,value+=stepval) { 1491 for (jj=0; jj<bs; jj++) { 1492 *bap++ = *value++; 1493 } 1494 } 1495 } 1496 } 1497 goto noinsert2; 1498 } 1499 } 1500 if (nonew == 1) goto noinsert2; 1501 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1502 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1503 N = nrow++ - 1; high++; 1504 /* shift up all the later entries in this row */ 1505 for (ii=N; ii>=i; ii--) { 1506 rp[ii+1] = rp[ii]; 1507 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1508 } 1509 if (N >= i) { 1510 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1511 } 1512 rp[i] = col; 1513 bap = ap + bs2*i; 1514 if (roworiented) { 1515 for (ii=0; ii<bs; ii++,value+=stepval) { 1516 for (jj=ii; jj<bs2; jj+=bs) { 1517 bap[jj] = *value++; 1518 } 1519 } 1520 } else { 1521 for (ii=0; ii<bs; ii++,value+=stepval) { 1522 for (jj=0; jj<bs; jj++) { 1523 *bap++ = *value++; 1524 } 1525 } 1526 } 1527 noinsert2:; 1528 low = i; 1529 } 1530 ailen[row] = nrow; 1531 } 1532 PetscFunctionReturn(0); 1533 } 1534 1535 #undef __FUNCT__ 1536 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1537 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1538 { 1539 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1540 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1541 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 1542 PetscErrorCode ierr; 1543 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1544 MatScalar *aa = a->a,*ap; 1545 PetscReal ratio=0.6; 1546 1547 PetscFunctionBegin; 1548 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1549 1550 if (m) rmax = ailen[0]; 1551 for (i=1; i<mbs; i++) { 1552 /* move each row back by the amount of empty slots (fshift) before it*/ 1553 fshift += imax[i-1] - ailen[i-1]; 1554 rmax = PetscMax(rmax,ailen[i]); 1555 if (fshift) { 1556 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1557 N = ailen[i]; 1558 for (j=0; j<N; j++) { 1559 ip[j-fshift] = ip[j]; 1560 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1561 } 1562 } 1563 ai[i] = ai[i-1] + ailen[i-1]; 1564 } 1565 if (mbs) { 1566 fshift += imax[mbs-1] - ailen[mbs-1]; 1567 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1568 } 1569 /* reset ilen and imax for each row */ 1570 for (i=0; i<mbs; i++) { 1571 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1572 } 1573 a->nz = ai[mbs]; 1574 1575 /* diagonals may have moved, so kill the diagonal pointers */ 1576 a->idiagvalid = PETSC_FALSE; 1577 if (fshift && a->diag) { 1578 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1579 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1580 a->diag = 0; 1581 } 1582 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1583 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1584 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1585 a->reallocs = 0; 1586 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1587 1588 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1589 if (a->compressedrow.use){ 1590 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1591 } 1592 1593 A->same_nonzero = PETSC_TRUE; 1594 PetscFunctionReturn(0); 1595 } 1596 1597 /* 1598 This function returns an array of flags which indicate the locations of contiguous 1599 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1600 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1601 Assume: sizes should be long enough to hold all the values. 1602 */ 1603 #undef __FUNCT__ 1604 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1605 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1606 { 1607 PetscInt i,j,k,row; 1608 PetscTruth flg; 1609 1610 PetscFunctionBegin; 1611 for (i=0,j=0; i<n; j++) { 1612 row = idx[i]; 1613 if (row%bs!=0) { /* Not the begining of a block */ 1614 sizes[j] = 1; 1615 i++; 1616 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1617 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1618 i++; 1619 } else { /* Begining of the block, so check if the complete block exists */ 1620 flg = PETSC_TRUE; 1621 for (k=1; k<bs; k++) { 1622 if (row+k != idx[i+k]) { /* break in the block */ 1623 flg = PETSC_FALSE; 1624 break; 1625 } 1626 } 1627 if (flg) { /* No break in the bs */ 1628 sizes[j] = bs; 1629 i+= bs; 1630 } else { 1631 sizes[j] = 1; 1632 i++; 1633 } 1634 } 1635 } 1636 *bs_max = j; 1637 PetscFunctionReturn(0); 1638 } 1639 1640 #undef __FUNCT__ 1641 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1642 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 1643 { 1644 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1645 PetscErrorCode ierr; 1646 PetscInt i,j,k,count,*rows; 1647 PetscInt bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max; 1648 PetscScalar zero = 0.0; 1649 MatScalar *aa; 1650 1651 PetscFunctionBegin; 1652 /* Make a copy of the IS and sort it */ 1653 /* allocate memory for rows,sizes */ 1654 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1655 sizes = rows + is_n; 1656 1657 /* copy IS values to rows, and sort them */ 1658 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1659 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1660 if (baij->keepzeroedrows) { 1661 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1662 bs_max = is_n; 1663 A->same_nonzero = PETSC_TRUE; 1664 } else { 1665 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1666 A->same_nonzero = PETSC_FALSE; 1667 } 1668 1669 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1670 row = rows[j]; 1671 if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 1672 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1673 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1674 if (sizes[i] == bs && !baij->keepzeroedrows) { 1675 if (diag != 0.0) { 1676 if (baij->ilen[row/bs] > 0) { 1677 baij->ilen[row/bs] = 1; 1678 baij->j[baij->i[row/bs]] = row/bs; 1679 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1680 } 1681 /* Now insert all the diagonal values for this bs */ 1682 for (k=0; k<bs; k++) { 1683 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 1684 } 1685 } else { /* (diag == 0.0) */ 1686 baij->ilen[row/bs] = 0; 1687 } /* end (diag == 0.0) */ 1688 } else { /* (sizes[i] != bs) */ 1689 #if defined (PETSC_USE_DEBUG) 1690 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 1691 #endif 1692 for (k=0; k<count; k++) { 1693 aa[0] = zero; 1694 aa += bs; 1695 } 1696 if (diag != 0.0) { 1697 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 1698 } 1699 } 1700 } 1701 1702 ierr = PetscFree(rows);CHKERRQ(ierr); 1703 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1704 PetscFunctionReturn(0); 1705 } 1706 1707 #undef __FUNCT__ 1708 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1709 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1710 { 1711 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1712 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 1713 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1714 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 1715 PetscErrorCode ierr; 1716 PetscInt ridx,cidx,bs2=a->bs2; 1717 PetscTruth roworiented=a->roworiented; 1718 MatScalar *ap,value,*aa=a->a,*bap; 1719 1720 PetscFunctionBegin; 1721 for (k=0; k<m; k++) { /* loop over added rows */ 1722 row = im[k]; 1723 brow = row/bs; 1724 if (row < 0) continue; 1725 #if defined(PETSC_USE_DEBUG) 1726 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 1727 #endif 1728 rp = aj + ai[brow]; 1729 ap = aa + bs2*ai[brow]; 1730 rmax = imax[brow]; 1731 nrow = ailen[brow]; 1732 low = 0; 1733 high = nrow; 1734 for (l=0; l<n; l++) { /* loop over added columns */ 1735 if (in[l] < 0) continue; 1736 #if defined(PETSC_USE_DEBUG) 1737 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 1738 #endif 1739 col = in[l]; bcol = col/bs; 1740 ridx = row % bs; cidx = col % bs; 1741 if (roworiented) { 1742 value = v[l + k*n]; 1743 } else { 1744 value = v[k + l*m]; 1745 } 1746 if (col <= lastcol) low = 0; else high = nrow; 1747 lastcol = col; 1748 while (high-low > 7) { 1749 t = (low+high)/2; 1750 if (rp[t] > bcol) high = t; 1751 else low = t; 1752 } 1753 for (i=low; i<high; i++) { 1754 if (rp[i] > bcol) break; 1755 if (rp[i] == bcol) { 1756 bap = ap + bs2*i + bs*cidx + ridx; 1757 if (is == ADD_VALUES) *bap += value; 1758 else *bap = value; 1759 goto noinsert1; 1760 } 1761 } 1762 if (nonew == 1) goto noinsert1; 1763 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1764 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1765 N = nrow++ - 1; high++; 1766 /* shift up all the later entries in this row */ 1767 for (ii=N; ii>=i; ii--) { 1768 rp[ii+1] = rp[ii]; 1769 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1770 } 1771 if (N>=i) { 1772 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1773 } 1774 rp[i] = bcol; 1775 ap[bs2*i + bs*cidx + ridx] = value; 1776 a->nz++; 1777 noinsert1:; 1778 low = i; 1779 } 1780 ailen[brow] = nrow; 1781 } 1782 A->same_nonzero = PETSC_FALSE; 1783 PetscFunctionReturn(0); 1784 } 1785 1786 1787 #undef __FUNCT__ 1788 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1789 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1790 { 1791 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1792 Mat outA; 1793 PetscErrorCode ierr; 1794 PetscTruth row_identity,col_identity; 1795 1796 PetscFunctionBegin; 1797 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1798 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1799 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1800 if (!row_identity || !col_identity) { 1801 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 1802 } 1803 1804 outA = inA; 1805 inA->factor = FACTOR_LU; 1806 1807 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1808 1809 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1810 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 1811 a->row = row; 1812 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1813 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 1814 a->col = col; 1815 1816 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1817 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1818 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1819 1820 /* 1821 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1822 for ILU(0) factorization with natural ordering 1823 */ 1824 if (inA->rmap.bs < 8) { 1825 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1826 } else { 1827 if (!a->solve_work) { 1828 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1829 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1830 } 1831 } 1832 1833 ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1834 1835 PetscFunctionReturn(0); 1836 } 1837 1838 EXTERN_C_BEGIN 1839 #undef __FUNCT__ 1840 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1841 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 1842 { 1843 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1844 PetscInt i,nz,nbs; 1845 1846 PetscFunctionBegin; 1847 nz = baij->maxnz/baij->bs2; 1848 nbs = baij->nbs; 1849 for (i=0; i<nz; i++) { 1850 baij->j[i] = indices[i]; 1851 } 1852 baij->nz = nz; 1853 for (i=0; i<nbs; i++) { 1854 baij->ilen[i] = baij->imax[i]; 1855 } 1856 1857 PetscFunctionReturn(0); 1858 } 1859 EXTERN_C_END 1860 1861 #undef __FUNCT__ 1862 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1863 /*@ 1864 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1865 in the matrix. 1866 1867 Input Parameters: 1868 + mat - the SeqBAIJ matrix 1869 - indices - the column indices 1870 1871 Level: advanced 1872 1873 Notes: 1874 This can be called if you have precomputed the nonzero structure of the 1875 matrix and want to provide it to the matrix object to improve the performance 1876 of the MatSetValues() operation. 1877 1878 You MUST have set the correct numbers of nonzeros per row in the call to 1879 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 1880 1881 MUST be called before any calls to MatSetValues(); 1882 1883 @*/ 1884 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1885 { 1886 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1887 1888 PetscFunctionBegin; 1889 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1890 PetscValidPointer(indices,2); 1891 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1892 if (f) { 1893 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1894 } else { 1895 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 1896 } 1897 PetscFunctionReturn(0); 1898 } 1899 1900 #undef __FUNCT__ 1901 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 1902 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 1903 { 1904 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1905 PetscErrorCode ierr; 1906 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 1907 PetscReal atmp; 1908 PetscScalar *x,zero = 0.0; 1909 MatScalar *aa; 1910 PetscInt ncols,brow,krow,kcol; 1911 1912 PetscFunctionBegin; 1913 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1914 bs = A->rmap.bs; 1915 aa = a->a; 1916 ai = a->i; 1917 aj = a->j; 1918 mbs = a->mbs; 1919 1920 ierr = VecSet(v,zero);CHKERRQ(ierr); 1921 if (idx) { 1922 for (i=0; i<A->rmap.n;i++) idx[i] = 0; 1923 } 1924 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1925 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1926 if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1927 for (i=0; i<mbs; i++) { 1928 ncols = ai[1] - ai[0]; ai++; 1929 brow = bs*i; 1930 for (j=0; j<ncols; j++){ 1931 for (kcol=0; kcol<bs; kcol++){ 1932 for (krow=0; krow<bs; krow++){ 1933 atmp = PetscAbsScalar(*aa);aa++; 1934 row = brow + krow; /* row index */ 1935 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 1936 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 1937 } 1938 } 1939 aj++; 1940 } 1941 } 1942 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1943 PetscFunctionReturn(0); 1944 } 1945 1946 #undef __FUNCT__ 1947 #define __FUNCT__ "MatCopy_SeqBAIJ" 1948 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 1949 { 1950 PetscErrorCode ierr; 1951 1952 PetscFunctionBegin; 1953 /* If the two matrices have the same copy implementation, use fast copy. */ 1954 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1955 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1956 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 1957 1958 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 1959 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1960 } 1961 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 1962 } else { 1963 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1964 } 1965 PetscFunctionReturn(0); 1966 } 1967 1968 #undef __FUNCT__ 1969 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1970 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 1971 { 1972 PetscErrorCode ierr; 1973 1974 PetscFunctionBegin; 1975 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 1976 PetscFunctionReturn(0); 1977 } 1978 1979 #undef __FUNCT__ 1980 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1981 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1982 { 1983 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1984 PetscFunctionBegin; 1985 *array = a->a; 1986 PetscFunctionReturn(0); 1987 } 1988 1989 #undef __FUNCT__ 1990 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1991 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1992 { 1993 PetscFunctionBegin; 1994 PetscFunctionReturn(0); 1995 } 1996 1997 #include "petscblaslapack.h" 1998 #undef __FUNCT__ 1999 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2000 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2001 { 2002 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2003 PetscErrorCode ierr; 2004 PetscInt i,bs=Y->rmap.bs,j,bs2; 2005 PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz; 2006 2007 PetscFunctionBegin; 2008 if (str == SAME_NONZERO_PATTERN) { 2009 PetscScalar alpha = a; 2010 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2011 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2012 if (y->xtoy && y->XtoY != X) { 2013 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2014 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2015 } 2016 if (!y->xtoy) { /* get xtoy */ 2017 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2018 y->XtoY = X; 2019 } 2020 bs2 = bs*bs; 2021 for (i=0; i<x->nz; i++) { 2022 j = 0; 2023 while (j < bs2){ 2024 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2025 j++; 2026 } 2027 } 2028 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2029 } else { 2030 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2031 } 2032 PetscFunctionReturn(0); 2033 } 2034 2035 #undef __FUNCT__ 2036 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2037 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2038 { 2039 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2040 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2041 PetscScalar *aa = a->a; 2042 2043 PetscFunctionBegin; 2044 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2045 PetscFunctionReturn(0); 2046 } 2047 2048 #undef __FUNCT__ 2049 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2050 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2051 { 2052 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2053 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2054 PetscScalar *aa = a->a; 2055 2056 PetscFunctionBegin; 2057 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2058 PetscFunctionReturn(0); 2059 } 2060 2061 2062 /* -------------------------------------------------------------------*/ 2063 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2064 MatGetRow_SeqBAIJ, 2065 MatRestoreRow_SeqBAIJ, 2066 MatMult_SeqBAIJ_N, 2067 /* 4*/ MatMultAdd_SeqBAIJ_N, 2068 MatMultTranspose_SeqBAIJ, 2069 MatMultTransposeAdd_SeqBAIJ, 2070 MatSolve_SeqBAIJ_N, 2071 0, 2072 0, 2073 /*10*/ 0, 2074 MatLUFactor_SeqBAIJ, 2075 0, 2076 0, 2077 MatTranspose_SeqBAIJ, 2078 /*15*/ MatGetInfo_SeqBAIJ, 2079 MatEqual_SeqBAIJ, 2080 MatGetDiagonal_SeqBAIJ, 2081 MatDiagonalScale_SeqBAIJ, 2082 MatNorm_SeqBAIJ, 2083 /*20*/ 0, 2084 MatAssemblyEnd_SeqBAIJ, 2085 0, 2086 MatSetOption_SeqBAIJ, 2087 MatZeroEntries_SeqBAIJ, 2088 /*25*/ MatZeroRows_SeqBAIJ, 2089 MatLUFactorSymbolic_SeqBAIJ, 2090 MatLUFactorNumeric_SeqBAIJ_N, 2091 MatCholeskyFactorSymbolic_SeqBAIJ, 2092 MatCholeskyFactorNumeric_SeqBAIJ_N, 2093 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2094 MatILUFactorSymbolic_SeqBAIJ, 2095 MatICCFactorSymbolic_SeqBAIJ, 2096 MatGetArray_SeqBAIJ, 2097 MatRestoreArray_SeqBAIJ, 2098 /*35*/ MatDuplicate_SeqBAIJ, 2099 0, 2100 0, 2101 MatILUFactor_SeqBAIJ, 2102 0, 2103 /*40*/ MatAXPY_SeqBAIJ, 2104 MatGetSubMatrices_SeqBAIJ, 2105 MatIncreaseOverlap_SeqBAIJ, 2106 MatGetValues_SeqBAIJ, 2107 MatCopy_SeqBAIJ, 2108 /*45*/ 0, 2109 MatScale_SeqBAIJ, 2110 0, 2111 0, 2112 0, 2113 /*50*/ 0, 2114 MatGetRowIJ_SeqBAIJ, 2115 MatRestoreRowIJ_SeqBAIJ, 2116 0, 2117 0, 2118 /*55*/ 0, 2119 0, 2120 0, 2121 0, 2122 MatSetValuesBlocked_SeqBAIJ, 2123 /*60*/ MatGetSubMatrix_SeqBAIJ, 2124 MatDestroy_SeqBAIJ, 2125 MatView_SeqBAIJ, 2126 0, 2127 0, 2128 /*65*/ 0, 2129 0, 2130 0, 2131 0, 2132 0, 2133 /*70*/ MatGetRowMaxAbs_SeqBAIJ, 2134 MatConvert_Basic, 2135 0, 2136 0, 2137 0, 2138 /*75*/ 0, 2139 0, 2140 0, 2141 0, 2142 0, 2143 /*80*/ 0, 2144 0, 2145 0, 2146 0, 2147 MatLoad_SeqBAIJ, 2148 /*85*/ 0, 2149 0, 2150 0, 2151 0, 2152 0, 2153 /*90*/ 0, 2154 0, 2155 0, 2156 0, 2157 0, 2158 /*95*/ 0, 2159 0, 2160 0, 2161 0, 2162 0, 2163 /*100*/0, 2164 0, 2165 0, 2166 0, 2167 0, 2168 /*105*/0, 2169 MatRealPart_SeqBAIJ, 2170 MatImaginaryPart_SeqBAIJ, 2171 0, 2172 0, 2173 /*110*/0, 2174 0, 2175 0, 2176 0, 2177 MatMissingDiagonal_SeqBAIJ 2178 /*115*/ 2179 }; 2180 2181 EXTERN_C_BEGIN 2182 #undef __FUNCT__ 2183 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2184 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2185 { 2186 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2187 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2188 PetscErrorCode ierr; 2189 2190 PetscFunctionBegin; 2191 if (aij->nonew != 1) { 2192 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2193 } 2194 2195 /* allocate space for values if not already there */ 2196 if (!aij->saved_values) { 2197 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2198 } 2199 2200 /* copy values over */ 2201 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2202 PetscFunctionReturn(0); 2203 } 2204 EXTERN_C_END 2205 2206 EXTERN_C_BEGIN 2207 #undef __FUNCT__ 2208 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2209 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2210 { 2211 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2212 PetscErrorCode ierr; 2213 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2214 2215 PetscFunctionBegin; 2216 if (aij->nonew != 1) { 2217 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2218 } 2219 if (!aij->saved_values) { 2220 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2221 } 2222 2223 /* copy values over */ 2224 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2225 PetscFunctionReturn(0); 2226 } 2227 EXTERN_C_END 2228 2229 EXTERN_C_BEGIN 2230 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2231 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2232 EXTERN_C_END 2233 2234 EXTERN_C_BEGIN 2235 #undef __FUNCT__ 2236 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2237 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2238 { 2239 Mat_SeqBAIJ *b; 2240 PetscErrorCode ierr; 2241 PetscInt i,mbs,nbs,bs2,newbs = bs; 2242 PetscTruth flg,skipallocation = PETSC_FALSE; 2243 2244 PetscFunctionBegin; 2245 2246 if (nz == MAT_SKIP_ALLOCATION) { 2247 skipallocation = PETSC_TRUE; 2248 nz = 0; 2249 } 2250 2251 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2252 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2253 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2254 2255 if (nnz && newbs != bs) { 2256 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2257 } 2258 bs = newbs; 2259 2260 B->rmap.bs = B->cmap.bs = bs; 2261 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2262 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2263 2264 B->preallocated = PETSC_TRUE; 2265 2266 mbs = B->rmap.n/bs; 2267 nbs = B->cmap.n/bs; 2268 bs2 = bs*bs; 2269 2270 if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) { 2271 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs); 2272 } 2273 2274 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2275 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2276 if (nnz) { 2277 for (i=0; i<mbs; i++) { 2278 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2279 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2280 } 2281 } 2282 2283 b = (Mat_SeqBAIJ*)B->data; 2284 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2285 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2286 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2287 2288 B->ops->solve = MatSolve_SeqBAIJ_Update; 2289 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2290 if (!flg) { 2291 switch (bs) { 2292 case 1: 2293 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2294 B->ops->mult = MatMult_SeqBAIJ_1; 2295 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2296 break; 2297 case 2: 2298 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2299 B->ops->mult = MatMult_SeqBAIJ_2; 2300 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2301 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2302 break; 2303 case 3: 2304 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2305 B->ops->mult = MatMult_SeqBAIJ_3; 2306 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2307 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2308 break; 2309 case 4: 2310 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2311 B->ops->mult = MatMult_SeqBAIJ_4; 2312 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2313 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2314 break; 2315 case 5: 2316 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2317 B->ops->mult = MatMult_SeqBAIJ_5; 2318 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2319 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2320 break; 2321 case 6: 2322 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2323 B->ops->mult = MatMult_SeqBAIJ_6; 2324 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2325 break; 2326 case 7: 2327 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2328 B->ops->mult = MatMult_SeqBAIJ_7; 2329 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2330 break; 2331 default: 2332 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2333 B->ops->mult = MatMult_SeqBAIJ_N; 2334 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2335 break; 2336 } 2337 } 2338 B->rmap.bs = bs; 2339 b->mbs = mbs; 2340 b->nbs = nbs; 2341 if (!skipallocation) { 2342 if (!b->imax) { 2343 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2344 } 2345 /* b->ilen will count nonzeros in each block row so far. */ 2346 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2347 if (!nnz) { 2348 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2349 else if (nz <= 0) nz = 1; 2350 for (i=0; i<mbs; i++) b->imax[i] = nz; 2351 nz = nz*mbs; 2352 } else { 2353 nz = 0; 2354 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2355 } 2356 2357 /* allocate the matrix space */ 2358 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2359 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 2360 ierr = PetscLogObjectMemory(B,(B->rmap.N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2361 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2362 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2363 b->singlemalloc = PETSC_TRUE; 2364 b->i[0] = 0; 2365 for (i=1; i<mbs+1; i++) { 2366 b->i[i] = b->i[i-1] + b->imax[i-1]; 2367 } 2368 b->free_a = PETSC_TRUE; 2369 b->free_ij = PETSC_TRUE; 2370 } else { 2371 b->free_a = PETSC_FALSE; 2372 b->free_ij = PETSC_FALSE; 2373 } 2374 2375 B->rmap.bs = bs; 2376 b->bs2 = bs2; 2377 b->mbs = mbs; 2378 b->nz = 0; 2379 b->maxnz = nz*bs2; 2380 B->info.nz_unneeded = (PetscReal)b->maxnz; 2381 PetscFunctionReturn(0); 2382 } 2383 EXTERN_C_END 2384 2385 /*MC 2386 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2387 block sparse compressed row format. 2388 2389 Options Database Keys: 2390 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2391 2392 Level: beginner 2393 2394 .seealso: MatCreateSeqBAIJ() 2395 M*/ 2396 2397 EXTERN_C_BEGIN 2398 #undef __FUNCT__ 2399 #define __FUNCT__ "MatCreate_SeqBAIJ" 2400 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2401 { 2402 PetscErrorCode ierr; 2403 PetscMPIInt size; 2404 Mat_SeqBAIJ *b; 2405 2406 PetscFunctionBegin; 2407 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 2408 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2409 2410 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2411 B->data = (void*)b; 2412 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2413 B->factor = 0; 2414 B->mapping = 0; 2415 b->row = 0; 2416 b->col = 0; 2417 b->icol = 0; 2418 b->reallocs = 0; 2419 b->saved_values = 0; 2420 #if defined(PETSC_USE_MAT_SINGLE) 2421 b->setvalueslen = 0; 2422 b->setvaluescopy = PETSC_NULL; 2423 #endif 2424 2425 b->roworiented = PETSC_TRUE; 2426 b->nonew = 0; 2427 b->diag = 0; 2428 b->solve_work = 0; 2429 b->mult_work = 0; 2430 B->spptr = 0; 2431 B->info.nz_unneeded = (PetscReal)b->maxnz; 2432 b->keepzeroedrows = PETSC_FALSE; 2433 b->xtoy = 0; 2434 b->XtoY = 0; 2435 b->compressedrow.use = PETSC_FALSE; 2436 b->compressedrow.nrows = 0; 2437 b->compressedrow.i = PETSC_NULL; 2438 b->compressedrow.rindex = PETSC_NULL; 2439 b->compressedrow.checked = PETSC_FALSE; 2440 B->same_nonzero = PETSC_FALSE; 2441 2442 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2443 "MatInvertBlockDiagonal_SeqBAIJ", 2444 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2445 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2446 "MatStoreValues_SeqBAIJ", 2447 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2448 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2449 "MatRetrieveValues_SeqBAIJ", 2450 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2451 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2452 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2453 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2454 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2455 "MatConvert_SeqBAIJ_SeqAIJ", 2456 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2457 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2458 "MatConvert_SeqBAIJ_SeqSBAIJ", 2459 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2460 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2461 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2462 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2463 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 2464 PetscFunctionReturn(0); 2465 } 2466 EXTERN_C_END 2467 2468 #undef __FUNCT__ 2469 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2470 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2471 { 2472 Mat C; 2473 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2474 PetscErrorCode ierr; 2475 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2476 2477 PetscFunctionBegin; 2478 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2479 2480 *B = 0; 2481 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 2482 ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 2483 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 2484 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2485 c = (Mat_SeqBAIJ*)C->data; 2486 2487 C->rmap.N = A->rmap.N; 2488 C->cmap.N = A->cmap.N; 2489 C->rmap.bs = A->rmap.bs; 2490 c->bs2 = a->bs2; 2491 c->mbs = a->mbs; 2492 c->nbs = a->nbs; 2493 2494 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 2495 for (i=0; i<mbs; i++) { 2496 c->imax[i] = a->imax[i]; 2497 c->ilen[i] = a->ilen[i]; 2498 } 2499 2500 /* allocate the matrix space */ 2501 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2502 c->singlemalloc = PETSC_TRUE; 2503 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2504 if (mbs > 0) { 2505 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2506 if (cpvalues == MAT_COPY_VALUES) { 2507 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2508 } else { 2509 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2510 } 2511 } 2512 c->roworiented = a->roworiented; 2513 c->nonew = a->nonew; 2514 2515 if (a->diag) { 2516 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2517 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2518 for (i=0; i<mbs; i++) { 2519 c->diag[i] = a->diag[i]; 2520 } 2521 } else c->diag = 0; 2522 c->nz = a->nz; 2523 c->maxnz = a->maxnz; 2524 c->solve_work = 0; 2525 c->mult_work = 0; 2526 c->free_a = PETSC_TRUE; 2527 c->free_ij = PETSC_TRUE; 2528 C->preallocated = PETSC_TRUE; 2529 C->assembled = PETSC_TRUE; 2530 2531 c->compressedrow.use = a->compressedrow.use; 2532 c->compressedrow.nrows = a->compressedrow.nrows; 2533 c->compressedrow.checked = a->compressedrow.checked; 2534 if ( a->compressedrow.checked && a->compressedrow.use){ 2535 i = a->compressedrow.nrows; 2536 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2537 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2538 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2539 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2540 } else { 2541 c->compressedrow.use = PETSC_FALSE; 2542 c->compressedrow.i = PETSC_NULL; 2543 c->compressedrow.rindex = PETSC_NULL; 2544 } 2545 C->same_nonzero = A->same_nonzero; 2546 *B = C; 2547 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 2548 PetscFunctionReturn(0); 2549 } 2550 2551 #undef __FUNCT__ 2552 #define __FUNCT__ "MatLoad_SeqBAIJ" 2553 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A) 2554 { 2555 Mat_SeqBAIJ *a; 2556 Mat B; 2557 PetscErrorCode ierr; 2558 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2559 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2560 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2561 PetscInt *masked,nmask,tmp,bs2,ishift; 2562 PetscMPIInt size; 2563 int fd; 2564 PetscScalar *aa; 2565 MPI_Comm comm = ((PetscObject)viewer)->comm; 2566 2567 PetscFunctionBegin; 2568 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 2569 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2570 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2571 bs2 = bs*bs; 2572 2573 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2574 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2575 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2576 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2577 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2578 M = header[1]; N = header[2]; nz = header[3]; 2579 2580 if (header[3] < 0) { 2581 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2582 } 2583 2584 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2585 2586 /* 2587 This code adds extra rows to make sure the number of rows is 2588 divisible by the blocksize 2589 */ 2590 mbs = M/bs; 2591 extra_rows = bs - M + bs*(mbs); 2592 if (extra_rows == bs) extra_rows = 0; 2593 else mbs++; 2594 if (extra_rows) { 2595 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2596 } 2597 2598 /* read in row lengths */ 2599 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2600 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2601 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2602 2603 /* read in column indices */ 2604 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2605 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2606 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2607 2608 /* loop over row lengths determining block row lengths */ 2609 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2610 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2611 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2612 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2613 masked = mask + mbs; 2614 rowcount = 0; nzcount = 0; 2615 for (i=0; i<mbs; i++) { 2616 nmask = 0; 2617 for (j=0; j<bs; j++) { 2618 kmax = rowlengths[rowcount]; 2619 for (k=0; k<kmax; k++) { 2620 tmp = jj[nzcount++]/bs; 2621 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2622 } 2623 rowcount++; 2624 } 2625 browlengths[i] += nmask; 2626 /* zero out the mask elements we set */ 2627 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2628 } 2629 2630 /* create our matrix */ 2631 ierr = MatCreate(comm,&B); 2632 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 2633 ierr = MatSetType(B,type);CHKERRQ(ierr); 2634 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 2635 a = (Mat_SeqBAIJ*)B->data; 2636 2637 /* set matrix "i" values */ 2638 a->i[0] = 0; 2639 for (i=1; i<= mbs; i++) { 2640 a->i[i] = a->i[i-1] + browlengths[i-1]; 2641 a->ilen[i-1] = browlengths[i-1]; 2642 } 2643 a->nz = 0; 2644 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2645 2646 /* read in nonzero values */ 2647 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2648 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2649 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2650 2651 /* set "a" and "j" values into matrix */ 2652 nzcount = 0; jcount = 0; 2653 for (i=0; i<mbs; i++) { 2654 nzcountb = nzcount; 2655 nmask = 0; 2656 for (j=0; j<bs; j++) { 2657 kmax = rowlengths[i*bs+j]; 2658 for (k=0; k<kmax; k++) { 2659 tmp = jj[nzcount++]/bs; 2660 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2661 } 2662 } 2663 /* sort the masked values */ 2664 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2665 2666 /* set "j" values into matrix */ 2667 maskcount = 1; 2668 for (j=0; j<nmask; j++) { 2669 a->j[jcount++] = masked[j]; 2670 mask[masked[j]] = maskcount++; 2671 } 2672 /* set "a" values into matrix */ 2673 ishift = bs2*a->i[i]; 2674 for (j=0; j<bs; j++) { 2675 kmax = rowlengths[i*bs+j]; 2676 for (k=0; k<kmax; k++) { 2677 tmp = jj[nzcountb]/bs ; 2678 block = mask[tmp] - 1; 2679 point = jj[nzcountb] - bs*tmp; 2680 idx = ishift + bs2*block + j + bs*point; 2681 a->a[idx] = (MatScalar)aa[nzcountb++]; 2682 } 2683 } 2684 /* zero out the mask elements we set */ 2685 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2686 } 2687 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2688 2689 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2690 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2691 ierr = PetscFree(aa);CHKERRQ(ierr); 2692 ierr = PetscFree(jj);CHKERRQ(ierr); 2693 ierr = PetscFree(mask);CHKERRQ(ierr); 2694 2695 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2696 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2697 ierr = MatView_Private(B);CHKERRQ(ierr); 2698 2699 *A = B; 2700 PetscFunctionReturn(0); 2701 } 2702 2703 #undef __FUNCT__ 2704 #define __FUNCT__ "MatCreateSeqBAIJ" 2705 /*@C 2706 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2707 compressed row) format. For good matrix assembly performance the 2708 user should preallocate the matrix storage by setting the parameter nz 2709 (or the array nnz). By setting these parameters accurately, performance 2710 during matrix assembly can be increased by more than a factor of 50. 2711 2712 Collective on MPI_Comm 2713 2714 Input Parameters: 2715 + comm - MPI communicator, set to PETSC_COMM_SELF 2716 . bs - size of block 2717 . m - number of rows 2718 . n - number of columns 2719 . nz - number of nonzero blocks per block row (same for all rows) 2720 - nnz - array containing the number of nonzero blocks in the various block rows 2721 (possibly different for each block row) or PETSC_NULL 2722 2723 Output Parameter: 2724 . A - the matrix 2725 2726 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2727 MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely 2728 true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles. 2729 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2730 2731 Options Database Keys: 2732 . -mat_no_unroll - uses code that does not unroll the loops in the 2733 block calculations (much slower) 2734 . -mat_block_size - size of the blocks to use 2735 2736 Level: intermediate 2737 2738 Notes: 2739 The number of rows and columns must be divisible by blocksize. 2740 2741 If the nnz parameter is given then the nz parameter is ignored 2742 2743 A nonzero block is any block that as 1 or more nonzeros in it 2744 2745 The block AIJ format is fully compatible with standard Fortran 77 2746 storage. That is, the stored row and column indices can begin at 2747 either one (as in Fortran) or zero. See the users' manual for details. 2748 2749 Specify the preallocated storage with either nz or nnz (not both). 2750 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2751 allocation. For additional details, see the users manual chapter on 2752 matrices. 2753 2754 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2755 @*/ 2756 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 2757 { 2758 PetscErrorCode ierr; 2759 2760 PetscFunctionBegin; 2761 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2762 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2763 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2764 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 2765 PetscFunctionReturn(0); 2766 } 2767 2768 #undef __FUNCT__ 2769 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2770 /*@C 2771 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2772 per row in the matrix. For good matrix assembly performance the 2773 user should preallocate the matrix storage by setting the parameter nz 2774 (or the array nnz). By setting these parameters accurately, performance 2775 during matrix assembly can be increased by more than a factor of 50. 2776 2777 Collective on MPI_Comm 2778 2779 Input Parameters: 2780 + A - the matrix 2781 . bs - size of block 2782 . nz - number of block nonzeros per block row (same for all rows) 2783 - nnz - array containing the number of block nonzeros in the various block rows 2784 (possibly different for each block row) or PETSC_NULL 2785 2786 Options Database Keys: 2787 . -mat_no_unroll - uses code that does not unroll the loops in the 2788 block calculations (much slower) 2789 . -mat_block_size - size of the blocks to use 2790 2791 Level: intermediate 2792 2793 Notes: 2794 If the nnz parameter is given then the nz parameter is ignored 2795 2796 You can call MatGetInfo() to get information on how effective the preallocation was; 2797 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2798 You can also run with the option -info and look for messages with the string 2799 malloc in them to see if additional memory allocation was needed. 2800 2801 The block AIJ format is fully compatible with standard Fortran 77 2802 storage. That is, the stored row and column indices can begin at 2803 either one (as in Fortran) or zero. See the users' manual for details. 2804 2805 Specify the preallocated storage with either nz or nnz (not both). 2806 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2807 allocation. For additional details, see the users manual chapter on 2808 matrices. 2809 2810 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 2811 @*/ 2812 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 2813 { 2814 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 2815 2816 PetscFunctionBegin; 2817 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2818 if (f) { 2819 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2820 } 2821 PetscFunctionReturn(0); 2822 } 2823 2824 #undef __FUNCT__ 2825 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 2826 /*@ 2827 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 2828 (upper triangular entries in CSR format) provided by the user. 2829 2830 Collective on MPI_Comm 2831 2832 Input Parameters: 2833 + comm - must be an MPI communicator of size 1 2834 . bs - size of block 2835 . m - number of rows 2836 . n - number of columns 2837 . i - row indices 2838 . j - column indices 2839 - a - matrix values 2840 2841 Output Parameter: 2842 . mat - the matrix 2843 2844 Level: intermediate 2845 2846 Notes: 2847 The i, j, and a arrays are not copied by this routine, the user must free these arrays 2848 once the matrix is destroyed 2849 2850 You cannot set new nonzero locations into this matrix, that will generate an error. 2851 2852 The i and j indices are 0 based 2853 2854 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 2855 2856 @*/ 2857 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 2858 { 2859 PetscErrorCode ierr; 2860 PetscInt ii; 2861 Mat_SeqBAIJ *baij; 2862 2863 PetscFunctionBegin; 2864 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 2865 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2866 2867 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2868 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 2869 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 2870 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 2871 baij = (Mat_SeqBAIJ*)(*mat)->data; 2872 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 2873 2874 baij->i = i; 2875 baij->j = j; 2876 baij->a = a; 2877 baij->singlemalloc = PETSC_FALSE; 2878 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 2879 baij->free_a = PETSC_FALSE; 2880 baij->free_ij = PETSC_FALSE; 2881 2882 for (ii=0; ii<m; ii++) { 2883 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 2884 #if defined(PETSC_USE_DEBUG) 2885 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 2886 #endif 2887 } 2888 #if defined(PETSC_USE_DEBUG) 2889 for (ii=0; ii<baij->i[m]; ii++) { 2890 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 2891 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 2892 } 2893 #endif 2894 2895 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2896 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2897 PetscFunctionReturn(0); 2898 } 2899