1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 15 /*@ 16 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 17 18 Collective on Mat 19 20 Input Parameters: 21 . mat - the matrix 22 23 Level: advanced 24 @*/ 25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 26 { 27 PetscErrorCode ierr,(*f)(Mat); 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 31 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 32 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 33 34 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 35 if (f) { 36 ierr = (*f)(mat);CHKERRQ(ierr); 37 } else { 38 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 39 } 40 PetscFunctionReturn(0); 41 } 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 47 { 48 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 49 PetscErrorCode ierr; 50 PetscInt *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs; 51 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 52 53 PetscFunctionBegin; 54 if (a->idiagvalid) PetscFunctionReturn(0); 55 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56 diag_offset = a->diag; 57 if (!a->idiag) { 58 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59 } 60 diag = a->idiag; 61 mdiag = a->idiag+bs*bs*mbs; 62 /* factor and invert each block */ 63 switch (bs){ 64 case 2: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 4*diag_offset[i]; 67 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 68 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 69 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 70 diag += 4; 71 mdiag += 4; 72 } 73 break; 74 case 3: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 9*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79 diag[8] = odiag[8]; 80 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 81 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 82 mdiag[8] = odiag[8]; 83 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 84 diag += 9; 85 mdiag += 9; 86 } 87 break; 88 case 4: 89 for (i=0; i<mbs; i++) { 90 odiag = v + 16*diag_offset[i]; 91 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 92 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 94 diag += 16; 95 mdiag += 16; 96 } 97 break; 98 case 5: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 25*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 104 diag += 25; 105 mdiag += 25; 106 } 107 break; 108 default: 109 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 110 } 111 a->idiagvalid = PETSC_TRUE; 112 PetscFunctionReturn(0); 113 } 114 EXTERN_C_END 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 119 { 120 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 121 PetscScalar *x,x1,x2,s1,s2; 122 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 123 PetscErrorCode ierr; 124 PetscInt m = a->mbs,i,i2,nz,idx; 125 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 126 127 PetscFunctionBegin; 128 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 129 its = its*lits; 130 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 131 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 132 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 133 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 134 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 135 136 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 137 138 diag = a->diag; 139 idiag = a->idiag; 140 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 141 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 142 143 if (flag & SOR_ZERO_INITIAL_GUESS) { 144 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 145 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 146 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 147 i2 = 2; 148 idiag += 4; 149 for (i=1; i<m; i++) { 150 v = aa + 4*ai[i]; 151 vi = aj + ai[i]; 152 nz = diag[i] - ai[i]; 153 s1 = b[i2]; s2 = b[i2+1]; 154 while (nz--) { 155 idx = 2*(*vi++); 156 x1 = x[idx]; x2 = x[1+idx]; 157 s1 -= v[0]*x1 + v[2]*x2; 158 s2 -= v[1]*x1 + v[3]*x2; 159 v += 4; 160 } 161 x[i2] = idiag[0]*s1 + idiag[2]*s2; 162 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 163 idiag += 4; 164 i2 += 2; 165 } 166 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 167 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 168 } 169 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 170 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 171 i2 = 0; 172 mdiag = a->idiag+4*a->mbs; 173 for (i=0; i<m; i++) { 174 x1 = x[i2]; x2 = x[i2+1]; 175 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 176 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 177 mdiag += 4; 178 i2 += 2; 179 } 180 ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 181 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 182 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 183 } 184 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 185 idiag = a->idiag+4*a->mbs - 4; 186 i2 = 2*m - 2; 187 x1 = x[i2]; x2 = x[i2+1]; 188 x[i2] = idiag[0]*x1 + idiag[2]*x2; 189 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 190 idiag -= 4; 191 i2 -= 2; 192 for (i=m-2; i>=0; i--) { 193 v = aa + 4*(diag[i]+1); 194 vi = aj + diag[i] + 1; 195 nz = ai[i+1] - diag[i] - 1; 196 s1 = x[i2]; s2 = x[i2+1]; 197 while (nz--) { 198 idx = 2*(*vi++); 199 x1 = x[idx]; x2 = x[1+idx]; 200 s1 -= v[0]*x1 + v[2]*x2; 201 s2 -= v[1]*x1 + v[3]*x2; 202 v += 4; 203 } 204 x[i2] = idiag[0]*s1 + idiag[2]*s2; 205 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 206 idiag -= 4; 207 i2 -= 2; 208 } 209 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 210 } 211 } else { 212 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 213 } 214 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 215 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 216 PetscFunctionReturn(0); 217 } 218 219 #undef __FUNCT__ 220 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 221 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 222 { 223 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 224 PetscScalar *x,x1,x2,x3,s1,s2,s3; 225 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 226 PetscErrorCode ierr; 227 PetscInt m = a->mbs,i,i2,nz,idx; 228 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 229 230 PetscFunctionBegin; 231 its = its*lits; 232 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 233 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 234 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 235 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 236 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 237 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 238 239 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 240 241 diag = a->diag; 242 idiag = a->idiag; 243 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 244 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 245 246 if (flag & SOR_ZERO_INITIAL_GUESS) { 247 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 248 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 249 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 250 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 251 i2 = 3; 252 idiag += 9; 253 for (i=1; i<m; i++) { 254 v = aa + 9*ai[i]; 255 vi = aj + ai[i]; 256 nz = diag[i] - ai[i]; 257 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 258 while (nz--) { 259 idx = 3*(*vi++); 260 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 261 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 262 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 263 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 264 v += 9; 265 } 266 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 267 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 268 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 269 idiag += 9; 270 i2 += 3; 271 } 272 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 273 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 274 } 275 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 276 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 277 i2 = 0; 278 mdiag = a->idiag+9*a->mbs; 279 for (i=0; i<m; i++) { 280 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 281 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 282 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 283 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 284 mdiag += 9; 285 i2 += 3; 286 } 287 ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 288 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 289 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 290 } 291 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 292 idiag = a->idiag+9*a->mbs - 9; 293 i2 = 3*m - 3; 294 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 295 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 296 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 297 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 298 idiag -= 9; 299 i2 -= 3; 300 for (i=m-2; i>=0; i--) { 301 v = aa + 9*(diag[i]+1); 302 vi = aj + diag[i] + 1; 303 nz = ai[i+1] - diag[i] - 1; 304 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 305 while (nz--) { 306 idx = 3*(*vi++); 307 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 308 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 309 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 310 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 311 v += 9; 312 } 313 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 314 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 315 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 316 idiag -= 9; 317 i2 -= 3; 318 } 319 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 320 } 321 } else { 322 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 323 } 324 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 325 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 326 PetscFunctionReturn(0); 327 } 328 329 #undef __FUNCT__ 330 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 331 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 332 { 333 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 334 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 335 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 336 PetscErrorCode ierr; 337 PetscInt m = a->mbs,i,i2,nz,idx; 338 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 339 340 PetscFunctionBegin; 341 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 342 its = its*lits; 343 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 344 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 345 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 346 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 347 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 348 349 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 350 351 diag = a->diag; 352 idiag = a->idiag; 353 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 354 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 355 356 if (flag & SOR_ZERO_INITIAL_GUESS) { 357 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 358 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 359 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 360 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 361 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 362 i2 = 4; 363 idiag += 16; 364 for (i=1; i<m; i++) { 365 v = aa + 16*ai[i]; 366 vi = aj + ai[i]; 367 nz = diag[i] - ai[i]; 368 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 369 while (nz--) { 370 idx = 4*(*vi++); 371 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 372 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 373 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 374 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 375 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 376 v += 16; 377 } 378 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 379 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 380 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 381 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 382 idiag += 16; 383 i2 += 4; 384 } 385 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 386 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 387 } 388 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 389 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 390 i2 = 0; 391 mdiag = a->idiag+16*a->mbs; 392 for (i=0; i<m; i++) { 393 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 394 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 395 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 396 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 397 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 398 mdiag += 16; 399 i2 += 4; 400 } 401 ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 402 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 403 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 404 } 405 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 406 idiag = a->idiag+16*a->mbs - 16; 407 i2 = 4*m - 4; 408 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 409 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 410 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 411 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 412 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 413 idiag -= 16; 414 i2 -= 4; 415 for (i=m-2; i>=0; i--) { 416 v = aa + 16*(diag[i]+1); 417 vi = aj + diag[i] + 1; 418 nz = ai[i+1] - diag[i] - 1; 419 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 420 while (nz--) { 421 idx = 4*(*vi++); 422 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 423 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 424 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 425 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 426 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 427 v += 16; 428 } 429 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 430 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 431 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 432 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 433 idiag -= 16; 434 i2 -= 4; 435 } 436 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 437 } 438 } else { 439 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 440 } 441 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 442 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 443 PetscFunctionReturn(0); 444 } 445 446 #undef __FUNCT__ 447 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 448 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 449 { 450 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 451 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 452 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 453 PetscErrorCode ierr; 454 PetscInt m = a->mbs,i,i2,nz,idx; 455 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 456 457 PetscFunctionBegin; 458 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 459 its = its*lits; 460 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 461 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 462 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 463 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 464 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 465 466 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 467 468 diag = a->diag; 469 idiag = a->idiag; 470 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 471 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 472 473 if (flag & SOR_ZERO_INITIAL_GUESS) { 474 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 475 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 476 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 477 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 478 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 479 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 480 i2 = 5; 481 idiag += 25; 482 for (i=1; i<m; i++) { 483 v = aa + 25*ai[i]; 484 vi = aj + ai[i]; 485 nz = diag[i] - ai[i]; 486 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 487 while (nz--) { 488 idx = 5*(*vi++); 489 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 490 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 491 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 492 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 493 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 494 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 495 v += 25; 496 } 497 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 498 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 499 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 500 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 501 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 502 idiag += 25; 503 i2 += 5; 504 } 505 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 506 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 507 } 508 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 509 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 510 i2 = 0; 511 mdiag = a->idiag+25*a->mbs; 512 for (i=0; i<m; i++) { 513 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 514 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 515 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 516 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 517 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 518 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 519 mdiag += 25; 520 i2 += 5; 521 } 522 ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 523 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 524 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 525 } 526 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 527 idiag = a->idiag+25*a->mbs - 25; 528 i2 = 5*m - 5; 529 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 530 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 531 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 532 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 533 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 534 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 535 idiag -= 25; 536 i2 -= 5; 537 for (i=m-2; i>=0; i--) { 538 v = aa + 25*(diag[i]+1); 539 vi = aj + diag[i] + 1; 540 nz = ai[i+1] - diag[i] - 1; 541 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 542 while (nz--) { 543 idx = 5*(*vi++); 544 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 545 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 546 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 547 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 548 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 549 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 550 v += 25; 551 } 552 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 553 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 554 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 555 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 556 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 557 idiag -= 25; 558 i2 -= 5; 559 } 560 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 561 } 562 } else { 563 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 564 } 565 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 566 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 567 PetscFunctionReturn(0); 568 } 569 570 /* 571 Special version for direct calls from Fortran (Used in PETSc-fun3d) 572 */ 573 #if defined(PETSC_HAVE_FORTRAN_CAPS) 574 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 575 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 576 #define matsetvaluesblocked4_ matsetvaluesblocked4 577 #endif 578 579 EXTERN_C_BEGIN 580 #undef __FUNCT__ 581 #define __FUNCT__ "matsetvaluesblocked4_" 582 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 583 { 584 Mat A = *AA; 585 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 586 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 587 PetscInt *ai=a->i,*ailen=a->ilen; 588 PetscInt *aj=a->j,stepval,lastcol = -1; 589 const PetscScalar *value = v; 590 MatScalar *ap,*aa = a->a,*bap; 591 592 PetscFunctionBegin; 593 if (A->rmap.bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 594 stepval = (n-1)*4; 595 for (k=0; k<m; k++) { /* loop over added rows */ 596 row = im[k]; 597 rp = aj + ai[row]; 598 ap = aa + 16*ai[row]; 599 nrow = ailen[row]; 600 low = 0; 601 high = nrow; 602 for (l=0; l<n; l++) { /* loop over added columns */ 603 col = in[l]; 604 if (col <= lastcol) low = 0; else high = nrow; 605 lastcol = col; 606 value = v + k*(stepval+4 + l)*4; 607 while (high-low > 7) { 608 t = (low+high)/2; 609 if (rp[t] > col) high = t; 610 else low = t; 611 } 612 for (i=low; i<high; i++) { 613 if (rp[i] > col) break; 614 if (rp[i] == col) { 615 bap = ap + 16*i; 616 for (ii=0; ii<4; ii++,value+=stepval) { 617 for (jj=ii; jj<16; jj+=4) { 618 bap[jj] += *value++; 619 } 620 } 621 goto noinsert2; 622 } 623 } 624 N = nrow++ - 1; 625 high++; /* added new column index thus must search to one higher than before */ 626 /* shift up all the later entries in this row */ 627 for (ii=N; ii>=i; ii--) { 628 rp[ii+1] = rp[ii]; 629 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 630 } 631 if (N >= i) { 632 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 633 } 634 rp[i] = col; 635 bap = ap + 16*i; 636 for (ii=0; ii<4; ii++,value+=stepval) { 637 for (jj=ii; jj<16; jj+=4) { 638 bap[jj] = *value++; 639 } 640 } 641 noinsert2:; 642 low = i; 643 } 644 ailen[row] = nrow; 645 } 646 PetscFunctionReturnVoid(); 647 } 648 EXTERN_C_END 649 650 #if defined(PETSC_HAVE_FORTRAN_CAPS) 651 #define matsetvalues4_ MATSETVALUES4 652 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 653 #define matsetvalues4_ matsetvalues4 654 #endif 655 656 EXTERN_C_BEGIN 657 #undef __FUNCT__ 658 #define __FUNCT__ "MatSetValues4_" 659 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 660 { 661 Mat A = *AA; 662 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 663 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 664 PetscInt *ai=a->i,*ailen=a->ilen; 665 PetscInt *aj=a->j,brow,bcol; 666 PetscInt ridx,cidx,lastcol = -1; 667 MatScalar *ap,value,*aa=a->a,*bap; 668 669 PetscFunctionBegin; 670 for (k=0; k<m; k++) { /* loop over added rows */ 671 row = im[k]; brow = row/4; 672 rp = aj + ai[brow]; 673 ap = aa + 16*ai[brow]; 674 nrow = ailen[brow]; 675 low = 0; 676 high = nrow; 677 for (l=0; l<n; l++) { /* loop over added columns */ 678 col = in[l]; bcol = col/4; 679 ridx = row % 4; cidx = col % 4; 680 value = v[l + k*n]; 681 if (col <= lastcol) low = 0; else high = nrow; 682 lastcol = col; 683 while (high-low > 7) { 684 t = (low+high)/2; 685 if (rp[t] > bcol) high = t; 686 else low = t; 687 } 688 for (i=low; i<high; i++) { 689 if (rp[i] > bcol) break; 690 if (rp[i] == bcol) { 691 bap = ap + 16*i + 4*cidx + ridx; 692 *bap += value; 693 goto noinsert1; 694 } 695 } 696 N = nrow++ - 1; 697 high++; /* added new column thus must search to one higher than before */ 698 /* shift up all the later entries in this row */ 699 for (ii=N; ii>=i; ii--) { 700 rp[ii+1] = rp[ii]; 701 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 702 } 703 if (N>=i) { 704 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 705 } 706 rp[i] = bcol; 707 ap[16*i + 4*cidx + ridx] = value; 708 noinsert1:; 709 low = i; 710 } 711 ailen[brow] = nrow; 712 } 713 PetscFunctionReturnVoid(); 714 } 715 EXTERN_C_END 716 717 /* UGLY, ugly, ugly 718 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 719 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 720 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 721 converts the entries into single precision and then calls ..._MatScalar() to put them 722 into the single precision data structures. 723 */ 724 #if defined(PETSC_USE_MAT_SINGLE) 725 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 726 #else 727 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 728 #endif 729 730 #define CHUNKSIZE 10 731 732 /* 733 Checks for missing diagonals 734 */ 735 #undef __FUNCT__ 736 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 737 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d) 738 { 739 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 740 PetscErrorCode ierr; 741 PetscInt *diag,*jj = a->j,i; 742 743 PetscFunctionBegin; 744 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 745 *missing = PETSC_FALSE; 746 diag = a->diag; 747 for (i=0; i<a->mbs; i++) { 748 if (jj[diag[i]] != i) { 749 *missing = PETSC_TRUE; 750 if (d) *d = i; 751 } 752 } 753 PetscFunctionReturn(0); 754 } 755 756 #undef __FUNCT__ 757 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 758 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 759 { 760 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 761 PetscErrorCode ierr; 762 PetscInt i,j,m = a->mbs; 763 764 PetscFunctionBegin; 765 if (!a->diag) { 766 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 767 } 768 for (i=0; i<m; i++) { 769 a->diag[i] = a->i[i+1]; 770 for (j=a->i[i]; j<a->i[i+1]; j++) { 771 if (a->j[j] == i) { 772 a->diag[i] = j; 773 break; 774 } 775 } 776 } 777 PetscFunctionReturn(0); 778 } 779 780 781 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 782 783 #undef __FUNCT__ 784 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 785 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 786 { 787 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 788 PetscErrorCode ierr; 789 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs; 790 PetscInt *tia, *tja; 791 792 PetscFunctionBegin; 793 *nn = n; 794 if (!ia) PetscFunctionReturn(0); 795 if (symmetric) { 796 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 797 } else { 798 tia = a->i; tja = a->j; 799 } 800 801 if (!blockcompressed) { 802 /* malloc & create the natural set of indices */ 803 ierr = PetscMalloc2((n+1)*bs,PetscInt,ia,nz*bs,PetscInt,ja);CHKERRQ(ierr); 804 for (i=0; i<n+1; i++) { 805 for (j=0; j<bs; j++) { 806 *ia[i*bs+j] = tia[i]*bs+j; 807 } 808 } 809 for (i=0; i<nz; i++) { 810 for (j=0; j<bs; j++) { 811 *ja[i*bs+j] = tia[i]*bs+j; 812 } 813 } 814 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 815 ierr = PetscFree(tia);CHKERRQ(ierr); 816 ierr = PetscFree(tja);CHKERRQ(ierr); 817 } 818 } else { 819 *ia = tia; 820 *ja = tja; 821 } 822 if (oshift == 1) { 823 for (i=0; i<nz; i++) (*ja)[i]++; 824 for (i=0; i<n+1; i++) (*ia)[i]++; 825 } 826 PetscFunctionReturn(0); 827 } 828 829 #undef __FUNCT__ 830 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 831 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 832 { 833 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 834 PetscErrorCode ierr; 835 PetscInt i,n = a->mbs,nz = a->i[n]; 836 837 PetscFunctionBegin; 838 if (!ia) PetscFunctionReturn(0); 839 if (!blockcompressed) { 840 ierr = PetscFree2(*ia,*ja);CHKERRQ(ierr); 841 }else if (symmetric) { 842 ierr = PetscFree(*ia);CHKERRQ(ierr); 843 ierr = PetscFree(*ja);CHKERRQ(ierr); 844 } else if (oshift == 1) { /* blockcompressed */ 845 for (i=0; i<nz; i++) a->j[i]--; 846 for (i=0; i<n+1; i++) a->i[i]--; 847 } 848 PetscFunctionReturn(0); 849 } 850 851 #undef __FUNCT__ 852 #define __FUNCT__ "MatDestroy_SeqBAIJ" 853 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 854 { 855 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 856 PetscErrorCode ierr; 857 858 PetscFunctionBegin; 859 #if defined(PETSC_USE_LOG) 860 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz); 861 #endif 862 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 863 if (a->row) { 864 ierr = ISDestroy(a->row);CHKERRQ(ierr); 865 } 866 if (a->col) { 867 ierr = ISDestroy(a->col);CHKERRQ(ierr); 868 } 869 ierr = PetscFree(a->diag);CHKERRQ(ierr); 870 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 871 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 872 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 873 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 874 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 875 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 876 #if defined(PETSC_USE_MAT_SINGLE) 877 ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr); 878 #endif 879 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 880 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 881 882 ierr = PetscFree(a);CHKERRQ(ierr); 883 884 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 885 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 886 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 887 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 888 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 889 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 890 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 891 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 892 PetscFunctionReturn(0); 893 } 894 895 #undef __FUNCT__ 896 #define __FUNCT__ "MatSetOption_SeqBAIJ" 897 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg) 898 { 899 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 900 PetscErrorCode ierr; 901 902 PetscFunctionBegin; 903 switch (op) { 904 case MAT_ROW_ORIENTED: 905 a->roworiented = flg; 906 break; 907 case MAT_KEEP_ZEROED_ROWS: 908 a->keepzeroedrows = flg; 909 break; 910 case MAT_NEW_NONZERO_LOCATIONS: 911 a->nonew = (flg ? 0 : 1); 912 break; 913 case MAT_NEW_NONZERO_LOCATION_ERR: 914 a->nonew = (flg ? -1 : 0); 915 break; 916 case MAT_NEW_NONZERO_ALLOCATION_ERR: 917 a->nonew = (flg ? -2 : 0); 918 break; 919 case MAT_NEW_DIAGONALS: 920 case MAT_IGNORE_OFF_PROC_ENTRIES: 921 case MAT_USE_HASH_TABLE: 922 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 923 break; 924 case MAT_SYMMETRIC: 925 case MAT_STRUCTURALLY_SYMMETRIC: 926 case MAT_HERMITIAN: 927 case MAT_SYMMETRY_ETERNAL: 928 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 929 break; 930 default: 931 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 932 } 933 PetscFunctionReturn(0); 934 } 935 936 #undef __FUNCT__ 937 #define __FUNCT__ "MatGetRow_SeqBAIJ" 938 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 939 { 940 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 941 PetscErrorCode ierr; 942 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 943 MatScalar *aa,*aa_i; 944 PetscScalar *v_i; 945 946 PetscFunctionBegin; 947 bs = A->rmap.bs; 948 ai = a->i; 949 aj = a->j; 950 aa = a->a; 951 bs2 = a->bs2; 952 953 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 954 955 bn = row/bs; /* Block number */ 956 bp = row % bs; /* Block Position */ 957 M = ai[bn+1] - ai[bn]; 958 *nz = bs*M; 959 960 if (v) { 961 *v = 0; 962 if (*nz) { 963 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 964 for (i=0; i<M; i++) { /* for each block in the block row */ 965 v_i = *v + i*bs; 966 aa_i = aa + bs2*(ai[bn] + i); 967 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 968 } 969 } 970 } 971 972 if (idx) { 973 *idx = 0; 974 if (*nz) { 975 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 976 for (i=0; i<M; i++) { /* for each block in the block row */ 977 idx_i = *idx + i*bs; 978 itmp = bs*aj[ai[bn] + i]; 979 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 980 } 981 } 982 } 983 PetscFunctionReturn(0); 984 } 985 986 #undef __FUNCT__ 987 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 988 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 989 { 990 PetscErrorCode ierr; 991 992 PetscFunctionBegin; 993 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 994 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 995 PetscFunctionReturn(0); 996 } 997 998 #undef __FUNCT__ 999 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1000 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 1001 { 1002 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1003 Mat C; 1004 PetscErrorCode ierr; 1005 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1006 PetscInt *rows,*cols,bs2=a->bs2; 1007 PetscScalar *array; 1008 1009 PetscFunctionBegin; 1010 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 1011 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1012 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1013 1014 #if defined(PETSC_USE_MAT_SINGLE) 1015 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 1016 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 1017 #else 1018 array = a->a; 1019 #endif 1020 1021 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1022 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1023 ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr); 1024 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1025 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1026 ierr = PetscFree(col);CHKERRQ(ierr); 1027 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1028 cols = rows + bs; 1029 for (i=0; i<mbs; i++) { 1030 cols[0] = i*bs; 1031 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1032 len = ai[i+1] - ai[i]; 1033 for (j=0; j<len; j++) { 1034 rows[0] = (*aj++)*bs; 1035 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1036 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1037 array += bs2; 1038 } 1039 } 1040 ierr = PetscFree(rows);CHKERRQ(ierr); 1041 #if defined(PETSC_USE_MAT_SINGLE) 1042 ierr = PetscFree(array); 1043 #endif 1044 1045 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1046 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1047 1048 if (B) { 1049 *B = C; 1050 } else { 1051 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1052 } 1053 PetscFunctionReturn(0); 1054 } 1055 1056 #undef __FUNCT__ 1057 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1058 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1059 { 1060 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1061 PetscErrorCode ierr; 1062 PetscInt i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2; 1063 int fd; 1064 PetscScalar *aa; 1065 FILE *file; 1066 1067 PetscFunctionBegin; 1068 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1069 ierr = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1070 col_lens[0] = MAT_FILE_COOKIE; 1071 1072 col_lens[1] = A->rmap.N; 1073 col_lens[2] = A->cmap.n; 1074 col_lens[3] = a->nz*bs2; 1075 1076 /* store lengths of each row and write (including header) to file */ 1077 count = 0; 1078 for (i=0; i<a->mbs; i++) { 1079 for (j=0; j<bs; j++) { 1080 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1081 } 1082 } 1083 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1084 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1085 1086 /* store column indices (zero start index) */ 1087 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1088 count = 0; 1089 for (i=0; i<a->mbs; i++) { 1090 for (j=0; j<bs; j++) { 1091 for (k=a->i[i]; k<a->i[i+1]; k++) { 1092 for (l=0; l<bs; l++) { 1093 jj[count++] = bs*a->j[k] + l; 1094 } 1095 } 1096 } 1097 } 1098 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1099 ierr = PetscFree(jj);CHKERRQ(ierr); 1100 1101 /* store nonzero values */ 1102 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1103 count = 0; 1104 for (i=0; i<a->mbs; i++) { 1105 for (j=0; j<bs; j++) { 1106 for (k=a->i[i]; k<a->i[i+1]; k++) { 1107 for (l=0; l<bs; l++) { 1108 aa[count++] = a->a[bs2*k + l*bs + j]; 1109 } 1110 } 1111 } 1112 } 1113 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1114 ierr = PetscFree(aa);CHKERRQ(ierr); 1115 1116 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1117 if (file) { 1118 fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs); 1119 } 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1125 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1126 { 1127 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1128 PetscErrorCode ierr; 1129 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 1130 PetscViewerFormat format; 1131 1132 PetscFunctionBegin; 1133 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1134 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1135 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1136 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1137 Mat aij; 1138 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1139 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1140 ierr = MatDestroy(aij);CHKERRQ(ierr); 1141 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1142 PetscFunctionReturn(0); 1143 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1144 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1145 for (i=0; i<a->mbs; i++) { 1146 for (j=0; j<bs; j++) { 1147 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1148 for (k=a->i[i]; k<a->i[i+1]; k++) { 1149 for (l=0; l<bs; l++) { 1150 #if defined(PETSC_USE_COMPLEX) 1151 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1152 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1153 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1154 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1155 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1156 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1157 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1158 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1159 } 1160 #else 1161 if (a->a[bs2*k + l*bs + j] != 0.0) { 1162 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1163 } 1164 #endif 1165 } 1166 } 1167 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1168 } 1169 } 1170 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1171 } else { 1172 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1173 for (i=0; i<a->mbs; i++) { 1174 for (j=0; j<bs; j++) { 1175 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1176 for (k=a->i[i]; k<a->i[i+1]; k++) { 1177 for (l=0; l<bs; l++) { 1178 #if defined(PETSC_USE_COMPLEX) 1179 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1180 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1181 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1182 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1183 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1184 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1185 } else { 1186 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1187 } 1188 #else 1189 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1190 #endif 1191 } 1192 } 1193 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1194 } 1195 } 1196 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1197 } 1198 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1199 PetscFunctionReturn(0); 1200 } 1201 1202 #undef __FUNCT__ 1203 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1204 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1205 { 1206 Mat A = (Mat) Aa; 1207 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1208 PetscErrorCode ierr; 1209 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 1210 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1211 MatScalar *aa; 1212 PetscViewer viewer; 1213 1214 PetscFunctionBegin; 1215 1216 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1217 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1218 1219 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1220 1221 /* loop over matrix elements drawing boxes */ 1222 color = PETSC_DRAW_BLUE; 1223 for (i=0,row=0; i<mbs; i++,row+=bs) { 1224 for (j=a->i[i]; j<a->i[i+1]; j++) { 1225 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1226 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1227 aa = a->a + j*bs2; 1228 for (k=0; k<bs; k++) { 1229 for (l=0; l<bs; l++) { 1230 if (PetscRealPart(*aa++) >= 0.) continue; 1231 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1232 } 1233 } 1234 } 1235 } 1236 color = PETSC_DRAW_CYAN; 1237 for (i=0,row=0; i<mbs; i++,row+=bs) { 1238 for (j=a->i[i]; j<a->i[i+1]; j++) { 1239 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1240 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1241 aa = a->a + j*bs2; 1242 for (k=0; k<bs; k++) { 1243 for (l=0; l<bs; l++) { 1244 if (PetscRealPart(*aa++) != 0.) continue; 1245 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1246 } 1247 } 1248 } 1249 } 1250 1251 color = PETSC_DRAW_RED; 1252 for (i=0,row=0; i<mbs; i++,row+=bs) { 1253 for (j=a->i[i]; j<a->i[i+1]; j++) { 1254 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1255 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1256 aa = a->a + j*bs2; 1257 for (k=0; k<bs; k++) { 1258 for (l=0; l<bs; l++) { 1259 if (PetscRealPart(*aa++) <= 0.) continue; 1260 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1261 } 1262 } 1263 } 1264 } 1265 PetscFunctionReturn(0); 1266 } 1267 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1270 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1271 { 1272 PetscErrorCode ierr; 1273 PetscReal xl,yl,xr,yr,w,h; 1274 PetscDraw draw; 1275 PetscTruth isnull; 1276 1277 PetscFunctionBegin; 1278 1279 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1280 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1281 1282 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1283 xr = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 1284 xr += w; yr += h; xl = -w; yl = -h; 1285 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1286 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1287 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1288 PetscFunctionReturn(0); 1289 } 1290 1291 #undef __FUNCT__ 1292 #define __FUNCT__ "MatView_SeqBAIJ" 1293 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1294 { 1295 PetscErrorCode ierr; 1296 PetscTruth iascii,isbinary,isdraw; 1297 1298 PetscFunctionBegin; 1299 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1300 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1301 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1302 if (iascii){ 1303 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1304 } else if (isbinary) { 1305 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1306 } else if (isdraw) { 1307 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1308 } else { 1309 Mat B; 1310 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1311 ierr = MatView(B,viewer);CHKERRQ(ierr); 1312 ierr = MatDestroy(B);CHKERRQ(ierr); 1313 } 1314 PetscFunctionReturn(0); 1315 } 1316 1317 1318 #undef __FUNCT__ 1319 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1320 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1321 { 1322 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1323 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1324 PetscInt *ai = a->i,*ailen = a->ilen; 1325 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 1326 MatScalar *ap,*aa = a->a; 1327 1328 PetscFunctionBegin; 1329 for (k=0; k<m; k++) { /* loop over rows */ 1330 row = im[k]; brow = row/bs; 1331 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1332 if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1333 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1334 nrow = ailen[brow]; 1335 for (l=0; l<n; l++) { /* loop over columns */ 1336 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1337 if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1338 col = in[l] ; 1339 bcol = col/bs; 1340 cidx = col%bs; 1341 ridx = row%bs; 1342 high = nrow; 1343 low = 0; /* assume unsorted */ 1344 while (high-low > 5) { 1345 t = (low+high)/2; 1346 if (rp[t] > bcol) high = t; 1347 else low = t; 1348 } 1349 for (i=low; i<high; i++) { 1350 if (rp[i] > bcol) break; 1351 if (rp[i] == bcol) { 1352 *v++ = ap[bs2*i+bs*cidx+ridx]; 1353 goto finished; 1354 } 1355 } 1356 *v++ = 0.0; 1357 finished:; 1358 } 1359 } 1360 PetscFunctionReturn(0); 1361 } 1362 1363 #if defined(PETSC_USE_MAT_SINGLE) 1364 #undef __FUNCT__ 1365 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1366 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 1367 { 1368 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1369 PetscErrorCode ierr; 1370 PetscInt i,N = m*n*b->bs2; 1371 MatScalar *vsingle; 1372 1373 PetscFunctionBegin; 1374 if (N > b->setvalueslen) { 1375 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 1376 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1377 b->setvalueslen = N; 1378 } 1379 vsingle = b->setvaluescopy; 1380 for (i=0; i<N; i++) { 1381 vsingle[i] = v[i]; 1382 } 1383 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1384 PetscFunctionReturn(0); 1385 } 1386 #endif 1387 1388 1389 #undef __FUNCT__ 1390 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1391 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is) 1392 { 1393 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1394 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1395 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1396 PetscErrorCode ierr; 1397 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 1398 PetscTruth roworiented=a->roworiented; 1399 const MatScalar *value = v; 1400 MatScalar *ap,*aa = a->a,*bap; 1401 1402 PetscFunctionBegin; 1403 if (roworiented) { 1404 stepval = (n-1)*bs; 1405 } else { 1406 stepval = (m-1)*bs; 1407 } 1408 for (k=0; k<m; k++) { /* loop over added rows */ 1409 row = im[k]; 1410 if (row < 0) continue; 1411 #if defined(PETSC_USE_DEBUG) 1412 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1413 #endif 1414 rp = aj + ai[row]; 1415 ap = aa + bs2*ai[row]; 1416 rmax = imax[row]; 1417 nrow = ailen[row]; 1418 low = 0; 1419 high = nrow; 1420 for (l=0; l<n; l++) { /* loop over added columns */ 1421 if (in[l] < 0) continue; 1422 #if defined(PETSC_USE_DEBUG) 1423 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1424 #endif 1425 col = in[l]; 1426 if (roworiented) { 1427 value = v + k*(stepval+bs)*bs + l*bs; 1428 } else { 1429 value = v + l*(stepval+bs)*bs + k*bs; 1430 } 1431 if (col <= lastcol) low = 0; else high = nrow; 1432 lastcol = col; 1433 while (high-low > 7) { 1434 t = (low+high)/2; 1435 if (rp[t] > col) high = t; 1436 else low = t; 1437 } 1438 for (i=low; i<high; i++) { 1439 if (rp[i] > col) break; 1440 if (rp[i] == col) { 1441 bap = ap + bs2*i; 1442 if (roworiented) { 1443 if (is == ADD_VALUES) { 1444 for (ii=0; ii<bs; ii++,value+=stepval) { 1445 for (jj=ii; jj<bs2; jj+=bs) { 1446 bap[jj] += *value++; 1447 } 1448 } 1449 } else { 1450 for (ii=0; ii<bs; ii++,value+=stepval) { 1451 for (jj=ii; jj<bs2; jj+=bs) { 1452 bap[jj] = *value++; 1453 } 1454 } 1455 } 1456 } else { 1457 if (is == ADD_VALUES) { 1458 for (ii=0; ii<bs; ii++,value+=stepval) { 1459 for (jj=0; jj<bs; jj++) { 1460 *bap++ += *value++; 1461 } 1462 } 1463 } else { 1464 for (ii=0; ii<bs; ii++,value+=stepval) { 1465 for (jj=0; jj<bs; jj++) { 1466 *bap++ = *value++; 1467 } 1468 } 1469 } 1470 } 1471 goto noinsert2; 1472 } 1473 } 1474 if (nonew == 1) goto noinsert2; 1475 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1476 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1477 N = nrow++ - 1; high++; 1478 /* shift up all the later entries in this row */ 1479 for (ii=N; ii>=i; ii--) { 1480 rp[ii+1] = rp[ii]; 1481 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1482 } 1483 if (N >= i) { 1484 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1485 } 1486 rp[i] = col; 1487 bap = ap + bs2*i; 1488 if (roworiented) { 1489 for (ii=0; ii<bs; ii++,value+=stepval) { 1490 for (jj=ii; jj<bs2; jj+=bs) { 1491 bap[jj] = *value++; 1492 } 1493 } 1494 } else { 1495 for (ii=0; ii<bs; ii++,value+=stepval) { 1496 for (jj=0; jj<bs; jj++) { 1497 *bap++ = *value++; 1498 } 1499 } 1500 } 1501 noinsert2:; 1502 low = i; 1503 } 1504 ailen[row] = nrow; 1505 } 1506 PetscFunctionReturn(0); 1507 } 1508 1509 #undef __FUNCT__ 1510 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1511 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1512 { 1513 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1514 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1515 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 1516 PetscErrorCode ierr; 1517 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1518 MatScalar *aa = a->a,*ap; 1519 PetscReal ratio=0.6; 1520 1521 PetscFunctionBegin; 1522 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1523 1524 if (m) rmax = ailen[0]; 1525 for (i=1; i<mbs; i++) { 1526 /* move each row back by the amount of empty slots (fshift) before it*/ 1527 fshift += imax[i-1] - ailen[i-1]; 1528 rmax = PetscMax(rmax,ailen[i]); 1529 if (fshift) { 1530 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1531 N = ailen[i]; 1532 for (j=0; j<N; j++) { 1533 ip[j-fshift] = ip[j]; 1534 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1535 } 1536 } 1537 ai[i] = ai[i-1] + ailen[i-1]; 1538 } 1539 if (mbs) { 1540 fshift += imax[mbs-1] - ailen[mbs-1]; 1541 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1542 } 1543 /* reset ilen and imax for each row */ 1544 for (i=0; i<mbs; i++) { 1545 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1546 } 1547 a->nz = ai[mbs]; 1548 1549 /* diagonals may have moved, so kill the diagonal pointers */ 1550 a->idiagvalid = PETSC_FALSE; 1551 if (fshift && a->diag) { 1552 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1553 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1554 a->diag = 0; 1555 } 1556 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1557 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1558 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1559 a->reallocs = 0; 1560 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1561 1562 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1563 if (a->compressedrow.use){ 1564 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1565 } 1566 1567 A->same_nonzero = PETSC_TRUE; 1568 PetscFunctionReturn(0); 1569 } 1570 1571 /* 1572 This function returns an array of flags which indicate the locations of contiguous 1573 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1574 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1575 Assume: sizes should be long enough to hold all the values. 1576 */ 1577 #undef __FUNCT__ 1578 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1579 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1580 { 1581 PetscInt i,j,k,row; 1582 PetscTruth flg; 1583 1584 PetscFunctionBegin; 1585 for (i=0,j=0; i<n; j++) { 1586 row = idx[i]; 1587 if (row%bs!=0) { /* Not the begining of a block */ 1588 sizes[j] = 1; 1589 i++; 1590 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1591 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1592 i++; 1593 } else { /* Begining of the block, so check if the complete block exists */ 1594 flg = PETSC_TRUE; 1595 for (k=1; k<bs; k++) { 1596 if (row+k != idx[i+k]) { /* break in the block */ 1597 flg = PETSC_FALSE; 1598 break; 1599 } 1600 } 1601 if (flg) { /* No break in the bs */ 1602 sizes[j] = bs; 1603 i+= bs; 1604 } else { 1605 sizes[j] = 1; 1606 i++; 1607 } 1608 } 1609 } 1610 *bs_max = j; 1611 PetscFunctionReturn(0); 1612 } 1613 1614 #undef __FUNCT__ 1615 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1616 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 1617 { 1618 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1619 PetscErrorCode ierr; 1620 PetscInt i,j,k,count,*rows; 1621 PetscInt bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max; 1622 PetscScalar zero = 0.0; 1623 MatScalar *aa; 1624 1625 PetscFunctionBegin; 1626 /* Make a copy of the IS and sort it */ 1627 /* allocate memory for rows,sizes */ 1628 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1629 sizes = rows + is_n; 1630 1631 /* copy IS values to rows, and sort them */ 1632 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1633 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1634 if (baij->keepzeroedrows) { 1635 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1636 bs_max = is_n; 1637 A->same_nonzero = PETSC_TRUE; 1638 } else { 1639 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1640 A->same_nonzero = PETSC_FALSE; 1641 } 1642 1643 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1644 row = rows[j]; 1645 if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 1646 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1647 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1648 if (sizes[i] == bs && !baij->keepzeroedrows) { 1649 if (diag != 0.0) { 1650 if (baij->ilen[row/bs] > 0) { 1651 baij->ilen[row/bs] = 1; 1652 baij->j[baij->i[row/bs]] = row/bs; 1653 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1654 } 1655 /* Now insert all the diagonal values for this bs */ 1656 for (k=0; k<bs; k++) { 1657 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 1658 } 1659 } else { /* (diag == 0.0) */ 1660 baij->ilen[row/bs] = 0; 1661 } /* end (diag == 0.0) */ 1662 } else { /* (sizes[i] != bs) */ 1663 #if defined (PETSC_USE_DEBUG) 1664 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 1665 #endif 1666 for (k=0; k<count; k++) { 1667 aa[0] = zero; 1668 aa += bs; 1669 } 1670 if (diag != 0.0) { 1671 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 1672 } 1673 } 1674 } 1675 1676 ierr = PetscFree(rows);CHKERRQ(ierr); 1677 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1678 PetscFunctionReturn(0); 1679 } 1680 1681 #undef __FUNCT__ 1682 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1683 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1684 { 1685 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1686 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 1687 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1688 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 1689 PetscErrorCode ierr; 1690 PetscInt ridx,cidx,bs2=a->bs2; 1691 PetscTruth roworiented=a->roworiented; 1692 MatScalar *ap,value,*aa=a->a,*bap; 1693 1694 PetscFunctionBegin; 1695 for (k=0; k<m; k++) { /* loop over added rows */ 1696 row = im[k]; 1697 brow = row/bs; 1698 if (row < 0) continue; 1699 #if defined(PETSC_USE_DEBUG) 1700 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 1701 #endif 1702 rp = aj + ai[brow]; 1703 ap = aa + bs2*ai[brow]; 1704 rmax = imax[brow]; 1705 nrow = ailen[brow]; 1706 low = 0; 1707 high = nrow; 1708 for (l=0; l<n; l++) { /* loop over added columns */ 1709 if (in[l] < 0) continue; 1710 #if defined(PETSC_USE_DEBUG) 1711 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 1712 #endif 1713 col = in[l]; bcol = col/bs; 1714 ridx = row % bs; cidx = col % bs; 1715 if (roworiented) { 1716 value = v[l + k*n]; 1717 } else { 1718 value = v[k + l*m]; 1719 } 1720 if (col <= lastcol) low = 0; else high = nrow; 1721 lastcol = col; 1722 while (high-low > 7) { 1723 t = (low+high)/2; 1724 if (rp[t] > bcol) high = t; 1725 else low = t; 1726 } 1727 for (i=low; i<high; i++) { 1728 if (rp[i] > bcol) break; 1729 if (rp[i] == bcol) { 1730 bap = ap + bs2*i + bs*cidx + ridx; 1731 if (is == ADD_VALUES) *bap += value; 1732 else *bap = value; 1733 goto noinsert1; 1734 } 1735 } 1736 if (nonew == 1) goto noinsert1; 1737 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1738 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1739 N = nrow++ - 1; high++; 1740 /* shift up all the later entries in this row */ 1741 for (ii=N; ii>=i; ii--) { 1742 rp[ii+1] = rp[ii]; 1743 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1744 } 1745 if (N>=i) { 1746 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1747 } 1748 rp[i] = bcol; 1749 ap[bs2*i + bs*cidx + ridx] = value; 1750 a->nz++; 1751 noinsert1:; 1752 low = i; 1753 } 1754 ailen[brow] = nrow; 1755 } 1756 A->same_nonzero = PETSC_FALSE; 1757 PetscFunctionReturn(0); 1758 } 1759 1760 1761 #undef __FUNCT__ 1762 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1763 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1764 { 1765 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1766 Mat outA; 1767 PetscErrorCode ierr; 1768 PetscTruth row_identity,col_identity; 1769 1770 PetscFunctionBegin; 1771 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1772 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1773 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1774 if (!row_identity || !col_identity) { 1775 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 1776 } 1777 1778 outA = inA; 1779 inA->factor = FACTOR_LU; 1780 1781 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1782 1783 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1784 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 1785 a->row = row; 1786 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1787 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 1788 a->col = col; 1789 1790 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1791 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1792 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1793 1794 /* 1795 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1796 for ILU(0) factorization with natural ordering 1797 */ 1798 if (inA->rmap.bs < 8) { 1799 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1800 } else { 1801 if (!a->solve_work) { 1802 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1803 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1804 } 1805 } 1806 1807 ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1808 1809 PetscFunctionReturn(0); 1810 } 1811 1812 EXTERN_C_BEGIN 1813 #undef __FUNCT__ 1814 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1815 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 1816 { 1817 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1818 PetscInt i,nz,nbs; 1819 1820 PetscFunctionBegin; 1821 nz = baij->maxnz/baij->bs2; 1822 nbs = baij->nbs; 1823 for (i=0; i<nz; i++) { 1824 baij->j[i] = indices[i]; 1825 } 1826 baij->nz = nz; 1827 for (i=0; i<nbs; i++) { 1828 baij->ilen[i] = baij->imax[i]; 1829 } 1830 1831 PetscFunctionReturn(0); 1832 } 1833 EXTERN_C_END 1834 1835 #undef __FUNCT__ 1836 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1837 /*@ 1838 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1839 in the matrix. 1840 1841 Input Parameters: 1842 + mat - the SeqBAIJ matrix 1843 - indices - the column indices 1844 1845 Level: advanced 1846 1847 Notes: 1848 This can be called if you have precomputed the nonzero structure of the 1849 matrix and want to provide it to the matrix object to improve the performance 1850 of the MatSetValues() operation. 1851 1852 You MUST have set the correct numbers of nonzeros per row in the call to 1853 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 1854 1855 MUST be called before any calls to MatSetValues(); 1856 1857 @*/ 1858 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1859 { 1860 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1861 1862 PetscFunctionBegin; 1863 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1864 PetscValidPointer(indices,2); 1865 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1866 if (f) { 1867 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1868 } else { 1869 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 1870 } 1871 PetscFunctionReturn(0); 1872 } 1873 1874 #undef __FUNCT__ 1875 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 1876 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 1877 { 1878 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1879 PetscErrorCode ierr; 1880 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 1881 PetscReal atmp; 1882 PetscScalar *x,zero = 0.0; 1883 MatScalar *aa; 1884 PetscInt ncols,brow,krow,kcol; 1885 1886 PetscFunctionBegin; 1887 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1888 bs = A->rmap.bs; 1889 aa = a->a; 1890 ai = a->i; 1891 aj = a->j; 1892 mbs = a->mbs; 1893 1894 ierr = VecSet(v,zero);CHKERRQ(ierr); 1895 if (idx) { 1896 for (i=0; i<A->rmap.n;i++) idx[i] = 0; 1897 } 1898 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1899 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1900 if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1901 for (i=0; i<mbs; i++) { 1902 ncols = ai[1] - ai[0]; ai++; 1903 brow = bs*i; 1904 for (j=0; j<ncols; j++){ 1905 for (kcol=0; kcol<bs; kcol++){ 1906 for (krow=0; krow<bs; krow++){ 1907 atmp = PetscAbsScalar(*aa);aa++; 1908 row = brow + krow; /* row index */ 1909 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 1910 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 1911 } 1912 } 1913 aj++; 1914 } 1915 } 1916 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1917 PetscFunctionReturn(0); 1918 } 1919 1920 #undef __FUNCT__ 1921 #define __FUNCT__ "MatCopy_SeqBAIJ" 1922 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 1923 { 1924 PetscErrorCode ierr; 1925 1926 PetscFunctionBegin; 1927 /* If the two matrices have the same copy implementation, use fast copy. */ 1928 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1929 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1930 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 1931 1932 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 1933 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1934 } 1935 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 1936 } else { 1937 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1938 } 1939 PetscFunctionReturn(0); 1940 } 1941 1942 #undef __FUNCT__ 1943 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1944 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 1945 { 1946 PetscErrorCode ierr; 1947 1948 PetscFunctionBegin; 1949 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 1950 PetscFunctionReturn(0); 1951 } 1952 1953 #undef __FUNCT__ 1954 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1955 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1956 { 1957 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1958 PetscFunctionBegin; 1959 *array = a->a; 1960 PetscFunctionReturn(0); 1961 } 1962 1963 #undef __FUNCT__ 1964 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1965 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1966 { 1967 PetscFunctionBegin; 1968 PetscFunctionReturn(0); 1969 } 1970 1971 #include "petscblaslapack.h" 1972 #undef __FUNCT__ 1973 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1974 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1975 { 1976 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1977 PetscErrorCode ierr; 1978 PetscInt i,bs=Y->rmap.bs,j,bs2; 1979 PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz; 1980 1981 PetscFunctionBegin; 1982 if (str == SAME_NONZERO_PATTERN) { 1983 PetscScalar alpha = a; 1984 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1985 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1986 if (y->xtoy && y->XtoY != X) { 1987 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1988 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1989 } 1990 if (!y->xtoy) { /* get xtoy */ 1991 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1992 y->XtoY = X; 1993 } 1994 bs2 = bs*bs; 1995 for (i=0; i<x->nz; i++) { 1996 j = 0; 1997 while (j < bs2){ 1998 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 1999 j++; 2000 } 2001 } 2002 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2003 } else { 2004 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2005 } 2006 PetscFunctionReturn(0); 2007 } 2008 2009 #undef __FUNCT__ 2010 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2011 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2012 { 2013 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2014 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2015 PetscScalar *aa = a->a; 2016 2017 PetscFunctionBegin; 2018 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2019 PetscFunctionReturn(0); 2020 } 2021 2022 #undef __FUNCT__ 2023 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2024 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2025 { 2026 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2027 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2028 PetscScalar *aa = a->a; 2029 2030 PetscFunctionBegin; 2031 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2032 PetscFunctionReturn(0); 2033 } 2034 2035 2036 /* -------------------------------------------------------------------*/ 2037 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2038 MatGetRow_SeqBAIJ, 2039 MatRestoreRow_SeqBAIJ, 2040 MatMult_SeqBAIJ_N, 2041 /* 4*/ MatMultAdd_SeqBAIJ_N, 2042 MatMultTranspose_SeqBAIJ, 2043 MatMultTransposeAdd_SeqBAIJ, 2044 MatSolve_SeqBAIJ_N, 2045 0, 2046 0, 2047 /*10*/ 0, 2048 MatLUFactor_SeqBAIJ, 2049 0, 2050 0, 2051 MatTranspose_SeqBAIJ, 2052 /*15*/ MatGetInfo_SeqBAIJ, 2053 MatEqual_SeqBAIJ, 2054 MatGetDiagonal_SeqBAIJ, 2055 MatDiagonalScale_SeqBAIJ, 2056 MatNorm_SeqBAIJ, 2057 /*20*/ 0, 2058 MatAssemblyEnd_SeqBAIJ, 2059 0, 2060 MatSetOption_SeqBAIJ, 2061 MatZeroEntries_SeqBAIJ, 2062 /*25*/ MatZeroRows_SeqBAIJ, 2063 MatLUFactorSymbolic_SeqBAIJ, 2064 MatLUFactorNumeric_SeqBAIJ_N, 2065 MatCholeskyFactorSymbolic_SeqBAIJ, 2066 MatCholeskyFactorNumeric_SeqBAIJ_N, 2067 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2068 MatILUFactorSymbolic_SeqBAIJ, 2069 MatICCFactorSymbolic_SeqBAIJ, 2070 MatGetArray_SeqBAIJ, 2071 MatRestoreArray_SeqBAIJ, 2072 /*35*/ MatDuplicate_SeqBAIJ, 2073 0, 2074 0, 2075 MatILUFactor_SeqBAIJ, 2076 0, 2077 /*40*/ MatAXPY_SeqBAIJ, 2078 MatGetSubMatrices_SeqBAIJ, 2079 MatIncreaseOverlap_SeqBAIJ, 2080 MatGetValues_SeqBAIJ, 2081 MatCopy_SeqBAIJ, 2082 /*45*/ 0, 2083 MatScale_SeqBAIJ, 2084 0, 2085 0, 2086 0, 2087 /*50*/ 0, 2088 MatGetRowIJ_SeqBAIJ, 2089 MatRestoreRowIJ_SeqBAIJ, 2090 0, 2091 0, 2092 /*55*/ 0, 2093 0, 2094 0, 2095 0, 2096 MatSetValuesBlocked_SeqBAIJ, 2097 /*60*/ MatGetSubMatrix_SeqBAIJ, 2098 MatDestroy_SeqBAIJ, 2099 MatView_SeqBAIJ, 2100 0, 2101 0, 2102 /*65*/ 0, 2103 0, 2104 0, 2105 0, 2106 0, 2107 /*70*/ MatGetRowMaxAbs_SeqBAIJ, 2108 MatConvert_Basic, 2109 0, 2110 0, 2111 0, 2112 /*75*/ 0, 2113 0, 2114 0, 2115 0, 2116 0, 2117 /*80*/ 0, 2118 0, 2119 0, 2120 0, 2121 MatLoad_SeqBAIJ, 2122 /*85*/ 0, 2123 0, 2124 0, 2125 0, 2126 0, 2127 /*90*/ 0, 2128 0, 2129 0, 2130 0, 2131 0, 2132 /*95*/ 0, 2133 0, 2134 0, 2135 0, 2136 0, 2137 /*100*/0, 2138 0, 2139 0, 2140 0, 2141 0, 2142 /*105*/0, 2143 MatRealPart_SeqBAIJ, 2144 MatImaginaryPart_SeqBAIJ, 2145 0, 2146 0, 2147 /*110*/0, 2148 0, 2149 0, 2150 0, 2151 MatMissingDiagonal_SeqBAIJ 2152 /*115*/ 2153 }; 2154 2155 EXTERN_C_BEGIN 2156 #undef __FUNCT__ 2157 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2158 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2159 { 2160 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2161 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2162 PetscErrorCode ierr; 2163 2164 PetscFunctionBegin; 2165 if (aij->nonew != 1) { 2166 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2167 } 2168 2169 /* allocate space for values if not already there */ 2170 if (!aij->saved_values) { 2171 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2172 } 2173 2174 /* copy values over */ 2175 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2176 PetscFunctionReturn(0); 2177 } 2178 EXTERN_C_END 2179 2180 EXTERN_C_BEGIN 2181 #undef __FUNCT__ 2182 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2183 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2184 { 2185 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2186 PetscErrorCode ierr; 2187 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2188 2189 PetscFunctionBegin; 2190 if (aij->nonew != 1) { 2191 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2192 } 2193 if (!aij->saved_values) { 2194 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2195 } 2196 2197 /* copy values over */ 2198 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2199 PetscFunctionReturn(0); 2200 } 2201 EXTERN_C_END 2202 2203 EXTERN_C_BEGIN 2204 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2205 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2206 EXTERN_C_END 2207 2208 EXTERN_C_BEGIN 2209 #undef __FUNCT__ 2210 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2211 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2212 { 2213 Mat_SeqBAIJ *b; 2214 PetscErrorCode ierr; 2215 PetscInt i,mbs,nbs,bs2,newbs = bs; 2216 PetscTruth flg,skipallocation = PETSC_FALSE; 2217 2218 PetscFunctionBegin; 2219 2220 if (nz == MAT_SKIP_ALLOCATION) { 2221 skipallocation = PETSC_TRUE; 2222 nz = 0; 2223 } 2224 2225 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2226 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2227 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2228 2229 if (nnz && newbs != bs) { 2230 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2231 } 2232 bs = newbs; 2233 2234 B->rmap.bs = B->cmap.bs = bs; 2235 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2236 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2237 2238 B->preallocated = PETSC_TRUE; 2239 2240 mbs = B->rmap.n/bs; 2241 nbs = B->cmap.n/bs; 2242 bs2 = bs*bs; 2243 2244 if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) { 2245 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs); 2246 } 2247 2248 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2249 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2250 if (nnz) { 2251 for (i=0; i<mbs; i++) { 2252 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2253 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2254 } 2255 } 2256 2257 b = (Mat_SeqBAIJ*)B->data; 2258 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2259 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2260 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2261 2262 B->ops->solve = MatSolve_SeqBAIJ_Update; 2263 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2264 if (!flg) { 2265 switch (bs) { 2266 case 1: 2267 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2268 B->ops->mult = MatMult_SeqBAIJ_1; 2269 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2270 break; 2271 case 2: 2272 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2273 B->ops->mult = MatMult_SeqBAIJ_2; 2274 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2275 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2276 break; 2277 case 3: 2278 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2279 B->ops->mult = MatMult_SeqBAIJ_3; 2280 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2281 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2282 break; 2283 case 4: 2284 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2285 B->ops->mult = MatMult_SeqBAIJ_4; 2286 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2287 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2288 break; 2289 case 5: 2290 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2291 B->ops->mult = MatMult_SeqBAIJ_5; 2292 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2293 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2294 break; 2295 case 6: 2296 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2297 B->ops->mult = MatMult_SeqBAIJ_6; 2298 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2299 break; 2300 case 7: 2301 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2302 B->ops->mult = MatMult_SeqBAIJ_7; 2303 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2304 break; 2305 default: 2306 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2307 B->ops->mult = MatMult_SeqBAIJ_N; 2308 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2309 break; 2310 } 2311 } 2312 B->rmap.bs = bs; 2313 b->mbs = mbs; 2314 b->nbs = nbs; 2315 if (!skipallocation) { 2316 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2317 /* b->ilen will count nonzeros in each block row so far. */ 2318 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2319 if (!nnz) { 2320 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2321 else if (nz <= 0) nz = 1; 2322 for (i=0; i<mbs; i++) b->imax[i] = nz; 2323 nz = nz*mbs; 2324 } else { 2325 nz = 0; 2326 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2327 } 2328 2329 /* allocate the matrix space */ 2330 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 2331 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2332 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2333 b->singlemalloc = PETSC_TRUE; 2334 2335 b->i[0] = 0; 2336 for (i=1; i<mbs+1; i++) { 2337 b->i[i] = b->i[i-1] + b->imax[i-1]; 2338 } 2339 b->free_a = PETSC_TRUE; 2340 b->free_ij = PETSC_TRUE; 2341 } else { 2342 b->free_a = PETSC_FALSE; 2343 b->free_ij = PETSC_FALSE; 2344 } 2345 2346 B->rmap.bs = bs; 2347 b->bs2 = bs2; 2348 b->mbs = mbs; 2349 b->nz = 0; 2350 b->maxnz = nz*bs2; 2351 B->info.nz_unneeded = (PetscReal)b->maxnz; 2352 PetscFunctionReturn(0); 2353 } 2354 EXTERN_C_END 2355 2356 /*MC 2357 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2358 block sparse compressed row format. 2359 2360 Options Database Keys: 2361 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2362 2363 Level: beginner 2364 2365 .seealso: MatCreateSeqBAIJ() 2366 M*/ 2367 2368 EXTERN_C_BEGIN 2369 #undef __FUNCT__ 2370 #define __FUNCT__ "MatCreate_SeqBAIJ" 2371 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2372 { 2373 PetscErrorCode ierr; 2374 PetscMPIInt size; 2375 Mat_SeqBAIJ *b; 2376 2377 PetscFunctionBegin; 2378 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 2379 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2380 2381 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2382 B->data = (void*)b; 2383 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2384 B->factor = 0; 2385 B->mapping = 0; 2386 b->row = 0; 2387 b->col = 0; 2388 b->icol = 0; 2389 b->reallocs = 0; 2390 b->saved_values = 0; 2391 #if defined(PETSC_USE_MAT_SINGLE) 2392 b->setvalueslen = 0; 2393 b->setvaluescopy = PETSC_NULL; 2394 #endif 2395 2396 b->roworiented = PETSC_TRUE; 2397 b->nonew = 0; 2398 b->diag = 0; 2399 b->solve_work = 0; 2400 b->mult_work = 0; 2401 B->spptr = 0; 2402 B->info.nz_unneeded = (PetscReal)b->maxnz; 2403 b->keepzeroedrows = PETSC_FALSE; 2404 b->xtoy = 0; 2405 b->XtoY = 0; 2406 b->compressedrow.use = PETSC_FALSE; 2407 b->compressedrow.nrows = 0; 2408 b->compressedrow.i = PETSC_NULL; 2409 b->compressedrow.rindex = PETSC_NULL; 2410 b->compressedrow.checked = PETSC_FALSE; 2411 B->same_nonzero = PETSC_FALSE; 2412 2413 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2414 "MatInvertBlockDiagonal_SeqBAIJ", 2415 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2416 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2417 "MatStoreValues_SeqBAIJ", 2418 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2419 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2420 "MatRetrieveValues_SeqBAIJ", 2421 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2422 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2423 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2424 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2425 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2426 "MatConvert_SeqBAIJ_SeqAIJ", 2427 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2428 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2429 "MatConvert_SeqBAIJ_SeqSBAIJ", 2430 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2431 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2432 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2433 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2434 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 2435 PetscFunctionReturn(0); 2436 } 2437 EXTERN_C_END 2438 2439 #undef __FUNCT__ 2440 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2441 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2442 { 2443 Mat C; 2444 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2445 PetscErrorCode ierr; 2446 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2447 2448 PetscFunctionBegin; 2449 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2450 2451 *B = 0; 2452 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 2453 ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 2454 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 2455 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2456 c = (Mat_SeqBAIJ*)C->data; 2457 2458 C->rmap.N = A->rmap.N; 2459 C->cmap.N = A->cmap.N; 2460 C->rmap.bs = A->rmap.bs; 2461 c->bs2 = a->bs2; 2462 c->mbs = a->mbs; 2463 c->nbs = a->nbs; 2464 2465 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 2466 for (i=0; i<mbs; i++) { 2467 c->imax[i] = a->imax[i]; 2468 c->ilen[i] = a->ilen[i]; 2469 } 2470 2471 /* allocate the matrix space */ 2472 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2473 c->singlemalloc = PETSC_TRUE; 2474 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2475 if (mbs > 0) { 2476 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2477 if (cpvalues == MAT_COPY_VALUES) { 2478 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2479 } else { 2480 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2481 } 2482 } 2483 c->roworiented = a->roworiented; 2484 c->nonew = a->nonew; 2485 2486 if (a->diag) { 2487 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2488 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2489 for (i=0; i<mbs; i++) { 2490 c->diag[i] = a->diag[i]; 2491 } 2492 } else c->diag = 0; 2493 c->nz = a->nz; 2494 c->maxnz = a->maxnz; 2495 c->solve_work = 0; 2496 c->mult_work = 0; 2497 c->free_a = PETSC_TRUE; 2498 c->free_ij = PETSC_TRUE; 2499 C->preallocated = PETSC_TRUE; 2500 C->assembled = PETSC_TRUE; 2501 2502 c->compressedrow.use = a->compressedrow.use; 2503 c->compressedrow.nrows = a->compressedrow.nrows; 2504 c->compressedrow.checked = a->compressedrow.checked; 2505 if ( a->compressedrow.checked && a->compressedrow.use){ 2506 i = a->compressedrow.nrows; 2507 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2508 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2509 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2510 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2511 } else { 2512 c->compressedrow.use = PETSC_FALSE; 2513 c->compressedrow.i = PETSC_NULL; 2514 c->compressedrow.rindex = PETSC_NULL; 2515 } 2516 C->same_nonzero = A->same_nonzero; 2517 *B = C; 2518 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 2519 PetscFunctionReturn(0); 2520 } 2521 2522 #undef __FUNCT__ 2523 #define __FUNCT__ "MatLoad_SeqBAIJ" 2524 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A) 2525 { 2526 Mat_SeqBAIJ *a; 2527 Mat B; 2528 PetscErrorCode ierr; 2529 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2530 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2531 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2532 PetscInt *masked,nmask,tmp,bs2,ishift; 2533 PetscMPIInt size; 2534 int fd; 2535 PetscScalar *aa; 2536 MPI_Comm comm = ((PetscObject)viewer)->comm; 2537 2538 PetscFunctionBegin; 2539 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 2540 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2541 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2542 bs2 = bs*bs; 2543 2544 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2545 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2546 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2547 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2548 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2549 M = header[1]; N = header[2]; nz = header[3]; 2550 2551 if (header[3] < 0) { 2552 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2553 } 2554 2555 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2556 2557 /* 2558 This code adds extra rows to make sure the number of rows is 2559 divisible by the blocksize 2560 */ 2561 mbs = M/bs; 2562 extra_rows = bs - M + bs*(mbs); 2563 if (extra_rows == bs) extra_rows = 0; 2564 else mbs++; 2565 if (extra_rows) { 2566 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2567 } 2568 2569 /* read in row lengths */ 2570 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2571 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2572 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2573 2574 /* read in column indices */ 2575 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2576 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2577 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2578 2579 /* loop over row lengths determining block row lengths */ 2580 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2581 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2582 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2583 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2584 masked = mask + mbs; 2585 rowcount = 0; nzcount = 0; 2586 for (i=0; i<mbs; i++) { 2587 nmask = 0; 2588 for (j=0; j<bs; j++) { 2589 kmax = rowlengths[rowcount]; 2590 for (k=0; k<kmax; k++) { 2591 tmp = jj[nzcount++]/bs; 2592 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2593 } 2594 rowcount++; 2595 } 2596 browlengths[i] += nmask; 2597 /* zero out the mask elements we set */ 2598 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2599 } 2600 2601 /* create our matrix */ 2602 ierr = MatCreate(comm,&B); 2603 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 2604 ierr = MatSetType(B,type);CHKERRQ(ierr); 2605 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 2606 a = (Mat_SeqBAIJ*)B->data; 2607 2608 /* set matrix "i" values */ 2609 a->i[0] = 0; 2610 for (i=1; i<= mbs; i++) { 2611 a->i[i] = a->i[i-1] + browlengths[i-1]; 2612 a->ilen[i-1] = browlengths[i-1]; 2613 } 2614 a->nz = 0; 2615 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2616 2617 /* read in nonzero values */ 2618 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2619 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2620 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2621 2622 /* set "a" and "j" values into matrix */ 2623 nzcount = 0; jcount = 0; 2624 for (i=0; i<mbs; i++) { 2625 nzcountb = nzcount; 2626 nmask = 0; 2627 for (j=0; j<bs; j++) { 2628 kmax = rowlengths[i*bs+j]; 2629 for (k=0; k<kmax; k++) { 2630 tmp = jj[nzcount++]/bs; 2631 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2632 } 2633 } 2634 /* sort the masked values */ 2635 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2636 2637 /* set "j" values into matrix */ 2638 maskcount = 1; 2639 for (j=0; j<nmask; j++) { 2640 a->j[jcount++] = masked[j]; 2641 mask[masked[j]] = maskcount++; 2642 } 2643 /* set "a" values into matrix */ 2644 ishift = bs2*a->i[i]; 2645 for (j=0; j<bs; j++) { 2646 kmax = rowlengths[i*bs+j]; 2647 for (k=0; k<kmax; k++) { 2648 tmp = jj[nzcountb]/bs ; 2649 block = mask[tmp] - 1; 2650 point = jj[nzcountb] - bs*tmp; 2651 idx = ishift + bs2*block + j + bs*point; 2652 a->a[idx] = (MatScalar)aa[nzcountb++]; 2653 } 2654 } 2655 /* zero out the mask elements we set */ 2656 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2657 } 2658 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2659 2660 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2661 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2662 ierr = PetscFree(aa);CHKERRQ(ierr); 2663 ierr = PetscFree(jj);CHKERRQ(ierr); 2664 ierr = PetscFree(mask);CHKERRQ(ierr); 2665 2666 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2667 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2668 ierr = MatView_Private(B);CHKERRQ(ierr); 2669 2670 *A = B; 2671 PetscFunctionReturn(0); 2672 } 2673 2674 #undef __FUNCT__ 2675 #define __FUNCT__ "MatCreateSeqBAIJ" 2676 /*@C 2677 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2678 compressed row) format. For good matrix assembly performance the 2679 user should preallocate the matrix storage by setting the parameter nz 2680 (or the array nnz). By setting these parameters accurately, performance 2681 during matrix assembly can be increased by more than a factor of 50. 2682 2683 Collective on MPI_Comm 2684 2685 Input Parameters: 2686 + comm - MPI communicator, set to PETSC_COMM_SELF 2687 . bs - size of block 2688 . m - number of rows 2689 . n - number of columns 2690 . nz - number of nonzero blocks per block row (same for all rows) 2691 - nnz - array containing the number of nonzero blocks in the various block rows 2692 (possibly different for each block row) or PETSC_NULL 2693 2694 Output Parameter: 2695 . A - the matrix 2696 2697 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2698 MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely 2699 true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles. 2700 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2701 2702 Options Database Keys: 2703 . -mat_no_unroll - uses code that does not unroll the loops in the 2704 block calculations (much slower) 2705 . -mat_block_size - size of the blocks to use 2706 2707 Level: intermediate 2708 2709 Notes: 2710 The number of rows and columns must be divisible by blocksize. 2711 2712 If the nnz parameter is given then the nz parameter is ignored 2713 2714 A nonzero block is any block that as 1 or more nonzeros in it 2715 2716 The block AIJ format is fully compatible with standard Fortran 77 2717 storage. That is, the stored row and column indices can begin at 2718 either one (as in Fortran) or zero. See the users' manual for details. 2719 2720 Specify the preallocated storage with either nz or nnz (not both). 2721 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2722 allocation. For additional details, see the users manual chapter on 2723 matrices. 2724 2725 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2726 @*/ 2727 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 2728 { 2729 PetscErrorCode ierr; 2730 2731 PetscFunctionBegin; 2732 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2733 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2734 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2735 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 2736 PetscFunctionReturn(0); 2737 } 2738 2739 #undef __FUNCT__ 2740 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2741 /*@C 2742 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2743 per row in the matrix. For good matrix assembly performance the 2744 user should preallocate the matrix storage by setting the parameter nz 2745 (or the array nnz). By setting these parameters accurately, performance 2746 during matrix assembly can be increased by more than a factor of 50. 2747 2748 Collective on MPI_Comm 2749 2750 Input Parameters: 2751 + A - the matrix 2752 . bs - size of block 2753 . nz - number of block nonzeros per block row (same for all rows) 2754 - nnz - array containing the number of block nonzeros in the various block rows 2755 (possibly different for each block row) or PETSC_NULL 2756 2757 Options Database Keys: 2758 . -mat_no_unroll - uses code that does not unroll the loops in the 2759 block calculations (much slower) 2760 . -mat_block_size - size of the blocks to use 2761 2762 Level: intermediate 2763 2764 Notes: 2765 If the nnz parameter is given then the nz parameter is ignored 2766 2767 The block AIJ format is fully compatible with standard Fortran 77 2768 storage. That is, the stored row and column indices can begin at 2769 either one (as in Fortran) or zero. See the users' manual for details. 2770 2771 Specify the preallocated storage with either nz or nnz (not both). 2772 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2773 allocation. For additional details, see the users manual chapter on 2774 matrices. 2775 2776 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2777 @*/ 2778 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 2779 { 2780 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 2781 2782 PetscFunctionBegin; 2783 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2784 if (f) { 2785 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2786 } 2787 PetscFunctionReturn(0); 2788 } 2789 2790 #undef __FUNCT__ 2791 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 2792 /*@ 2793 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 2794 (upper triangular entries in CSR format) provided by the user. 2795 2796 Collective on MPI_Comm 2797 2798 Input Parameters: 2799 + comm - must be an MPI communicator of size 1 2800 . bs - size of block 2801 . m - number of rows 2802 . n - number of columns 2803 . i - row indices 2804 . j - column indices 2805 - a - matrix values 2806 2807 Output Parameter: 2808 . mat - the matrix 2809 2810 Level: intermediate 2811 2812 Notes: 2813 The i, j, and a arrays are not copied by this routine, the user must free these arrays 2814 once the matrix is destroyed 2815 2816 You cannot set new nonzero locations into this matrix, that will generate an error. 2817 2818 The i and j indices are 0 based 2819 2820 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 2821 2822 @*/ 2823 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 2824 { 2825 PetscErrorCode ierr; 2826 PetscInt ii; 2827 Mat_SeqBAIJ *baij; 2828 2829 PetscFunctionBegin; 2830 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 2831 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2832 2833 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2834 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 2835 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 2836 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 2837 baij = (Mat_SeqBAIJ*)(*mat)->data; 2838 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 2839 2840 baij->i = i; 2841 baij->j = j; 2842 baij->a = a; 2843 baij->singlemalloc = PETSC_FALSE; 2844 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 2845 baij->free_a = PETSC_FALSE; 2846 baij->free_ij = PETSC_FALSE; 2847 2848 for (ii=0; ii<m; ii++) { 2849 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 2850 #if defined(PETSC_USE_DEBUG) 2851 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 2852 #endif 2853 } 2854 #if defined(PETSC_USE_DEBUG) 2855 for (ii=0; ii<baij->i[m]; ii++) { 2856 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 2857 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 2858 } 2859 #endif 2860 2861 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2862 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2863 PetscFunctionReturn(0); 2864 } 2865