1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 15 /*@C 16 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 17 18 Collective on Mat 19 20 Input Parameters: 21 . mat - the matrix 22 23 Level: advanced 24 @*/ 25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 26 { 27 PetscErrorCode ierr,(*f)(Mat); 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 31 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 32 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 33 34 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 35 if (f) { 36 ierr = (*f)(mat);CHKERRQ(ierr); 37 } else { 38 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 39 } 40 PetscFunctionReturn(0); 41 } 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 47 { 48 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 49 PetscErrorCode ierr; 50 PetscInt *diag_offset,i,bs = A->bs,mbs = a->mbs; 51 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 52 53 PetscFunctionBegin; 54 if (a->idiagvalid) PetscFunctionReturn(0); 55 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56 diag_offset = a->diag; 57 if (!a->idiag) { 58 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59 } 60 diag = a->idiag; 61 mdiag = a->idiag+bs*bs*mbs; 62 /* factor and invert each block */ 63 switch (bs){ 64 case 2: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 4*diag_offset[i]; 67 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 68 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 69 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 70 diag += 4; 71 mdiag += 4; 72 } 73 break; 74 case 3: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 9*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79 diag[8] = odiag[8]; 80 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 81 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 82 mdiag[8] = odiag[8]; 83 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 84 diag += 9; 85 mdiag += 9; 86 } 87 break; 88 case 4: 89 for (i=0; i<mbs; i++) { 90 odiag = v + 16*diag_offset[i]; 91 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 92 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 94 diag += 16; 95 mdiag += 16; 96 } 97 break; 98 case 5: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 25*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 104 diag += 25; 105 mdiag += 25; 106 } 107 break; 108 default: 109 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 110 } 111 a->idiagvalid = PETSC_TRUE; 112 PetscFunctionReturn(0); 113 } 114 EXTERN_C_END 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 119 { 120 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 121 PetscScalar *x,x1,x2,s1,s2; 122 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 123 PetscErrorCode ierr; 124 PetscInt m = a->mbs,i,i2,nz,idx; 125 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 126 127 PetscFunctionBegin; 128 its = its*lits; 129 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 130 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 131 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 132 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 133 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 134 135 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 136 137 diag = a->diag; 138 idiag = a->idiag; 139 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 140 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 141 142 if (flag & SOR_ZERO_INITIAL_GUESS) { 143 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 144 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 145 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 146 i2 = 2; 147 idiag += 4; 148 for (i=1; i<m; i++) { 149 v = aa + 4*ai[i]; 150 vi = aj + ai[i]; 151 nz = diag[i] - ai[i]; 152 s1 = b[i2]; s2 = b[i2+1]; 153 while (nz--) { 154 idx = 2*(*vi++); 155 x1 = x[idx]; x2 = x[1+idx]; 156 s1 -= v[0]*x1 + v[2]*x2; 157 s2 -= v[1]*x1 + v[3]*x2; 158 v += 4; 159 } 160 x[i2] = idiag[0]*s1 + idiag[2]*s2; 161 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 162 idiag += 4; 163 i2 += 2; 164 } 165 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 166 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 167 } 168 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 169 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 170 i2 = 0; 171 mdiag = a->idiag+4*a->mbs; 172 for (i=0; i<m; i++) { 173 x1 = x[i2]; x2 = x[i2+1]; 174 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 175 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 176 mdiag += 4; 177 i2 += 2; 178 } 179 ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 180 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 181 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 182 } 183 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 184 idiag = a->idiag+4*a->mbs - 4; 185 i2 = 2*m - 2; 186 x1 = x[i2]; x2 = x[i2+1]; 187 x[i2] = idiag[0]*x1 + idiag[2]*x2; 188 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 189 idiag -= 4; 190 i2 -= 2; 191 for (i=m-2; i>=0; i--) { 192 v = aa + 4*(diag[i]+1); 193 vi = aj + diag[i] + 1; 194 nz = ai[i+1] - diag[i] - 1; 195 s1 = x[i2]; s2 = x[i2+1]; 196 while (nz--) { 197 idx = 2*(*vi++); 198 x1 = x[idx]; x2 = x[1+idx]; 199 s1 -= v[0]*x1 + v[2]*x2; 200 s2 -= v[1]*x1 + v[3]*x2; 201 v += 4; 202 } 203 x[i2] = idiag[0]*s1 + idiag[2]*s2; 204 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 205 idiag -= 4; 206 i2 -= 2; 207 } 208 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 209 } 210 } else { 211 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 212 } 213 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 214 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 215 PetscFunctionReturn(0); 216 } 217 218 #undef __FUNCT__ 219 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 220 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 221 { 222 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 223 PetscScalar *x,x1,x2,x3,s1,s2,s3; 224 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 225 PetscErrorCode ierr; 226 PetscInt m = a->mbs,i,i2,nz,idx; 227 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 228 229 PetscFunctionBegin; 230 its = its*lits; 231 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 232 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 233 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 234 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 235 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 236 237 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 238 239 diag = a->diag; 240 idiag = a->idiag; 241 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 242 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 243 244 if (flag & SOR_ZERO_INITIAL_GUESS) { 245 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 246 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 247 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 248 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 249 i2 = 3; 250 idiag += 9; 251 for (i=1; i<m; i++) { 252 v = aa + 9*ai[i]; 253 vi = aj + ai[i]; 254 nz = diag[i] - ai[i]; 255 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 256 while (nz--) { 257 idx = 3*(*vi++); 258 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 259 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 260 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 261 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 262 v += 9; 263 } 264 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 265 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 266 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 267 idiag += 9; 268 i2 += 3; 269 } 270 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 271 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 272 } 273 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 274 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 275 i2 = 0; 276 mdiag = a->idiag+9*a->mbs; 277 for (i=0; i<m; i++) { 278 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 279 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 280 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 281 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 282 mdiag += 9; 283 i2 += 3; 284 } 285 ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 286 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 287 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 288 } 289 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 290 idiag = a->idiag+9*a->mbs - 9; 291 i2 = 3*m - 3; 292 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 293 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 294 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 295 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 296 idiag -= 9; 297 i2 -= 3; 298 for (i=m-2; i>=0; i--) { 299 v = aa + 9*(diag[i]+1); 300 vi = aj + diag[i] + 1; 301 nz = ai[i+1] - diag[i] - 1; 302 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 303 while (nz--) { 304 idx = 3*(*vi++); 305 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 306 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 307 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 308 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 309 v += 9; 310 } 311 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 312 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 313 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 314 idiag -= 9; 315 i2 -= 3; 316 } 317 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 318 } 319 } else { 320 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 321 } 322 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 323 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 324 PetscFunctionReturn(0); 325 } 326 327 #undef __FUNCT__ 328 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 329 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 330 { 331 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 332 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 333 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 334 PetscErrorCode ierr; 335 PetscInt m = a->mbs,i,i2,nz,idx; 336 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 337 338 PetscFunctionBegin; 339 its = its*lits; 340 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 341 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 342 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 343 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 344 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 345 346 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 347 348 diag = a->diag; 349 idiag = a->idiag; 350 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 351 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 352 353 if (flag & SOR_ZERO_INITIAL_GUESS) { 354 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 355 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 356 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 357 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 358 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 359 i2 = 4; 360 idiag += 16; 361 for (i=1; i<m; i++) { 362 v = aa + 16*ai[i]; 363 vi = aj + ai[i]; 364 nz = diag[i] - ai[i]; 365 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 366 while (nz--) { 367 idx = 4*(*vi++); 368 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 369 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 370 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 371 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 372 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 373 v += 16; 374 } 375 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 376 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 377 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 378 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 379 idiag += 16; 380 i2 += 4; 381 } 382 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 383 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 384 } 385 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 386 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 387 i2 = 0; 388 mdiag = a->idiag+16*a->mbs; 389 for (i=0; i<m; i++) { 390 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 391 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 392 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 393 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 394 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 395 mdiag += 16; 396 i2 += 4; 397 } 398 ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 399 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 400 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 401 } 402 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 403 idiag = a->idiag+16*a->mbs - 16; 404 i2 = 4*m - 4; 405 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 406 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 407 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 408 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 409 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 410 idiag -= 16; 411 i2 -= 4; 412 for (i=m-2; i>=0; i--) { 413 v = aa + 16*(diag[i]+1); 414 vi = aj + diag[i] + 1; 415 nz = ai[i+1] - diag[i] - 1; 416 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 417 while (nz--) { 418 idx = 4*(*vi++); 419 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 420 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 421 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 422 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 423 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 424 v += 16; 425 } 426 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 427 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 428 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 429 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 430 idiag -= 16; 431 i2 -= 4; 432 } 433 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 434 } 435 } else { 436 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 437 } 438 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 439 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 440 PetscFunctionReturn(0); 441 } 442 443 #undef __FUNCT__ 444 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 445 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 446 { 447 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 448 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 449 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 450 PetscErrorCode ierr; 451 PetscInt m = a->mbs,i,i2,nz,idx; 452 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 453 454 PetscFunctionBegin; 455 its = its*lits; 456 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 457 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 458 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 459 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 460 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 461 462 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 463 464 diag = a->diag; 465 idiag = a->idiag; 466 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 467 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 468 469 if (flag & SOR_ZERO_INITIAL_GUESS) { 470 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 471 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 472 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 473 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 474 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 475 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 476 i2 = 5; 477 idiag += 25; 478 for (i=1; i<m; i++) { 479 v = aa + 25*ai[i]; 480 vi = aj + ai[i]; 481 nz = diag[i] - ai[i]; 482 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 483 while (nz--) { 484 idx = 5*(*vi++); 485 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 486 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 487 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 488 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 489 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 490 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 491 v += 25; 492 } 493 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 494 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 495 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 496 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 497 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 498 idiag += 25; 499 i2 += 5; 500 } 501 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 502 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 503 } 504 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 505 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 506 i2 = 0; 507 mdiag = a->idiag+25*a->mbs; 508 for (i=0; i<m; i++) { 509 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 510 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 511 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 512 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 513 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 514 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 515 mdiag += 25; 516 i2 += 5; 517 } 518 ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 519 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 520 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 521 } 522 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 523 idiag = a->idiag+25*a->mbs - 25; 524 i2 = 5*m - 5; 525 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 526 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 527 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 528 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 529 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 530 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 531 idiag -= 25; 532 i2 -= 5; 533 for (i=m-2; i>=0; i--) { 534 v = aa + 25*(diag[i]+1); 535 vi = aj + diag[i] + 1; 536 nz = ai[i+1] - diag[i] - 1; 537 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 538 while (nz--) { 539 idx = 5*(*vi++); 540 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 541 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 542 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 543 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 544 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 545 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 546 v += 25; 547 } 548 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 549 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 550 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 551 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 552 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 553 idiag -= 25; 554 i2 -= 5; 555 } 556 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 557 } 558 } else { 559 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 560 } 561 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 562 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 563 PetscFunctionReturn(0); 564 } 565 566 /* 567 Special version for direct calls from Fortran (Used in PETSc-fun3d) 568 */ 569 #if defined(PETSC_HAVE_FORTRAN_CAPS) 570 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 571 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 572 #define matsetvaluesblocked4_ matsetvaluesblocked4 573 #endif 574 575 EXTERN_C_BEGIN 576 #undef __FUNCT__ 577 #define __FUNCT__ "matsetvaluesblocked4_" 578 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 579 { 580 Mat A = *AA; 581 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 582 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 583 PetscInt *ai=a->i,*ailen=a->ilen; 584 PetscInt *aj=a->j,stepval,lastcol = -1; 585 const PetscScalar *value = v; 586 MatScalar *ap,*aa = a->a,*bap; 587 588 PetscFunctionBegin; 589 if (A->bs != 4) SETERRABORT(A->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 590 stepval = (n-1)*4; 591 for (k=0; k<m; k++) { /* loop over added rows */ 592 row = im[k]; 593 rp = aj + ai[row]; 594 ap = aa + 16*ai[row]; 595 nrow = ailen[row]; 596 low = 0; 597 high = nrow; 598 for (l=0; l<n; l++) { /* loop over added columns */ 599 col = in[l]; 600 if (col <= lastcol) low = 0; else high = nrow; 601 lastcol = col; 602 value = v + k*(stepval+4)*4 + l*4; 603 while (high-low > 7) { 604 t = (low+high)/2; 605 if (rp[t] > col) high = t; 606 else low = t; 607 } 608 for (i=low; i<high; i++) { 609 if (rp[i] > col) break; 610 if (rp[i] == col) { 611 bap = ap + 16*i; 612 for (ii=0; ii<4; ii++,value+=stepval) { 613 for (jj=ii; jj<16; jj+=4) { 614 bap[jj] += *value++; 615 } 616 } 617 goto noinsert2; 618 } 619 } 620 N = nrow++ - 1; 621 high++; /* added new column index thus must search to one higher than before */ 622 /* shift up all the later entries in this row */ 623 for (ii=N; ii>=i; ii--) { 624 rp[ii+1] = rp[ii]; 625 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 626 } 627 if (N >= i) { 628 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 629 } 630 rp[i] = col; 631 bap = ap + 16*i; 632 for (ii=0; ii<4; ii++,value+=stepval) { 633 for (jj=ii; jj<16; jj+=4) { 634 bap[jj] = *value++; 635 } 636 } 637 noinsert2:; 638 low = i; 639 } 640 ailen[row] = nrow; 641 } 642 PetscFunctionReturnVoid(); 643 } 644 EXTERN_C_END 645 646 #if defined(PETSC_HAVE_FORTRAN_CAPS) 647 #define matsetvalues4_ MATSETVALUES4 648 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 649 #define matsetvalues4_ matsetvalues4 650 #endif 651 652 EXTERN_C_BEGIN 653 #undef __FUNCT__ 654 #define __FUNCT__ "MatSetValues4_" 655 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 656 { 657 Mat A = *AA; 658 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 659 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 660 PetscInt *ai=a->i,*ailen=a->ilen; 661 PetscInt *aj=a->j,brow,bcol; 662 PetscInt ridx,cidx,lastcol = -1; 663 MatScalar *ap,value,*aa=a->a,*bap; 664 665 PetscFunctionBegin; 666 for (k=0; k<m; k++) { /* loop over added rows */ 667 row = im[k]; brow = row/4; 668 rp = aj + ai[brow]; 669 ap = aa + 16*ai[brow]; 670 nrow = ailen[brow]; 671 low = 0; 672 high = nrow; 673 for (l=0; l<n; l++) { /* loop over added columns */ 674 col = in[l]; bcol = col/4; 675 ridx = row % 4; cidx = col % 4; 676 value = v[l + k*n]; 677 if (col <= lastcol) low = 0; else high = nrow; 678 lastcol = col; 679 while (high-low > 7) { 680 t = (low+high)/2; 681 if (rp[t] > bcol) high = t; 682 else low = t; 683 } 684 for (i=low; i<high; i++) { 685 if (rp[i] > bcol) break; 686 if (rp[i] == bcol) { 687 bap = ap + 16*i + 4*cidx + ridx; 688 *bap += value; 689 goto noinsert1; 690 } 691 } 692 N = nrow++ - 1; 693 high++; /* added new column thus must search to one higher than before */ 694 /* shift up all the later entries in this row */ 695 for (ii=N; ii>=i; ii--) { 696 rp[ii+1] = rp[ii]; 697 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 698 } 699 if (N>=i) { 700 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 701 } 702 rp[i] = bcol; 703 ap[16*i + 4*cidx + ridx] = value; 704 noinsert1:; 705 low = i; 706 } 707 ailen[brow] = nrow; 708 } 709 PetscFunctionReturnVoid(); 710 } 711 EXTERN_C_END 712 713 /* UGLY, ugly, ugly 714 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 715 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 716 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 717 converts the entries into single precision and then calls ..._MatScalar() to put them 718 into the single precision data structures. 719 */ 720 #if defined(PETSC_USE_MAT_SINGLE) 721 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 722 #else 723 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 724 #endif 725 726 #define CHUNKSIZE 10 727 728 /* 729 Checks for missing diagonals 730 */ 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 733 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A) 734 { 735 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 736 PetscErrorCode ierr; 737 PetscInt *diag,*jj = a->j,i; 738 739 PetscFunctionBegin; 740 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 741 diag = a->diag; 742 for (i=0; i<a->mbs; i++) { 743 if (jj[diag[i]] != i) { 744 SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i); 745 } 746 } 747 PetscFunctionReturn(0); 748 } 749 750 #undef __FUNCT__ 751 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 752 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 753 { 754 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 755 PetscErrorCode ierr; 756 PetscInt i,j,*diag,m = a->mbs; 757 758 PetscFunctionBegin; 759 if (a->diag) PetscFunctionReturn(0); 760 761 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr); 762 ierr = PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));CHKERRQ(ierr); 763 for (i=0; i<m; i++) { 764 diag[i] = a->i[i+1]; 765 for (j=a->i[i]; j<a->i[i+1]; j++) { 766 if (a->j[j] == i) { 767 diag[i] = j; 768 break; 769 } 770 } 771 } 772 a->diag = diag; 773 PetscFunctionReturn(0); 774 } 775 776 777 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 778 779 #undef __FUNCT__ 780 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 781 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 782 { 783 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 784 PetscErrorCode ierr; 785 PetscInt n = a->mbs,i; 786 787 PetscFunctionBegin; 788 *nn = n; 789 if (!ia) PetscFunctionReturn(0); 790 if (symmetric) { 791 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 792 } else if (oshift == 1) { 793 /* temporarily add 1 to i and j indices */ 794 PetscInt nz = a->i[n]; 795 for (i=0; i<nz; i++) a->j[i]++; 796 for (i=0; i<n+1; i++) a->i[i]++; 797 *ia = a->i; *ja = a->j; 798 } else { 799 *ia = a->i; *ja = a->j; 800 } 801 802 PetscFunctionReturn(0); 803 } 804 805 #undef __FUNCT__ 806 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 807 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 808 { 809 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 810 PetscErrorCode ierr; 811 PetscInt i,n = a->mbs; 812 813 PetscFunctionBegin; 814 if (!ia) PetscFunctionReturn(0); 815 if (symmetric) { 816 ierr = PetscFree(*ia);CHKERRQ(ierr); 817 ierr = PetscFree(*ja);CHKERRQ(ierr); 818 } else if (oshift == 1) { 819 PetscInt nz = a->i[n]-1; 820 for (i=0; i<nz; i++) a->j[i]--; 821 for (i=0; i<n+1; i++) a->i[i]--; 822 } 823 PetscFunctionReturn(0); 824 } 825 826 #undef __FUNCT__ 827 #define __FUNCT__ "MatDestroy_SeqBAIJ" 828 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 829 { 830 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 831 PetscErrorCode ierr; 832 833 PetscFunctionBegin; 834 #if defined(PETSC_USE_LOG) 835 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->m,A->n,a->nz); 836 #endif 837 ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,&a->a,&a->j,&a->i);CHKERRQ(ierr); 838 if (a->row) { 839 ierr = ISDestroy(a->row);CHKERRQ(ierr); 840 } 841 if (a->col) { 842 ierr = ISDestroy(a->col);CHKERRQ(ierr); 843 } 844 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 845 if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);} 846 if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 847 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 848 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 849 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 850 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 851 #if defined(PETSC_USE_MAT_SINGLE) 852 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 853 #endif 854 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 855 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 856 857 ierr = PetscFree(a);CHKERRQ(ierr); 858 859 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 860 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 861 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 862 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 863 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 864 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 865 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 866 PetscFunctionReturn(0); 867 } 868 869 #undef __FUNCT__ 870 #define __FUNCT__ "MatSetOption_SeqBAIJ" 871 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op) 872 { 873 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 874 PetscErrorCode ierr; 875 876 PetscFunctionBegin; 877 switch (op) { 878 case MAT_ROW_ORIENTED: 879 a->roworiented = PETSC_TRUE; 880 break; 881 case MAT_COLUMN_ORIENTED: 882 a->roworiented = PETSC_FALSE; 883 break; 884 case MAT_COLUMNS_SORTED: 885 a->sorted = PETSC_TRUE; 886 break; 887 case MAT_COLUMNS_UNSORTED: 888 a->sorted = PETSC_FALSE; 889 break; 890 case MAT_KEEP_ZEROED_ROWS: 891 a->keepzeroedrows = PETSC_TRUE; 892 break; 893 case MAT_NO_NEW_NONZERO_LOCATIONS: 894 a->nonew = 1; 895 break; 896 case MAT_NEW_NONZERO_LOCATION_ERR: 897 a->nonew = -1; 898 break; 899 case MAT_NEW_NONZERO_ALLOCATION_ERR: 900 a->nonew = -2; 901 break; 902 case MAT_YES_NEW_NONZERO_LOCATIONS: 903 a->nonew = 0; 904 break; 905 case MAT_ROWS_SORTED: 906 case MAT_ROWS_UNSORTED: 907 case MAT_YES_NEW_DIAGONALS: 908 case MAT_IGNORE_OFF_PROC_ENTRIES: 909 case MAT_USE_HASH_TABLE: 910 ierr = PetscVerboseInfo((A,"MatSetOption_SeqBAIJ:Option ignored\n"));CHKERRQ(ierr); 911 break; 912 case MAT_NO_NEW_DIAGONALS: 913 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 914 case MAT_SYMMETRIC: 915 case MAT_STRUCTURALLY_SYMMETRIC: 916 case MAT_NOT_SYMMETRIC: 917 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 918 case MAT_HERMITIAN: 919 case MAT_NOT_HERMITIAN: 920 case MAT_SYMMETRY_ETERNAL: 921 case MAT_NOT_SYMMETRY_ETERNAL: 922 break; 923 default: 924 SETERRQ(PETSC_ERR_SUP,"unknown option"); 925 } 926 PetscFunctionReturn(0); 927 } 928 929 #undef __FUNCT__ 930 #define __FUNCT__ "MatGetRow_SeqBAIJ" 931 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 932 { 933 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 934 PetscErrorCode ierr; 935 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 936 MatScalar *aa,*aa_i; 937 PetscScalar *v_i; 938 939 PetscFunctionBegin; 940 bs = A->bs; 941 ai = a->i; 942 aj = a->j; 943 aa = a->a; 944 bs2 = a->bs2; 945 946 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 947 948 bn = row/bs; /* Block number */ 949 bp = row % bs; /* Block Position */ 950 M = ai[bn+1] - ai[bn]; 951 *nz = bs*M; 952 953 if (v) { 954 *v = 0; 955 if (*nz) { 956 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 957 for (i=0; i<M; i++) { /* for each block in the block row */ 958 v_i = *v + i*bs; 959 aa_i = aa + bs2*(ai[bn] + i); 960 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 961 } 962 } 963 } 964 965 if (idx) { 966 *idx = 0; 967 if (*nz) { 968 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 969 for (i=0; i<M; i++) { /* for each block in the block row */ 970 idx_i = *idx + i*bs; 971 itmp = bs*aj[ai[bn] + i]; 972 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 973 } 974 } 975 } 976 PetscFunctionReturn(0); 977 } 978 979 #undef __FUNCT__ 980 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 981 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 982 { 983 PetscErrorCode ierr; 984 985 PetscFunctionBegin; 986 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 987 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 988 PetscFunctionReturn(0); 989 } 990 991 #undef __FUNCT__ 992 #define __FUNCT__ "MatTranspose_SeqBAIJ" 993 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 994 { 995 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 996 Mat C; 997 PetscErrorCode ierr; 998 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 999 PetscInt *rows,*cols,bs2=a->bs2; 1000 PetscScalar *array; 1001 1002 PetscFunctionBegin; 1003 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 1004 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1005 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1006 1007 #if defined(PETSC_USE_MAT_SINGLE) 1008 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 1009 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 1010 #else 1011 array = a->a; 1012 #endif 1013 1014 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1015 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 1016 ierr = MatSetSizes(C,A->n,A->m,A->n,A->m);CHKERRQ(ierr); 1017 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1018 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1019 ierr = PetscFree(col);CHKERRQ(ierr); 1020 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1021 cols = rows + bs; 1022 for (i=0; i<mbs; i++) { 1023 cols[0] = i*bs; 1024 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1025 len = ai[i+1] - ai[i]; 1026 for (j=0; j<len; j++) { 1027 rows[0] = (*aj++)*bs; 1028 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1029 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1030 array += bs2; 1031 } 1032 } 1033 ierr = PetscFree(rows);CHKERRQ(ierr); 1034 #if defined(PETSC_USE_MAT_SINGLE) 1035 ierr = PetscFree(array); 1036 #endif 1037 1038 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1039 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1040 1041 if (B) { 1042 *B = C; 1043 } else { 1044 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1045 } 1046 PetscFunctionReturn(0); 1047 } 1048 1049 #undef __FUNCT__ 1050 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1051 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1052 { 1053 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1054 PetscErrorCode ierr; 1055 PetscInt i,*col_lens,bs = A->bs,count,*jj,j,k,l,bs2=a->bs2; 1056 int fd; 1057 PetscScalar *aa; 1058 FILE *file; 1059 1060 PetscFunctionBegin; 1061 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1062 ierr = PetscMalloc((4+A->m)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1063 col_lens[0] = MAT_FILE_COOKIE; 1064 1065 col_lens[1] = A->m; 1066 col_lens[2] = A->n; 1067 col_lens[3] = a->nz*bs2; 1068 1069 /* store lengths of each row and write (including header) to file */ 1070 count = 0; 1071 for (i=0; i<a->mbs; i++) { 1072 for (j=0; j<bs; j++) { 1073 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1074 } 1075 } 1076 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1077 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1078 1079 /* store column indices (zero start index) */ 1080 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1081 count = 0; 1082 for (i=0; i<a->mbs; i++) { 1083 for (j=0; j<bs; j++) { 1084 for (k=a->i[i]; k<a->i[i+1]; k++) { 1085 for (l=0; l<bs; l++) { 1086 jj[count++] = bs*a->j[k] + l; 1087 } 1088 } 1089 } 1090 } 1091 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1092 ierr = PetscFree(jj);CHKERRQ(ierr); 1093 1094 /* store nonzero values */ 1095 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1096 count = 0; 1097 for (i=0; i<a->mbs; i++) { 1098 for (j=0; j<bs; j++) { 1099 for (k=a->i[i]; k<a->i[i+1]; k++) { 1100 for (l=0; l<bs; l++) { 1101 aa[count++] = a->a[bs2*k + l*bs + j]; 1102 } 1103 } 1104 } 1105 } 1106 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1107 ierr = PetscFree(aa);CHKERRQ(ierr); 1108 1109 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1110 if (file) { 1111 fprintf(file,"-matload_block_size %d\n",(int)A->bs); 1112 } 1113 PetscFunctionReturn(0); 1114 } 1115 1116 #undef __FUNCT__ 1117 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1118 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1119 { 1120 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1121 PetscErrorCode ierr; 1122 PetscInt i,j,bs = A->bs,k,l,bs2=a->bs2; 1123 PetscViewerFormat format; 1124 1125 PetscFunctionBegin; 1126 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1127 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1128 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1129 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1130 Mat aij; 1131 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1132 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1133 ierr = MatDestroy(aij);CHKERRQ(ierr); 1134 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1135 PetscFunctionReturn(0); 1136 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1137 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1138 for (i=0; i<a->mbs; i++) { 1139 for (j=0; j<bs; j++) { 1140 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1141 for (k=a->i[i]; k<a->i[i+1]; k++) { 1142 for (l=0; l<bs; l++) { 1143 #if defined(PETSC_USE_COMPLEX) 1144 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1145 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1146 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1147 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1148 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1149 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1150 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1151 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1152 } 1153 #else 1154 if (a->a[bs2*k + l*bs + j] != 0.0) { 1155 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1156 } 1157 #endif 1158 } 1159 } 1160 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1161 } 1162 } 1163 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1164 } else { 1165 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1166 for (i=0; i<a->mbs; i++) { 1167 for (j=0; j<bs; j++) { 1168 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1169 for (k=a->i[i]; k<a->i[i+1]; k++) { 1170 for (l=0; l<bs; l++) { 1171 #if defined(PETSC_USE_COMPLEX) 1172 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1173 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1174 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1175 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1176 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1177 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1178 } else { 1179 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1180 } 1181 #else 1182 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1183 #endif 1184 } 1185 } 1186 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1187 } 1188 } 1189 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1190 } 1191 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1192 PetscFunctionReturn(0); 1193 } 1194 1195 #undef __FUNCT__ 1196 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1197 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1198 { 1199 Mat A = (Mat) Aa; 1200 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1201 PetscErrorCode ierr; 1202 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2; 1203 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1204 MatScalar *aa; 1205 PetscViewer viewer; 1206 1207 PetscFunctionBegin; 1208 1209 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1210 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1211 1212 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1213 1214 /* loop over matrix elements drawing boxes */ 1215 color = PETSC_DRAW_BLUE; 1216 for (i=0,row=0; i<mbs; i++,row+=bs) { 1217 for (j=a->i[i]; j<a->i[i+1]; j++) { 1218 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1219 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1220 aa = a->a + j*bs2; 1221 for (k=0; k<bs; k++) { 1222 for (l=0; l<bs; l++) { 1223 if (PetscRealPart(*aa++) >= 0.) continue; 1224 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1225 } 1226 } 1227 } 1228 } 1229 color = PETSC_DRAW_CYAN; 1230 for (i=0,row=0; i<mbs; i++,row+=bs) { 1231 for (j=a->i[i]; j<a->i[i+1]; j++) { 1232 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1233 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1234 aa = a->a + j*bs2; 1235 for (k=0; k<bs; k++) { 1236 for (l=0; l<bs; l++) { 1237 if (PetscRealPart(*aa++) != 0.) continue; 1238 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1239 } 1240 } 1241 } 1242 } 1243 1244 color = PETSC_DRAW_RED; 1245 for (i=0,row=0; i<mbs; i++,row+=bs) { 1246 for (j=a->i[i]; j<a->i[i+1]; j++) { 1247 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1248 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1249 aa = a->a + j*bs2; 1250 for (k=0; k<bs; k++) { 1251 for (l=0; l<bs; l++) { 1252 if (PetscRealPart(*aa++) <= 0.) continue; 1253 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1254 } 1255 } 1256 } 1257 } 1258 PetscFunctionReturn(0); 1259 } 1260 1261 #undef __FUNCT__ 1262 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1263 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1264 { 1265 PetscErrorCode ierr; 1266 PetscReal xl,yl,xr,yr,w,h; 1267 PetscDraw draw; 1268 PetscTruth isnull; 1269 1270 PetscFunctionBegin; 1271 1272 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1273 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1274 1275 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1276 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 1277 xr += w; yr += h; xl = -w; yl = -h; 1278 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1279 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1280 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1281 PetscFunctionReturn(0); 1282 } 1283 1284 #undef __FUNCT__ 1285 #define __FUNCT__ "MatView_SeqBAIJ" 1286 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1287 { 1288 PetscErrorCode ierr; 1289 PetscTruth iascii,isbinary,isdraw; 1290 1291 PetscFunctionBegin; 1292 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1293 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1294 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1295 if (iascii){ 1296 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1297 } else if (isbinary) { 1298 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1299 } else if (isdraw) { 1300 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1301 } else { 1302 Mat B; 1303 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1304 ierr = MatView(B,viewer);CHKERRQ(ierr); 1305 ierr = MatDestroy(B);CHKERRQ(ierr); 1306 } 1307 PetscFunctionReturn(0); 1308 } 1309 1310 1311 #undef __FUNCT__ 1312 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1313 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1314 { 1315 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1316 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1317 PetscInt *ai = a->i,*ailen = a->ilen; 1318 PetscInt brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2; 1319 MatScalar *ap,*aa = a->a,zero = 0.0; 1320 1321 PetscFunctionBegin; 1322 for (k=0; k<m; k++) { /* loop over rows */ 1323 row = im[k]; brow = row/bs; 1324 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 1325 if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1326 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1327 nrow = ailen[brow]; 1328 for (l=0; l<n; l++) { /* loop over columns */ 1329 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 1330 if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1331 col = in[l] ; 1332 bcol = col/bs; 1333 cidx = col%bs; 1334 ridx = row%bs; 1335 high = nrow; 1336 low = 0; /* assume unsorted */ 1337 while (high-low > 5) { 1338 t = (low+high)/2; 1339 if (rp[t] > bcol) high = t; 1340 else low = t; 1341 } 1342 for (i=low; i<high; i++) { 1343 if (rp[i] > bcol) break; 1344 if (rp[i] == bcol) { 1345 *v++ = ap[bs2*i+bs*cidx+ridx]; 1346 goto finished; 1347 } 1348 } 1349 *v++ = zero; 1350 finished:; 1351 } 1352 } 1353 PetscFunctionReturn(0); 1354 } 1355 1356 #if defined(PETSC_USE_MAT_SINGLE) 1357 #undef __FUNCT__ 1358 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1359 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 1360 { 1361 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1362 PetscErrorCode ierr; 1363 PetscInt i,N = m*n*b->bs2; 1364 MatScalar *vsingle; 1365 1366 PetscFunctionBegin; 1367 if (N > b->setvalueslen) { 1368 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 1369 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1370 b->setvalueslen = N; 1371 } 1372 vsingle = b->setvaluescopy; 1373 for (i=0; i<N; i++) { 1374 vsingle[i] = v[i]; 1375 } 1376 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1377 PetscFunctionReturn(0); 1378 } 1379 #endif 1380 1381 1382 #undef __FUNCT__ 1383 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1384 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is) 1385 { 1386 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1387 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1388 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1389 PetscErrorCode ierr; 1390 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval; 1391 PetscTruth roworiented=a->roworiented; 1392 const MatScalar *value = v; 1393 MatScalar *ap,*aa = a->a,*bap; 1394 1395 PetscFunctionBegin; 1396 if (roworiented) { 1397 stepval = (n-1)*bs; 1398 } else { 1399 stepval = (m-1)*bs; 1400 } 1401 for (k=0; k<m; k++) { /* loop over added rows */ 1402 row = im[k]; 1403 if (row < 0) continue; 1404 #if defined(PETSC_USE_DEBUG) 1405 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1406 #endif 1407 rp = aj + ai[row]; 1408 ap = aa + bs2*ai[row]; 1409 rmax = imax[row]; 1410 nrow = ailen[row]; 1411 low = 0; 1412 high = nrow; 1413 for (l=0; l<n; l++) { /* loop over added columns */ 1414 if (in[l] < 0) continue; 1415 #if defined(PETSC_USE_DEBUG) 1416 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1417 #endif 1418 col = in[l]; 1419 if (roworiented) { 1420 value = v + k*(stepval+bs)*bs + l*bs; 1421 } else { 1422 value = v + l*(stepval+bs)*bs + k*bs; 1423 } 1424 if (col <= lastcol) low = 0; else high = nrow; 1425 lastcol = col; 1426 while (high-low > 7) { 1427 t = (low+high)/2; 1428 if (rp[t] > col) high = t; 1429 else low = t; 1430 } 1431 for (i=low; i<high; i++) { 1432 if (rp[i] > col) break; 1433 if (rp[i] == col) { 1434 bap = ap + bs2*i; 1435 if (roworiented) { 1436 if (is == ADD_VALUES) { 1437 for (ii=0; ii<bs; ii++,value+=stepval) { 1438 for (jj=ii; jj<bs2; jj+=bs) { 1439 bap[jj] += *value++; 1440 } 1441 } 1442 } else { 1443 for (ii=0; ii<bs; ii++,value+=stepval) { 1444 for (jj=ii; jj<bs2; jj+=bs) { 1445 bap[jj] = *value++; 1446 } 1447 } 1448 } 1449 } else { 1450 if (is == ADD_VALUES) { 1451 for (ii=0; ii<bs; ii++,value+=stepval) { 1452 for (jj=0; jj<bs; jj++) { 1453 *bap++ += *value++; 1454 } 1455 } 1456 } else { 1457 for (ii=0; ii<bs; ii++,value+=stepval) { 1458 for (jj=0; jj<bs; jj++) { 1459 *bap++ = *value++; 1460 } 1461 } 1462 } 1463 } 1464 goto noinsert2; 1465 } 1466 } 1467 if (nonew == 1) goto noinsert2; 1468 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1469 MatSeqXAIJReallocateAIJ(a,bs2,nrow,row,col,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew); 1470 N = nrow++ - 1; high++; 1471 /* shift up all the later entries in this row */ 1472 for (ii=N; ii>=i; ii--) { 1473 rp[ii+1] = rp[ii]; 1474 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1475 } 1476 if (N >= i) { 1477 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1478 } 1479 rp[i] = col; 1480 bap = ap + bs2*i; 1481 if (roworiented) { 1482 for (ii=0; ii<bs; ii++,value+=stepval) { 1483 for (jj=ii; jj<bs2; jj+=bs) { 1484 bap[jj] = *value++; 1485 } 1486 } 1487 } else { 1488 for (ii=0; ii<bs; ii++,value+=stepval) { 1489 for (jj=0; jj<bs; jj++) { 1490 *bap++ = *value++; 1491 } 1492 } 1493 } 1494 noinsert2:; 1495 low = i; 1496 } 1497 ailen[row] = nrow; 1498 } 1499 PetscFunctionReturn(0); 1500 } 1501 1502 #undef __FUNCT__ 1503 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1504 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1505 { 1506 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1507 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1508 PetscInt m = A->m,*ip,N,*ailen = a->ilen; 1509 PetscErrorCode ierr; 1510 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1511 MatScalar *aa = a->a,*ap; 1512 PetscReal ratio=0.6; 1513 1514 PetscFunctionBegin; 1515 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1516 1517 if (m) rmax = ailen[0]; 1518 for (i=1; i<mbs; i++) { 1519 /* move each row back by the amount of empty slots (fshift) before it*/ 1520 fshift += imax[i-1] - ailen[i-1]; 1521 rmax = PetscMax(rmax,ailen[i]); 1522 if (fshift) { 1523 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1524 N = ailen[i]; 1525 for (j=0; j<N; j++) { 1526 ip[j-fshift] = ip[j]; 1527 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1528 } 1529 } 1530 ai[i] = ai[i-1] + ailen[i-1]; 1531 } 1532 if (mbs) { 1533 fshift += imax[mbs-1] - ailen[mbs-1]; 1534 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1535 } 1536 /* reset ilen and imax for each row */ 1537 for (i=0; i<mbs; i++) { 1538 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1539 } 1540 a->nz = ai[mbs]; 1541 1542 /* diagonals may have moved, so kill the diagonal pointers */ 1543 a->idiagvalid = PETSC_FALSE; 1544 if (fshift && a->diag) { 1545 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1546 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1547 a->diag = 0; 1548 } 1549 ierr = PetscVerboseInfo((A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->n,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr); 1550 ierr = PetscVerboseInfo((A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr); 1551 ierr = PetscVerboseInfo((A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr); 1552 a->reallocs = 0; 1553 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1554 1555 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1556 if (a->compressedrow.use){ 1557 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1558 } 1559 1560 A->same_nonzero = PETSC_TRUE; 1561 PetscFunctionReturn(0); 1562 } 1563 1564 /* 1565 This function returns an array of flags which indicate the locations of contiguous 1566 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1567 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1568 Assume: sizes should be long enough to hold all the values. 1569 */ 1570 #undef __FUNCT__ 1571 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1572 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1573 { 1574 PetscInt i,j,k,row; 1575 PetscTruth flg; 1576 1577 PetscFunctionBegin; 1578 for (i=0,j=0; i<n; j++) { 1579 row = idx[i]; 1580 if (row%bs!=0) { /* Not the begining of a block */ 1581 sizes[j] = 1; 1582 i++; 1583 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1584 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1585 i++; 1586 } else { /* Begining of the block, so check if the complete block exists */ 1587 flg = PETSC_TRUE; 1588 for (k=1; k<bs; k++) { 1589 if (row+k != idx[i+k]) { /* break in the block */ 1590 flg = PETSC_FALSE; 1591 break; 1592 } 1593 } 1594 if (flg) { /* No break in the bs */ 1595 sizes[j] = bs; 1596 i+= bs; 1597 } else { 1598 sizes[j] = 1; 1599 i++; 1600 } 1601 } 1602 } 1603 *bs_max = j; 1604 PetscFunctionReturn(0); 1605 } 1606 1607 #undef __FUNCT__ 1608 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1609 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 1610 { 1611 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1612 PetscErrorCode ierr; 1613 PetscInt i,j,k,count,*rows; 1614 PetscInt bs=A->bs,bs2=baij->bs2,*sizes,row,bs_max; 1615 PetscScalar zero = 0.0; 1616 MatScalar *aa; 1617 1618 PetscFunctionBegin; 1619 /* Make a copy of the IS and sort it */ 1620 /* allocate memory for rows,sizes */ 1621 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1622 sizes = rows + is_n; 1623 1624 /* copy IS values to rows, and sort them */ 1625 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1626 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1627 if (baij->keepzeroedrows) { 1628 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1629 bs_max = is_n; 1630 A->same_nonzero = PETSC_TRUE; 1631 } else { 1632 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1633 A->same_nonzero = PETSC_FALSE; 1634 } 1635 1636 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1637 row = rows[j]; 1638 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 1639 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1640 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1641 if (sizes[i] == bs && !baij->keepzeroedrows) { 1642 if (diag != 0.0) { 1643 if (baij->ilen[row/bs] > 0) { 1644 baij->ilen[row/bs] = 1; 1645 baij->j[baij->i[row/bs]] = row/bs; 1646 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1647 } 1648 /* Now insert all the diagonal values for this bs */ 1649 for (k=0; k<bs; k++) { 1650 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 1651 } 1652 } else { /* (diag == 0.0) */ 1653 baij->ilen[row/bs] = 0; 1654 } /* end (diag == 0.0) */ 1655 } else { /* (sizes[i] != bs) */ 1656 #if defined (PETSC_USE_DEBUG) 1657 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 1658 #endif 1659 for (k=0; k<count; k++) { 1660 aa[0] = zero; 1661 aa += bs; 1662 } 1663 if (diag != 0.0) { 1664 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 1665 } 1666 } 1667 } 1668 1669 ierr = PetscFree(rows);CHKERRQ(ierr); 1670 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1671 PetscFunctionReturn(0); 1672 } 1673 1674 #undef __FUNCT__ 1675 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1676 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1677 { 1678 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1679 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 1680 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1681 PetscInt *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol; 1682 PetscErrorCode ierr; 1683 PetscInt ridx,cidx,bs2=a->bs2; 1684 PetscTruth roworiented=a->roworiented; 1685 MatScalar *ap,value,*aa=a->a,*bap; 1686 1687 PetscFunctionBegin; 1688 for (k=0; k<m; k++) { /* loop over added rows */ 1689 row = im[k]; 1690 brow = row/bs; 1691 if (row < 0) continue; 1692 #if defined(PETSC_USE_DEBUG) 1693 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 1694 #endif 1695 rp = aj + ai[brow]; 1696 ap = aa + bs2*ai[brow]; 1697 rmax = imax[brow]; 1698 nrow = ailen[brow]; 1699 low = 0; 1700 high = nrow; 1701 for (l=0; l<n; l++) { /* loop over added columns */ 1702 if (in[l] < 0) continue; 1703 #if defined(PETSC_USE_DEBUG) 1704 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1); 1705 #endif 1706 col = in[l]; bcol = col/bs; 1707 ridx = row % bs; cidx = col % bs; 1708 if (roworiented) { 1709 value = v[l + k*n]; 1710 } else { 1711 value = v[k + l*m]; 1712 } 1713 if (col <= lastcol) low = 0; else high = nrow; 1714 lastcol = col; 1715 while (high-low > 7) { 1716 t = (low+high)/2; 1717 if (rp[t] > bcol) high = t; 1718 else low = t; 1719 } 1720 for (i=low; i<high; i++) { 1721 if (rp[i] > bcol) break; 1722 if (rp[i] == bcol) { 1723 bap = ap + bs2*i + bs*cidx + ridx; 1724 if (is == ADD_VALUES) *bap += value; 1725 else *bap = value; 1726 goto noinsert1; 1727 } 1728 } 1729 if (nonew == 1) goto noinsert1; 1730 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1731 MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew); 1732 N = nrow++ - 1; high++; 1733 /* shift up all the later entries in this row */ 1734 for (ii=N; ii>=i; ii--) { 1735 rp[ii+1] = rp[ii]; 1736 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1737 } 1738 if (N>=i) { 1739 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1740 } 1741 rp[i] = bcol; 1742 ap[bs2*i + bs*cidx + ridx] = value; 1743 a->nz++; 1744 noinsert1:; 1745 low = i; 1746 } 1747 ailen[brow] = nrow; 1748 } 1749 A->same_nonzero = PETSC_FALSE; 1750 PetscFunctionReturn(0); 1751 } 1752 1753 1754 #undef __FUNCT__ 1755 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1756 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1757 { 1758 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1759 Mat outA; 1760 PetscErrorCode ierr; 1761 PetscTruth row_identity,col_identity; 1762 1763 PetscFunctionBegin; 1764 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1765 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1766 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1767 if (!row_identity || !col_identity) { 1768 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 1769 } 1770 1771 outA = inA; 1772 inA->factor = FACTOR_LU; 1773 1774 if (!a->diag) { 1775 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1776 } 1777 1778 a->row = row; 1779 a->col = col; 1780 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1781 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1782 1783 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1784 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1785 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1786 1787 /* 1788 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1789 for ILU(0) factorization with natural ordering 1790 */ 1791 if (inA->bs < 8) { 1792 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1793 } else { 1794 if (!a->solve_work) { 1795 ierr = PetscMalloc((inA->m+inA->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1796 ierr = PetscLogObjectMemory(inA,(inA->m+inA->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1797 } 1798 } 1799 1800 ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1801 1802 PetscFunctionReturn(0); 1803 } 1804 #undef __FUNCT__ 1805 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1806 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A) 1807 { 1808 static PetscTruth called = PETSC_FALSE; 1809 MPI_Comm comm = A->comm; 1810 PetscErrorCode ierr; 1811 1812 PetscFunctionBegin; 1813 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1814 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1815 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1816 PetscFunctionReturn(0); 1817 } 1818 1819 EXTERN_C_BEGIN 1820 #undef __FUNCT__ 1821 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1822 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 1823 { 1824 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1825 PetscInt i,nz,nbs; 1826 1827 PetscFunctionBegin; 1828 nz = baij->maxnz/baij->bs2; 1829 nbs = baij->nbs; 1830 for (i=0; i<nz; i++) { 1831 baij->j[i] = indices[i]; 1832 } 1833 baij->nz = nz; 1834 for (i=0; i<nbs; i++) { 1835 baij->ilen[i] = baij->imax[i]; 1836 } 1837 1838 PetscFunctionReturn(0); 1839 } 1840 EXTERN_C_END 1841 1842 #undef __FUNCT__ 1843 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1844 /*@ 1845 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1846 in the matrix. 1847 1848 Input Parameters: 1849 + mat - the SeqBAIJ matrix 1850 - indices - the column indices 1851 1852 Level: advanced 1853 1854 Notes: 1855 This can be called if you have precomputed the nonzero structure of the 1856 matrix and want to provide it to the matrix object to improve the performance 1857 of the MatSetValues() operation. 1858 1859 You MUST have set the correct numbers of nonzeros per row in the call to 1860 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 1861 1862 MUST be called before any calls to MatSetValues(); 1863 1864 @*/ 1865 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1866 { 1867 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1868 1869 PetscFunctionBegin; 1870 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1871 PetscValidPointer(indices,2); 1872 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1873 if (f) { 1874 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1875 } else { 1876 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 1877 } 1878 PetscFunctionReturn(0); 1879 } 1880 1881 #undef __FUNCT__ 1882 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1883 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1884 { 1885 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1886 PetscErrorCode ierr; 1887 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 1888 PetscReal atmp; 1889 PetscScalar *x,zero = 0.0; 1890 MatScalar *aa; 1891 PetscInt ncols,brow,krow,kcol; 1892 1893 PetscFunctionBegin; 1894 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1895 bs = A->bs; 1896 aa = a->a; 1897 ai = a->i; 1898 aj = a->j; 1899 mbs = a->mbs; 1900 1901 ierr = VecSet(v,zero);CHKERRQ(ierr); 1902 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1903 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1904 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1905 for (i=0; i<mbs; i++) { 1906 ncols = ai[1] - ai[0]; ai++; 1907 brow = bs*i; 1908 for (j=0; j<ncols; j++){ 1909 /* bcol = bs*(*aj); */ 1910 for (kcol=0; kcol<bs; kcol++){ 1911 for (krow=0; krow<bs; krow++){ 1912 atmp = PetscAbsScalar(*aa); aa++; 1913 row = brow + krow; /* row index */ 1914 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1915 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1916 } 1917 } 1918 aj++; 1919 } 1920 } 1921 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1922 PetscFunctionReturn(0); 1923 } 1924 1925 #undef __FUNCT__ 1926 #define __FUNCT__ "MatCopy_SeqBAIJ" 1927 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 1928 { 1929 PetscErrorCode ierr; 1930 1931 PetscFunctionBegin; 1932 /* If the two matrices have the same copy implementation, use fast copy. */ 1933 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1934 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1935 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 1936 1937 if (a->i[A->m] != b->i[B->m]) { 1938 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1939 } 1940 ierr = PetscMemcpy(b->a,a->a,(a->i[A->m])*sizeof(PetscScalar));CHKERRQ(ierr); 1941 } else { 1942 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1943 } 1944 PetscFunctionReturn(0); 1945 } 1946 1947 #undef __FUNCT__ 1948 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1949 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 1950 { 1951 PetscErrorCode ierr; 1952 1953 PetscFunctionBegin; 1954 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1955 PetscFunctionReturn(0); 1956 } 1957 1958 #undef __FUNCT__ 1959 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1960 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1961 { 1962 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1963 PetscFunctionBegin; 1964 *array = a->a; 1965 PetscFunctionReturn(0); 1966 } 1967 1968 #undef __FUNCT__ 1969 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1970 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1971 { 1972 PetscFunctionBegin; 1973 PetscFunctionReturn(0); 1974 } 1975 1976 #include "petscblaslapack.h" 1977 #undef __FUNCT__ 1978 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1979 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1980 { 1981 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1982 PetscErrorCode ierr; 1983 PetscInt i,bs=Y->bs,j,bs2; 1984 PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz; 1985 1986 PetscFunctionBegin; 1987 if (str == SAME_NONZERO_PATTERN) { 1988 PetscScalar alpha = a; 1989 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1990 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1991 if (y->xtoy && y->XtoY != X) { 1992 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1993 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1994 } 1995 if (!y->xtoy) { /* get xtoy */ 1996 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1997 y->XtoY = X; 1998 } 1999 bs2 = bs*bs; 2000 for (i=0; i<x->nz; i++) { 2001 j = 0; 2002 while (j < bs2){ 2003 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2004 j++; 2005 } 2006 } 2007 ierr = PetscVerboseInfo((0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr); 2008 } else { 2009 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2010 } 2011 PetscFunctionReturn(0); 2012 } 2013 2014 #undef __FUNCT__ 2015 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2016 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2017 { 2018 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2019 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2020 PetscScalar *aa = a->a; 2021 2022 PetscFunctionBegin; 2023 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2024 PetscFunctionReturn(0); 2025 } 2026 2027 #undef __FUNCT__ 2028 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2029 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2030 { 2031 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2032 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2033 PetscScalar *aa = a->a; 2034 2035 PetscFunctionBegin; 2036 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2037 PetscFunctionReturn(0); 2038 } 2039 2040 2041 /* -------------------------------------------------------------------*/ 2042 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2043 MatGetRow_SeqBAIJ, 2044 MatRestoreRow_SeqBAIJ, 2045 MatMult_SeqBAIJ_N, 2046 /* 4*/ MatMultAdd_SeqBAIJ_N, 2047 MatMultTranspose_SeqBAIJ, 2048 MatMultTransposeAdd_SeqBAIJ, 2049 MatSolve_SeqBAIJ_N, 2050 0, 2051 0, 2052 /*10*/ 0, 2053 MatLUFactor_SeqBAIJ, 2054 0, 2055 0, 2056 MatTranspose_SeqBAIJ, 2057 /*15*/ MatGetInfo_SeqBAIJ, 2058 MatEqual_SeqBAIJ, 2059 MatGetDiagonal_SeqBAIJ, 2060 MatDiagonalScale_SeqBAIJ, 2061 MatNorm_SeqBAIJ, 2062 /*20*/ 0, 2063 MatAssemblyEnd_SeqBAIJ, 2064 0, 2065 MatSetOption_SeqBAIJ, 2066 MatZeroEntries_SeqBAIJ, 2067 /*25*/ MatZeroRows_SeqBAIJ, 2068 MatLUFactorSymbolic_SeqBAIJ, 2069 MatLUFactorNumeric_SeqBAIJ_N, 2070 MatCholeskyFactorSymbolic_SeqBAIJ, 2071 MatCholeskyFactorNumeric_SeqBAIJ_N, 2072 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2073 MatILUFactorSymbolic_SeqBAIJ, 2074 MatICCFactorSymbolic_SeqBAIJ, 2075 MatGetArray_SeqBAIJ, 2076 MatRestoreArray_SeqBAIJ, 2077 /*35*/ MatDuplicate_SeqBAIJ, 2078 0, 2079 0, 2080 MatILUFactor_SeqBAIJ, 2081 0, 2082 /*40*/ MatAXPY_SeqBAIJ, 2083 MatGetSubMatrices_SeqBAIJ, 2084 MatIncreaseOverlap_SeqBAIJ, 2085 MatGetValues_SeqBAIJ, 2086 MatCopy_SeqBAIJ, 2087 /*45*/ MatPrintHelp_SeqBAIJ, 2088 MatScale_SeqBAIJ, 2089 0, 2090 0, 2091 0, 2092 /*50*/ 0, 2093 MatGetRowIJ_SeqBAIJ, 2094 MatRestoreRowIJ_SeqBAIJ, 2095 0, 2096 0, 2097 /*55*/ 0, 2098 0, 2099 0, 2100 0, 2101 MatSetValuesBlocked_SeqBAIJ, 2102 /*60*/ MatGetSubMatrix_SeqBAIJ, 2103 MatDestroy_SeqBAIJ, 2104 MatView_SeqBAIJ, 2105 MatGetPetscMaps_Petsc, 2106 0, 2107 /*65*/ 0, 2108 0, 2109 0, 2110 0, 2111 0, 2112 /*70*/ MatGetRowMax_SeqBAIJ, 2113 MatConvert_Basic, 2114 0, 2115 0, 2116 0, 2117 /*75*/ 0, 2118 0, 2119 0, 2120 0, 2121 0, 2122 /*80*/ 0, 2123 0, 2124 0, 2125 0, 2126 MatLoad_SeqBAIJ, 2127 /*85*/ 0, 2128 0, 2129 0, 2130 0, 2131 0, 2132 /*90*/ 0, 2133 0, 2134 0, 2135 0, 2136 0, 2137 /*95*/ 0, 2138 0, 2139 0, 2140 0, 2141 0, 2142 /*100*/0, 2143 0, 2144 0, 2145 0, 2146 0, 2147 /*105*/0, 2148 MatRealPart_SeqBAIJ, 2149 MatImaginaryPart_SeqBAIJ 2150 }; 2151 2152 EXTERN_C_BEGIN 2153 #undef __FUNCT__ 2154 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2155 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2156 { 2157 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2158 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 2159 PetscErrorCode ierr; 2160 2161 PetscFunctionBegin; 2162 if (aij->nonew != 1) { 2163 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2164 } 2165 2166 /* allocate space for values if not already there */ 2167 if (!aij->saved_values) { 2168 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2169 } 2170 2171 /* copy values over */ 2172 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2173 PetscFunctionReturn(0); 2174 } 2175 EXTERN_C_END 2176 2177 EXTERN_C_BEGIN 2178 #undef __FUNCT__ 2179 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2180 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2181 { 2182 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2183 PetscErrorCode ierr; 2184 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 2185 2186 PetscFunctionBegin; 2187 if (aij->nonew != 1) { 2188 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2189 } 2190 if (!aij->saved_values) { 2191 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2192 } 2193 2194 /* copy values over */ 2195 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2196 PetscFunctionReturn(0); 2197 } 2198 EXTERN_C_END 2199 2200 EXTERN_C_BEGIN 2201 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2202 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2203 EXTERN_C_END 2204 2205 EXTERN_C_BEGIN 2206 #undef __FUNCT__ 2207 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2208 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2209 { 2210 Mat_SeqBAIJ *b; 2211 PetscErrorCode ierr; 2212 PetscInt i,mbs,nbs,bs2,newbs = bs; 2213 PetscTruth flg,skipallocation = PETSC_FALSE; 2214 2215 PetscFunctionBegin; 2216 2217 if (nz == MAT_SKIP_ALLOCATION) { 2218 skipallocation = PETSC_TRUE; 2219 nz = 0; 2220 } 2221 2222 B->preallocated = PETSC_TRUE; 2223 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 2224 if (nnz && newbs != bs) { 2225 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2226 } 2227 bs = newbs; 2228 2229 mbs = B->m/bs; 2230 nbs = B->n/bs; 2231 bs2 = bs*bs; 2232 2233 if (mbs*bs!=B->m || nbs*bs!=B->n) { 2234 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->m,B->n,bs); 2235 } 2236 2237 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2238 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2239 if (nnz) { 2240 for (i=0; i<mbs; i++) { 2241 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2242 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2243 } 2244 } 2245 2246 b = (Mat_SeqBAIJ*)B->data; 2247 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 2248 B->ops->solve = MatSolve_SeqBAIJ_Update; 2249 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2250 if (!flg) { 2251 switch (bs) { 2252 case 1: 2253 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2254 B->ops->mult = MatMult_SeqBAIJ_1; 2255 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2256 break; 2257 case 2: 2258 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2259 B->ops->mult = MatMult_SeqBAIJ_2; 2260 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2261 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2262 break; 2263 case 3: 2264 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2265 B->ops->mult = MatMult_SeqBAIJ_3; 2266 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2267 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2268 break; 2269 case 4: 2270 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2271 B->ops->mult = MatMult_SeqBAIJ_4; 2272 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2273 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2274 break; 2275 case 5: 2276 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2277 B->ops->mult = MatMult_SeqBAIJ_5; 2278 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2279 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2280 break; 2281 case 6: 2282 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2283 B->ops->mult = MatMult_SeqBAIJ_6; 2284 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2285 break; 2286 case 7: 2287 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2288 B->ops->mult = MatMult_SeqBAIJ_7; 2289 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2290 break; 2291 default: 2292 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2293 B->ops->mult = MatMult_SeqBAIJ_N; 2294 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2295 break; 2296 } 2297 } 2298 B->bs = bs; 2299 b->mbs = mbs; 2300 b->nbs = nbs; 2301 if (!skipallocation) { 2302 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2303 /* b->ilen will count nonzeros in each block row so far. */ 2304 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2305 if (!nnz) { 2306 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2307 else if (nz <= 0) nz = 1; 2308 for (i=0; i<mbs; i++) b->imax[i] = nz; 2309 nz = nz*mbs; 2310 } else { 2311 nz = 0; 2312 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2313 } 2314 2315 /* allocate the matrix space */ 2316 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->m+1,PetscInt,&b->i);CHKERRQ(ierr); 2317 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2318 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2319 b->singlemalloc = PETSC_TRUE; 2320 2321 b->i[0] = 0; 2322 for (i=1; i<mbs+1; i++) { 2323 b->i[i] = b->i[i-1] + b->imax[i-1]; 2324 } 2325 } 2326 2327 B->bs = bs; 2328 b->bs2 = bs2; 2329 b->mbs = mbs; 2330 b->nz = 0; 2331 b->maxnz = nz*bs2; 2332 B->info.nz_unneeded = (PetscReal)b->maxnz; 2333 PetscFunctionReturn(0); 2334 } 2335 EXTERN_C_END 2336 2337 /*MC 2338 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2339 block sparse compressed row format. 2340 2341 Options Database Keys: 2342 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2343 2344 Level: beginner 2345 2346 .seealso: MatCreateSeqBAIJ() 2347 M*/ 2348 2349 EXTERN_C_BEGIN 2350 #undef __FUNCT__ 2351 #define __FUNCT__ "MatCreate_SeqBAIJ" 2352 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2353 { 2354 PetscErrorCode ierr; 2355 PetscMPIInt size; 2356 Mat_SeqBAIJ *b; 2357 2358 PetscFunctionBegin; 2359 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 2360 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2361 2362 B->m = B->M = PetscMax(B->m,B->M); 2363 B->n = B->N = PetscMax(B->n,B->N); 2364 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2365 B->data = (void*)b; 2366 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2367 B->factor = 0; 2368 B->mapping = 0; 2369 b->row = 0; 2370 b->col = 0; 2371 b->icol = 0; 2372 b->reallocs = 0; 2373 b->saved_values = 0; 2374 #if defined(PETSC_USE_MAT_SINGLE) 2375 b->setvalueslen = 0; 2376 b->setvaluescopy = PETSC_NULL; 2377 #endif 2378 2379 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 2380 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 2381 2382 b->sorted = PETSC_FALSE; 2383 b->roworiented = PETSC_TRUE; 2384 b->nonew = 0; 2385 b->diag = 0; 2386 b->solve_work = 0; 2387 b->mult_work = 0; 2388 B->spptr = 0; 2389 B->info.nz_unneeded = (PetscReal)b->maxnz; 2390 b->keepzeroedrows = PETSC_FALSE; 2391 b->xtoy = 0; 2392 b->XtoY = 0; 2393 b->compressedrow.use = PETSC_FALSE; 2394 b->compressedrow.nrows = 0; 2395 b->compressedrow.i = PETSC_NULL; 2396 b->compressedrow.rindex = PETSC_NULL; 2397 b->compressedrow.checked = PETSC_FALSE; 2398 B->same_nonzero = PETSC_FALSE; 2399 2400 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2401 "MatInvertBlockDiagonal_SeqBAIJ", 2402 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2403 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2404 "MatStoreValues_SeqBAIJ", 2405 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2406 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2407 "MatRetrieveValues_SeqBAIJ", 2408 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2409 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2410 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2411 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2412 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2413 "MatConvert_SeqBAIJ_SeqAIJ", 2414 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2415 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2416 "MatConvert_SeqBAIJ_SeqSBAIJ", 2417 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2418 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2419 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2420 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2421 PetscFunctionReturn(0); 2422 } 2423 EXTERN_C_END 2424 2425 #undef __FUNCT__ 2426 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2427 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2428 { 2429 Mat C; 2430 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2431 PetscErrorCode ierr; 2432 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2433 2434 PetscFunctionBegin; 2435 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2436 2437 *B = 0; 2438 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 2439 ierr = MatSetSizes(C,A->m,A->n,A->m,A->n);CHKERRQ(ierr); 2440 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 2441 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2442 c = (Mat_SeqBAIJ*)C->data; 2443 2444 C->M = A->M; 2445 C->N = A->N; 2446 C->bs = A->bs; 2447 c->bs2 = a->bs2; 2448 c->mbs = a->mbs; 2449 c->nbs = a->nbs; 2450 2451 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 2452 for (i=0; i<mbs; i++) { 2453 c->imax[i] = a->imax[i]; 2454 c->ilen[i] = a->ilen[i]; 2455 } 2456 2457 /* allocate the matrix space */ 2458 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2459 c->singlemalloc = PETSC_TRUE; 2460 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2461 if (mbs > 0) { 2462 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2463 if (cpvalues == MAT_COPY_VALUES) { 2464 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2465 } else { 2466 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2467 } 2468 } 2469 c->sorted = a->sorted; 2470 c->roworiented = a->roworiented; 2471 c->nonew = a->nonew; 2472 2473 if (a->diag) { 2474 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2475 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2476 for (i=0; i<mbs; i++) { 2477 c->diag[i] = a->diag[i]; 2478 } 2479 } else c->diag = 0; 2480 c->nz = a->nz; 2481 c->maxnz = a->maxnz; 2482 c->solve_work = 0; 2483 c->mult_work = 0; 2484 C->preallocated = PETSC_TRUE; 2485 C->assembled = PETSC_TRUE; 2486 2487 c->compressedrow.use = a->compressedrow.use; 2488 c->compressedrow.nrows = a->compressedrow.nrows; 2489 c->compressedrow.checked = a->compressedrow.checked; 2490 if ( a->compressedrow.checked && a->compressedrow.use){ 2491 i = a->compressedrow.nrows; 2492 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2493 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2494 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2495 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2496 } else { 2497 c->compressedrow.use = PETSC_FALSE; 2498 c->compressedrow.i = PETSC_NULL; 2499 c->compressedrow.rindex = PETSC_NULL; 2500 } 2501 C->same_nonzero = A->same_nonzero; 2502 *B = C; 2503 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 2504 PetscFunctionReturn(0); 2505 } 2506 2507 #undef __FUNCT__ 2508 #define __FUNCT__ "MatLoad_SeqBAIJ" 2509 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A) 2510 { 2511 Mat_SeqBAIJ *a; 2512 Mat B; 2513 PetscErrorCode ierr; 2514 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2515 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2516 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2517 PetscInt *masked,nmask,tmp,bs2,ishift; 2518 PetscMPIInt size; 2519 int fd; 2520 PetscScalar *aa; 2521 MPI_Comm comm = ((PetscObject)viewer)->comm; 2522 2523 PetscFunctionBegin; 2524 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2525 bs2 = bs*bs; 2526 2527 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2528 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2529 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2530 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2531 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2532 M = header[1]; N = header[2]; nz = header[3]; 2533 2534 if (header[3] < 0) { 2535 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2536 } 2537 2538 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2539 2540 /* 2541 This code adds extra rows to make sure the number of rows is 2542 divisible by the blocksize 2543 */ 2544 mbs = M/bs; 2545 extra_rows = bs - M + bs*(mbs); 2546 if (extra_rows == bs) extra_rows = 0; 2547 else mbs++; 2548 if (extra_rows) { 2549 ierr = PetscVerboseInfo((0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 2550 } 2551 2552 /* read in row lengths */ 2553 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2554 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2555 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2556 2557 /* read in column indices */ 2558 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2559 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2560 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2561 2562 /* loop over row lengths determining block row lengths */ 2563 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2564 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2565 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2566 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2567 masked = mask + mbs; 2568 rowcount = 0; nzcount = 0; 2569 for (i=0; i<mbs; i++) { 2570 nmask = 0; 2571 for (j=0; j<bs; j++) { 2572 kmax = rowlengths[rowcount]; 2573 for (k=0; k<kmax; k++) { 2574 tmp = jj[nzcount++]/bs; 2575 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2576 } 2577 rowcount++; 2578 } 2579 browlengths[i] += nmask; 2580 /* zero out the mask elements we set */ 2581 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2582 } 2583 2584 /* create our matrix */ 2585 ierr = MatCreate(comm,&B); 2586 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 2587 ierr = MatSetType(B,type);CHKERRQ(ierr); 2588 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 2589 a = (Mat_SeqBAIJ*)B->data; 2590 2591 /* set matrix "i" values */ 2592 a->i[0] = 0; 2593 for (i=1; i<= mbs; i++) { 2594 a->i[i] = a->i[i-1] + browlengths[i-1]; 2595 a->ilen[i-1] = browlengths[i-1]; 2596 } 2597 a->nz = 0; 2598 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2599 2600 /* read in nonzero values */ 2601 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2602 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2603 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2604 2605 /* set "a" and "j" values into matrix */ 2606 nzcount = 0; jcount = 0; 2607 for (i=0; i<mbs; i++) { 2608 nzcountb = nzcount; 2609 nmask = 0; 2610 for (j=0; j<bs; j++) { 2611 kmax = rowlengths[i*bs+j]; 2612 for (k=0; k<kmax; k++) { 2613 tmp = jj[nzcount++]/bs; 2614 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2615 } 2616 } 2617 /* sort the masked values */ 2618 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2619 2620 /* set "j" values into matrix */ 2621 maskcount = 1; 2622 for (j=0; j<nmask; j++) { 2623 a->j[jcount++] = masked[j]; 2624 mask[masked[j]] = maskcount++; 2625 } 2626 /* set "a" values into matrix */ 2627 ishift = bs2*a->i[i]; 2628 for (j=0; j<bs; j++) { 2629 kmax = rowlengths[i*bs+j]; 2630 for (k=0; k<kmax; k++) { 2631 tmp = jj[nzcountb]/bs ; 2632 block = mask[tmp] - 1; 2633 point = jj[nzcountb] - bs*tmp; 2634 idx = ishift + bs2*block + j + bs*point; 2635 a->a[idx] = (MatScalar)aa[nzcountb++]; 2636 } 2637 } 2638 /* zero out the mask elements we set */ 2639 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2640 } 2641 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2642 2643 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2644 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2645 ierr = PetscFree(aa);CHKERRQ(ierr); 2646 ierr = PetscFree(jj);CHKERRQ(ierr); 2647 ierr = PetscFree(mask);CHKERRQ(ierr); 2648 2649 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2650 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2651 ierr = MatView_Private(B);CHKERRQ(ierr); 2652 2653 *A = B; 2654 PetscFunctionReturn(0); 2655 } 2656 2657 #undef __FUNCT__ 2658 #define __FUNCT__ "MatCreateSeqBAIJ" 2659 /*@C 2660 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2661 compressed row) format. For good matrix assembly performance the 2662 user should preallocate the matrix storage by setting the parameter nz 2663 (or the array nnz). By setting these parameters accurately, performance 2664 during matrix assembly can be increased by more than a factor of 50. 2665 2666 Collective on MPI_Comm 2667 2668 Input Parameters: 2669 + comm - MPI communicator, set to PETSC_COMM_SELF 2670 . bs - size of block 2671 . m - number of rows 2672 . n - number of columns 2673 . nz - number of nonzero blocks per block row (same for all rows) 2674 - nnz - array containing the number of nonzero blocks in the various block rows 2675 (possibly different for each block row) or PETSC_NULL 2676 2677 Output Parameter: 2678 . A - the matrix 2679 2680 Options Database Keys: 2681 . -mat_no_unroll - uses code that does not unroll the loops in the 2682 block calculations (much slower) 2683 . -mat_block_size - size of the blocks to use 2684 2685 Level: intermediate 2686 2687 Notes: 2688 The number of rows and columns must be divisible by blocksize. 2689 2690 If the nnz parameter is given then the nz parameter is ignored 2691 2692 A nonzero block is any block that as 1 or more nonzeros in it 2693 2694 The block AIJ format is fully compatible with standard Fortran 77 2695 storage. That is, the stored row and column indices can begin at 2696 either one (as in Fortran) or zero. See the users' manual for details. 2697 2698 Specify the preallocated storage with either nz or nnz (not both). 2699 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2700 allocation. For additional details, see the users manual chapter on 2701 matrices. 2702 2703 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2704 @*/ 2705 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 2706 { 2707 PetscErrorCode ierr; 2708 2709 PetscFunctionBegin; 2710 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2711 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2712 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2713 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 2714 PetscFunctionReturn(0); 2715 } 2716 2717 #undef __FUNCT__ 2718 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2719 /*@C 2720 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2721 per row in the matrix. For good matrix assembly performance the 2722 user should preallocate the matrix storage by setting the parameter nz 2723 (or the array nnz). By setting these parameters accurately, performance 2724 during matrix assembly can be increased by more than a factor of 50. 2725 2726 Collective on MPI_Comm 2727 2728 Input Parameters: 2729 + A - the matrix 2730 . bs - size of block 2731 . nz - number of block nonzeros per block row (same for all rows) 2732 - nnz - array containing the number of block nonzeros in the various block rows 2733 (possibly different for each block row) or PETSC_NULL 2734 2735 Options Database Keys: 2736 . -mat_no_unroll - uses code that does not unroll the loops in the 2737 block calculations (much slower) 2738 . -mat_block_size - size of the blocks to use 2739 2740 Level: intermediate 2741 2742 Notes: 2743 If the nnz parameter is given then the nz parameter is ignored 2744 2745 The block AIJ format is fully compatible with standard Fortran 77 2746 storage. That is, the stored row and column indices can begin at 2747 either one (as in Fortran) or zero. See the users' manual for details. 2748 2749 Specify the preallocated storage with either nz or nnz (not both). 2750 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2751 allocation. For additional details, see the users manual chapter on 2752 matrices. 2753 2754 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2755 @*/ 2756 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 2757 { 2758 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 2759 2760 PetscFunctionBegin; 2761 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2762 if (f) { 2763 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2764 } 2765 PetscFunctionReturn(0); 2766 } 2767 2768