1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 15 /*@C 16 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 17 18 Collective on Mat 19 20 Input Parameters: 21 . mat - the matrix 22 23 Level: advanced 24 @*/ 25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 26 { 27 PetscErrorCode ierr,(*f)(Mat); 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 31 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 32 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 33 34 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 35 if (f) { 36 ierr = (*f)(mat);CHKERRQ(ierr); 37 } else { 38 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 39 } 40 PetscFunctionReturn(0); 41 } 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 47 { 48 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 49 PetscErrorCode ierr; 50 PetscInt *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs; 51 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 52 53 PetscFunctionBegin; 54 if (a->idiagvalid) PetscFunctionReturn(0); 55 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56 diag_offset = a->diag; 57 if (!a->idiag) { 58 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59 } 60 diag = a->idiag; 61 mdiag = a->idiag+bs*bs*mbs; 62 /* factor and invert each block */ 63 switch (bs){ 64 case 2: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 4*diag_offset[i]; 67 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 68 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 69 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 70 diag += 4; 71 mdiag += 4; 72 } 73 break; 74 case 3: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 9*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79 diag[8] = odiag[8]; 80 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 81 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 82 mdiag[8] = odiag[8]; 83 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 84 diag += 9; 85 mdiag += 9; 86 } 87 break; 88 case 4: 89 for (i=0; i<mbs; i++) { 90 odiag = v + 16*diag_offset[i]; 91 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 92 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 94 diag += 16; 95 mdiag += 16; 96 } 97 break; 98 case 5: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 25*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 104 diag += 25; 105 mdiag += 25; 106 } 107 break; 108 default: 109 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 110 } 111 a->idiagvalid = PETSC_TRUE; 112 PetscFunctionReturn(0); 113 } 114 EXTERN_C_END 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 119 { 120 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 121 PetscScalar *x,x1,x2,s1,s2; 122 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 123 PetscErrorCode ierr; 124 PetscInt m = a->mbs,i,i2,nz,idx; 125 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 126 127 PetscFunctionBegin; 128 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 129 its = its*lits; 130 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 131 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 132 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 133 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 134 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 135 136 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 137 138 diag = a->diag; 139 idiag = a->idiag; 140 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 141 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 142 143 if (flag & SOR_ZERO_INITIAL_GUESS) { 144 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 145 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 146 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 147 i2 = 2; 148 idiag += 4; 149 for (i=1; i<m; i++) { 150 v = aa + 4*ai[i]; 151 vi = aj + ai[i]; 152 nz = diag[i] - ai[i]; 153 s1 = b[i2]; s2 = b[i2+1]; 154 while (nz--) { 155 idx = 2*(*vi++); 156 x1 = x[idx]; x2 = x[1+idx]; 157 s1 -= v[0]*x1 + v[2]*x2; 158 s2 -= v[1]*x1 + v[3]*x2; 159 v += 4; 160 } 161 x[i2] = idiag[0]*s1 + idiag[2]*s2; 162 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 163 idiag += 4; 164 i2 += 2; 165 } 166 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 167 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 168 } 169 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 170 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 171 i2 = 0; 172 mdiag = a->idiag+4*a->mbs; 173 for (i=0; i<m; i++) { 174 x1 = x[i2]; x2 = x[i2+1]; 175 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 176 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 177 mdiag += 4; 178 i2 += 2; 179 } 180 ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 181 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 182 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 183 } 184 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 185 idiag = a->idiag+4*a->mbs - 4; 186 i2 = 2*m - 2; 187 x1 = x[i2]; x2 = x[i2+1]; 188 x[i2] = idiag[0]*x1 + idiag[2]*x2; 189 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 190 idiag -= 4; 191 i2 -= 2; 192 for (i=m-2; i>=0; i--) { 193 v = aa + 4*(diag[i]+1); 194 vi = aj + diag[i] + 1; 195 nz = ai[i+1] - diag[i] - 1; 196 s1 = x[i2]; s2 = x[i2+1]; 197 while (nz--) { 198 idx = 2*(*vi++); 199 x1 = x[idx]; x2 = x[1+idx]; 200 s1 -= v[0]*x1 + v[2]*x2; 201 s2 -= v[1]*x1 + v[3]*x2; 202 v += 4; 203 } 204 x[i2] = idiag[0]*s1 + idiag[2]*s2; 205 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 206 idiag -= 4; 207 i2 -= 2; 208 } 209 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 210 } 211 } else { 212 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 213 } 214 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 215 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 216 PetscFunctionReturn(0); 217 } 218 219 #undef __FUNCT__ 220 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 221 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 222 { 223 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 224 PetscScalar *x,x1,x2,x3,s1,s2,s3; 225 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 226 PetscErrorCode ierr; 227 PetscInt m = a->mbs,i,i2,nz,idx; 228 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 229 230 PetscFunctionBegin; 231 its = its*lits; 232 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 233 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 234 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 235 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 236 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 237 238 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 239 240 diag = a->diag; 241 idiag = a->idiag; 242 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 243 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 244 245 if (flag & SOR_ZERO_INITIAL_GUESS) { 246 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 247 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 248 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 249 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 250 i2 = 3; 251 idiag += 9; 252 for (i=1; i<m; i++) { 253 v = aa + 9*ai[i]; 254 vi = aj + ai[i]; 255 nz = diag[i] - ai[i]; 256 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 257 while (nz--) { 258 idx = 3*(*vi++); 259 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 260 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 261 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 262 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 263 v += 9; 264 } 265 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 266 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 267 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 268 idiag += 9; 269 i2 += 3; 270 } 271 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 272 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 273 } 274 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 275 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 276 i2 = 0; 277 mdiag = a->idiag+9*a->mbs; 278 for (i=0; i<m; i++) { 279 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 280 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 281 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 282 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 283 mdiag += 9; 284 i2 += 3; 285 } 286 ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 287 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 288 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 289 } 290 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 291 idiag = a->idiag+9*a->mbs - 9; 292 i2 = 3*m - 3; 293 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 294 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 295 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 296 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 297 idiag -= 9; 298 i2 -= 3; 299 for (i=m-2; i>=0; i--) { 300 v = aa + 9*(diag[i]+1); 301 vi = aj + diag[i] + 1; 302 nz = ai[i+1] - diag[i] - 1; 303 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 304 while (nz--) { 305 idx = 3*(*vi++); 306 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 307 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 308 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 309 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 310 v += 9; 311 } 312 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 313 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 314 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 315 idiag -= 9; 316 i2 -= 3; 317 } 318 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 319 } 320 } else { 321 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 322 } 323 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 324 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 325 PetscFunctionReturn(0); 326 } 327 328 #undef __FUNCT__ 329 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 330 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 331 { 332 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 333 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 334 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 335 PetscErrorCode ierr; 336 PetscInt m = a->mbs,i,i2,nz,idx; 337 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 338 339 PetscFunctionBegin; 340 its = its*lits; 341 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 342 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 343 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 344 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 345 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 346 347 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 348 349 diag = a->diag; 350 idiag = a->idiag; 351 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 352 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 353 354 if (flag & SOR_ZERO_INITIAL_GUESS) { 355 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 356 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 357 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 358 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 359 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 360 i2 = 4; 361 idiag += 16; 362 for (i=1; i<m; i++) { 363 v = aa + 16*ai[i]; 364 vi = aj + ai[i]; 365 nz = diag[i] - ai[i]; 366 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 367 while (nz--) { 368 idx = 4*(*vi++); 369 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 370 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 371 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 372 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 373 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 374 v += 16; 375 } 376 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 377 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 378 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 379 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 380 idiag += 16; 381 i2 += 4; 382 } 383 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 384 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 385 } 386 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 387 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 388 i2 = 0; 389 mdiag = a->idiag+16*a->mbs; 390 for (i=0; i<m; i++) { 391 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 392 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 393 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 394 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 395 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 396 mdiag += 16; 397 i2 += 4; 398 } 399 ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 400 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 401 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 402 } 403 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 404 idiag = a->idiag+16*a->mbs - 16; 405 i2 = 4*m - 4; 406 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 407 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 408 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 409 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 410 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 411 idiag -= 16; 412 i2 -= 4; 413 for (i=m-2; i>=0; i--) { 414 v = aa + 16*(diag[i]+1); 415 vi = aj + diag[i] + 1; 416 nz = ai[i+1] - diag[i] - 1; 417 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 418 while (nz--) { 419 idx = 4*(*vi++); 420 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 421 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 422 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 423 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 424 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 425 v += 16; 426 } 427 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 428 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 429 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 430 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 431 idiag -= 16; 432 i2 -= 4; 433 } 434 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 435 } 436 } else { 437 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 438 } 439 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 440 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 441 PetscFunctionReturn(0); 442 } 443 444 #undef __FUNCT__ 445 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 446 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 447 { 448 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 449 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 450 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 451 PetscErrorCode ierr; 452 PetscInt m = a->mbs,i,i2,nz,idx; 453 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 454 455 PetscFunctionBegin; 456 its = its*lits; 457 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 458 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 459 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 460 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 461 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 462 463 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 464 465 diag = a->diag; 466 idiag = a->idiag; 467 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 468 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 469 470 if (flag & SOR_ZERO_INITIAL_GUESS) { 471 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 472 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 473 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 474 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 475 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 476 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 477 i2 = 5; 478 idiag += 25; 479 for (i=1; i<m; i++) { 480 v = aa + 25*ai[i]; 481 vi = aj + ai[i]; 482 nz = diag[i] - ai[i]; 483 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 484 while (nz--) { 485 idx = 5*(*vi++); 486 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 487 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 488 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 489 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 490 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 491 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 492 v += 25; 493 } 494 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 495 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 496 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 497 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 498 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 499 idiag += 25; 500 i2 += 5; 501 } 502 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 503 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 504 } 505 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 506 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 507 i2 = 0; 508 mdiag = a->idiag+25*a->mbs; 509 for (i=0; i<m; i++) { 510 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 511 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 512 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 513 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 514 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 515 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 516 mdiag += 25; 517 i2 += 5; 518 } 519 ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 520 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 521 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 522 } 523 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 524 idiag = a->idiag+25*a->mbs - 25; 525 i2 = 5*m - 5; 526 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 527 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 528 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 529 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 530 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 531 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 532 idiag -= 25; 533 i2 -= 5; 534 for (i=m-2; i>=0; i--) { 535 v = aa + 25*(diag[i]+1); 536 vi = aj + diag[i] + 1; 537 nz = ai[i+1] - diag[i] - 1; 538 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 539 while (nz--) { 540 idx = 5*(*vi++); 541 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 542 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 543 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 544 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 545 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 546 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 547 v += 25; 548 } 549 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 550 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 551 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 552 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 553 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 554 idiag -= 25; 555 i2 -= 5; 556 } 557 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 558 } 559 } else { 560 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 561 } 562 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 563 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 564 PetscFunctionReturn(0); 565 } 566 567 /* 568 Special version for direct calls from Fortran (Used in PETSc-fun3d) 569 */ 570 #if defined(PETSC_HAVE_FORTRAN_CAPS) 571 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 572 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 573 #define matsetvaluesblocked4_ matsetvaluesblocked4 574 #endif 575 576 EXTERN_C_BEGIN 577 #undef __FUNCT__ 578 #define __FUNCT__ "matsetvaluesblocked4_" 579 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 580 { 581 Mat A = *AA; 582 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 583 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 584 PetscInt *ai=a->i,*ailen=a->ilen; 585 PetscInt *aj=a->j,stepval,lastcol = -1; 586 const PetscScalar *value = v; 587 MatScalar *ap,*aa = a->a,*bap; 588 589 PetscFunctionBegin; 590 if (A->rmap.bs != 4) SETERRABORT(A->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 591 stepval = (n-1)*4; 592 for (k=0; k<m; k++) { /* loop over added rows */ 593 row = im[k]; 594 rp = aj + ai[row]; 595 ap = aa + 16*ai[row]; 596 nrow = ailen[row]; 597 low = 0; 598 high = nrow; 599 for (l=0; l<n; l++) { /* loop over added columns */ 600 col = in[l]; 601 if (col <= lastcol) low = 0; else high = nrow; 602 lastcol = col; 603 value = v + k*(stepval+4 + l)*4; 604 while (high-low > 7) { 605 t = (low+high)/2; 606 if (rp[t] > col) high = t; 607 else low = t; 608 } 609 for (i=low; i<high; i++) { 610 if (rp[i] > col) break; 611 if (rp[i] == col) { 612 bap = ap + 16*i; 613 for (ii=0; ii<4; ii++,value+=stepval) { 614 for (jj=ii; jj<16; jj+=4) { 615 bap[jj] += *value++; 616 } 617 } 618 goto noinsert2; 619 } 620 } 621 N = nrow++ - 1; 622 high++; /* added new column index thus must search to one higher than before */ 623 /* shift up all the later entries in this row */ 624 for (ii=N; ii>=i; ii--) { 625 rp[ii+1] = rp[ii]; 626 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 627 } 628 if (N >= i) { 629 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 630 } 631 rp[i] = col; 632 bap = ap + 16*i; 633 for (ii=0; ii<4; ii++,value+=stepval) { 634 for (jj=ii; jj<16; jj+=4) { 635 bap[jj] = *value++; 636 } 637 } 638 noinsert2:; 639 low = i; 640 } 641 ailen[row] = nrow; 642 } 643 PetscFunctionReturnVoid(); 644 } 645 EXTERN_C_END 646 647 #if defined(PETSC_HAVE_FORTRAN_CAPS) 648 #define matsetvalues4_ MATSETVALUES4 649 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 650 #define matsetvalues4_ matsetvalues4 651 #endif 652 653 EXTERN_C_BEGIN 654 #undef __FUNCT__ 655 #define __FUNCT__ "MatSetValues4_" 656 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 657 { 658 Mat A = *AA; 659 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 660 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 661 PetscInt *ai=a->i,*ailen=a->ilen; 662 PetscInt *aj=a->j,brow,bcol; 663 PetscInt ridx,cidx,lastcol = -1; 664 MatScalar *ap,value,*aa=a->a,*bap; 665 666 PetscFunctionBegin; 667 for (k=0; k<m; k++) { /* loop over added rows */ 668 row = im[k]; brow = row/4; 669 rp = aj + ai[brow]; 670 ap = aa + 16*ai[brow]; 671 nrow = ailen[brow]; 672 low = 0; 673 high = nrow; 674 for (l=0; l<n; l++) { /* loop over added columns */ 675 col = in[l]; bcol = col/4; 676 ridx = row % 4; cidx = col % 4; 677 value = v[l + k*n]; 678 if (col <= lastcol) low = 0; else high = nrow; 679 lastcol = col; 680 while (high-low > 7) { 681 t = (low+high)/2; 682 if (rp[t] > bcol) high = t; 683 else low = t; 684 } 685 for (i=low; i<high; i++) { 686 if (rp[i] > bcol) break; 687 if (rp[i] == bcol) { 688 bap = ap + 16*i + 4*cidx + ridx; 689 *bap += value; 690 goto noinsert1; 691 } 692 } 693 N = nrow++ - 1; 694 high++; /* added new column thus must search to one higher than before */ 695 /* shift up all the later entries in this row */ 696 for (ii=N; ii>=i; ii--) { 697 rp[ii+1] = rp[ii]; 698 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 699 } 700 if (N>=i) { 701 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 702 } 703 rp[i] = bcol; 704 ap[16*i + 4*cidx + ridx] = value; 705 noinsert1:; 706 low = i; 707 } 708 ailen[brow] = nrow; 709 } 710 PetscFunctionReturnVoid(); 711 } 712 EXTERN_C_END 713 714 /* UGLY, ugly, ugly 715 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 716 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 717 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 718 converts the entries into single precision and then calls ..._MatScalar() to put them 719 into the single precision data structures. 720 */ 721 #if defined(PETSC_USE_MAT_SINGLE) 722 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 723 #else 724 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 725 #endif 726 727 #define CHUNKSIZE 10 728 729 /* 730 Checks for missing diagonals 731 */ 732 #undef __FUNCT__ 733 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 734 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A) 735 { 736 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 737 PetscErrorCode ierr; 738 PetscInt *diag,*jj = a->j,i; 739 740 PetscFunctionBegin; 741 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 742 diag = a->diag; 743 for (i=0; i<a->mbs; i++) { 744 if (jj[diag[i]] != i) { 745 SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i); 746 } 747 } 748 PetscFunctionReturn(0); 749 } 750 751 #undef __FUNCT__ 752 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 753 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 754 { 755 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 756 PetscErrorCode ierr; 757 PetscInt i,j,m = a->mbs; 758 759 PetscFunctionBegin; 760 if (!a->diag) { 761 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 762 } 763 for (i=0; i<m; i++) { 764 a->diag[i] = a->i[i+1]; 765 for (j=a->i[i]; j<a->i[i+1]; j++) { 766 if (a->j[j] == i) { 767 a->diag[i] = j; 768 break; 769 } 770 } 771 } 772 PetscFunctionReturn(0); 773 } 774 775 776 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 777 778 #undef __FUNCT__ 779 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 780 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 781 { 782 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 783 PetscErrorCode ierr; 784 PetscInt n = a->mbs,i; 785 786 PetscFunctionBegin; 787 *nn = n; 788 if (!ia) PetscFunctionReturn(0); 789 if (symmetric) { 790 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 791 } else if (oshift == 1) { 792 /* temporarily add 1 to i and j indices */ 793 PetscInt nz = a->i[n]; 794 for (i=0; i<nz; i++) a->j[i]++; 795 for (i=0; i<n+1; i++) a->i[i]++; 796 *ia = a->i; *ja = a->j; 797 } else { 798 *ia = a->i; *ja = a->j; 799 } 800 801 PetscFunctionReturn(0); 802 } 803 804 #undef __FUNCT__ 805 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 806 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 807 { 808 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 809 PetscErrorCode ierr; 810 PetscInt i,n = a->mbs; 811 812 PetscFunctionBegin; 813 if (!ia) PetscFunctionReturn(0); 814 if (symmetric) { 815 ierr = PetscFree(*ia);CHKERRQ(ierr); 816 ierr = PetscFree(*ja);CHKERRQ(ierr); 817 } else if (oshift == 1) { 818 PetscInt nz = a->i[n]-1; 819 for (i=0; i<nz; i++) a->j[i]--; 820 for (i=0; i<n+1; i++) a->i[i]--; 821 } 822 PetscFunctionReturn(0); 823 } 824 825 #undef __FUNCT__ 826 #define __FUNCT__ "MatDestroy_SeqBAIJ" 827 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 828 { 829 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 830 PetscErrorCode ierr; 831 832 PetscFunctionBegin; 833 #if defined(PETSC_USE_LOG) 834 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz); 835 #endif 836 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 837 if (a->row) { 838 ierr = ISDestroy(a->row);CHKERRQ(ierr); 839 } 840 if (a->col) { 841 ierr = ISDestroy(a->col);CHKERRQ(ierr); 842 } 843 ierr = PetscFree(a->diag);CHKERRQ(ierr); 844 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 845 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 846 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 847 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 848 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 849 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 850 #if defined(PETSC_USE_MAT_SINGLE) 851 ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr); 852 #endif 853 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 854 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 855 856 ierr = PetscFree(a);CHKERRQ(ierr); 857 858 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 859 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 860 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 861 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 862 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 863 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 864 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 865 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 866 PetscFunctionReturn(0); 867 } 868 869 #undef __FUNCT__ 870 #define __FUNCT__ "MatSetOption_SeqBAIJ" 871 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op) 872 { 873 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 874 PetscErrorCode ierr; 875 876 PetscFunctionBegin; 877 switch (op) { 878 case MAT_ROW_ORIENTED: 879 a->roworiented = PETSC_TRUE; 880 break; 881 case MAT_COLUMN_ORIENTED: 882 a->roworiented = PETSC_FALSE; 883 break; 884 case MAT_COLUMNS_SORTED: 885 a->sorted = PETSC_TRUE; 886 break; 887 case MAT_COLUMNS_UNSORTED: 888 a->sorted = PETSC_FALSE; 889 break; 890 case MAT_KEEP_ZEROED_ROWS: 891 a->keepzeroedrows = PETSC_TRUE; 892 break; 893 case MAT_NO_NEW_NONZERO_LOCATIONS: 894 a->nonew = 1; 895 break; 896 case MAT_NEW_NONZERO_LOCATION_ERR: 897 a->nonew = -1; 898 break; 899 case MAT_NEW_NONZERO_ALLOCATION_ERR: 900 a->nonew = -2; 901 break; 902 case MAT_YES_NEW_NONZERO_LOCATIONS: 903 a->nonew = 0; 904 break; 905 case MAT_ROWS_SORTED: 906 case MAT_ROWS_UNSORTED: 907 case MAT_YES_NEW_DIAGONALS: 908 case MAT_IGNORE_OFF_PROC_ENTRIES: 909 case MAT_USE_HASH_TABLE: 910 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 911 break; 912 case MAT_NO_NEW_DIAGONALS: 913 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 914 case MAT_SYMMETRIC: 915 case MAT_STRUCTURALLY_SYMMETRIC: 916 case MAT_NOT_SYMMETRIC: 917 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 918 case MAT_HERMITIAN: 919 case MAT_NOT_HERMITIAN: 920 case MAT_SYMMETRY_ETERNAL: 921 case MAT_NOT_SYMMETRY_ETERNAL: 922 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 923 break; 924 default: 925 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 926 } 927 PetscFunctionReturn(0); 928 } 929 930 #undef __FUNCT__ 931 #define __FUNCT__ "MatGetRow_SeqBAIJ" 932 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 933 { 934 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 935 PetscErrorCode ierr; 936 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 937 MatScalar *aa,*aa_i; 938 PetscScalar *v_i; 939 940 PetscFunctionBegin; 941 bs = A->rmap.bs; 942 ai = a->i; 943 aj = a->j; 944 aa = a->a; 945 bs2 = a->bs2; 946 947 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 948 949 bn = row/bs; /* Block number */ 950 bp = row % bs; /* Block Position */ 951 M = ai[bn+1] - ai[bn]; 952 *nz = bs*M; 953 954 if (v) { 955 *v = 0; 956 if (*nz) { 957 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 958 for (i=0; i<M; i++) { /* for each block in the block row */ 959 v_i = *v + i*bs; 960 aa_i = aa + bs2*(ai[bn] + i); 961 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 962 } 963 } 964 } 965 966 if (idx) { 967 *idx = 0; 968 if (*nz) { 969 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 970 for (i=0; i<M; i++) { /* for each block in the block row */ 971 idx_i = *idx + i*bs; 972 itmp = bs*aj[ai[bn] + i]; 973 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 974 } 975 } 976 } 977 PetscFunctionReturn(0); 978 } 979 980 #undef __FUNCT__ 981 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 982 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 983 { 984 PetscErrorCode ierr; 985 986 PetscFunctionBegin; 987 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 988 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 989 PetscFunctionReturn(0); 990 } 991 992 #undef __FUNCT__ 993 #define __FUNCT__ "MatTranspose_SeqBAIJ" 994 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 995 { 996 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 997 Mat C; 998 PetscErrorCode ierr; 999 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1000 PetscInt *rows,*cols,bs2=a->bs2; 1001 PetscScalar *array; 1002 1003 PetscFunctionBegin; 1004 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 1005 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1006 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1007 1008 #if defined(PETSC_USE_MAT_SINGLE) 1009 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 1010 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 1011 #else 1012 array = a->a; 1013 #endif 1014 1015 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1016 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 1017 ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr); 1018 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1019 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1020 ierr = PetscFree(col);CHKERRQ(ierr); 1021 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1022 cols = rows + bs; 1023 for (i=0; i<mbs; i++) { 1024 cols[0] = i*bs; 1025 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1026 len = ai[i+1] - ai[i]; 1027 for (j=0; j<len; j++) { 1028 rows[0] = (*aj++)*bs; 1029 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1030 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1031 array += bs2; 1032 } 1033 } 1034 ierr = PetscFree(rows);CHKERRQ(ierr); 1035 #if defined(PETSC_USE_MAT_SINGLE) 1036 ierr = PetscFree(array); 1037 #endif 1038 1039 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1040 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1041 1042 if (B) { 1043 *B = C; 1044 } else { 1045 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1046 } 1047 PetscFunctionReturn(0); 1048 } 1049 1050 #undef __FUNCT__ 1051 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1052 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1053 { 1054 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1055 PetscErrorCode ierr; 1056 PetscInt i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2; 1057 int fd; 1058 PetscScalar *aa; 1059 FILE *file; 1060 1061 PetscFunctionBegin; 1062 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1063 ierr = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1064 col_lens[0] = MAT_FILE_COOKIE; 1065 1066 col_lens[1] = A->rmap.N; 1067 col_lens[2] = A->cmap.n; 1068 col_lens[3] = a->nz*bs2; 1069 1070 /* store lengths of each row and write (including header) to file */ 1071 count = 0; 1072 for (i=0; i<a->mbs; i++) { 1073 for (j=0; j<bs; j++) { 1074 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1075 } 1076 } 1077 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1078 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1079 1080 /* store column indices (zero start index) */ 1081 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1082 count = 0; 1083 for (i=0; i<a->mbs; i++) { 1084 for (j=0; j<bs; j++) { 1085 for (k=a->i[i]; k<a->i[i+1]; k++) { 1086 for (l=0; l<bs; l++) { 1087 jj[count++] = bs*a->j[k] + l; 1088 } 1089 } 1090 } 1091 } 1092 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1093 ierr = PetscFree(jj);CHKERRQ(ierr); 1094 1095 /* store nonzero values */ 1096 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1097 count = 0; 1098 for (i=0; i<a->mbs; i++) { 1099 for (j=0; j<bs; j++) { 1100 for (k=a->i[i]; k<a->i[i+1]; k++) { 1101 for (l=0; l<bs; l++) { 1102 aa[count++] = a->a[bs2*k + l*bs + j]; 1103 } 1104 } 1105 } 1106 } 1107 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1108 ierr = PetscFree(aa);CHKERRQ(ierr); 1109 1110 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1111 if (file) { 1112 fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs); 1113 } 1114 PetscFunctionReturn(0); 1115 } 1116 1117 #undef __FUNCT__ 1118 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1119 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1120 { 1121 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1122 PetscErrorCode ierr; 1123 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 1124 PetscViewerFormat format; 1125 1126 PetscFunctionBegin; 1127 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1128 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1129 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1130 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1131 Mat aij; 1132 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1133 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1134 ierr = MatDestroy(aij);CHKERRQ(ierr); 1135 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1136 PetscFunctionReturn(0); 1137 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1138 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1139 for (i=0; i<a->mbs; i++) { 1140 for (j=0; j<bs; j++) { 1141 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1142 for (k=a->i[i]; k<a->i[i+1]; k++) { 1143 for (l=0; l<bs; l++) { 1144 #if defined(PETSC_USE_COMPLEX) 1145 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1146 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1147 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1148 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1149 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1150 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1151 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1152 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1153 } 1154 #else 1155 if (a->a[bs2*k + l*bs + j] != 0.0) { 1156 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1157 } 1158 #endif 1159 } 1160 } 1161 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1162 } 1163 } 1164 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1165 } else { 1166 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1167 for (i=0; i<a->mbs; i++) { 1168 for (j=0; j<bs; j++) { 1169 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1170 for (k=a->i[i]; k<a->i[i+1]; k++) { 1171 for (l=0; l<bs; l++) { 1172 #if defined(PETSC_USE_COMPLEX) 1173 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1174 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1175 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1176 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1177 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1178 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1179 } else { 1180 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1181 } 1182 #else 1183 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1184 #endif 1185 } 1186 } 1187 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1188 } 1189 } 1190 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1191 } 1192 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1193 PetscFunctionReturn(0); 1194 } 1195 1196 #undef __FUNCT__ 1197 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1198 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1199 { 1200 Mat A = (Mat) Aa; 1201 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1202 PetscErrorCode ierr; 1203 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 1204 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1205 MatScalar *aa; 1206 PetscViewer viewer; 1207 1208 PetscFunctionBegin; 1209 1210 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1211 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1212 1213 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1214 1215 /* loop over matrix elements drawing boxes */ 1216 color = PETSC_DRAW_BLUE; 1217 for (i=0,row=0; i<mbs; i++,row+=bs) { 1218 for (j=a->i[i]; j<a->i[i+1]; j++) { 1219 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1220 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1221 aa = a->a + j*bs2; 1222 for (k=0; k<bs; k++) { 1223 for (l=0; l<bs; l++) { 1224 if (PetscRealPart(*aa++) >= 0.) continue; 1225 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1226 } 1227 } 1228 } 1229 } 1230 color = PETSC_DRAW_CYAN; 1231 for (i=0,row=0; i<mbs; i++,row+=bs) { 1232 for (j=a->i[i]; j<a->i[i+1]; j++) { 1233 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1234 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1235 aa = a->a + j*bs2; 1236 for (k=0; k<bs; k++) { 1237 for (l=0; l<bs; l++) { 1238 if (PetscRealPart(*aa++) != 0.) continue; 1239 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1240 } 1241 } 1242 } 1243 } 1244 1245 color = PETSC_DRAW_RED; 1246 for (i=0,row=0; i<mbs; i++,row+=bs) { 1247 for (j=a->i[i]; j<a->i[i+1]; j++) { 1248 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1249 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1250 aa = a->a + j*bs2; 1251 for (k=0; k<bs; k++) { 1252 for (l=0; l<bs; l++) { 1253 if (PetscRealPart(*aa++) <= 0.) continue; 1254 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1255 } 1256 } 1257 } 1258 } 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #undef __FUNCT__ 1263 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1264 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1265 { 1266 PetscErrorCode ierr; 1267 PetscReal xl,yl,xr,yr,w,h; 1268 PetscDraw draw; 1269 PetscTruth isnull; 1270 1271 PetscFunctionBegin; 1272 1273 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1274 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1275 1276 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1277 xr = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 1278 xr += w; yr += h; xl = -w; yl = -h; 1279 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1280 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1281 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1282 PetscFunctionReturn(0); 1283 } 1284 1285 #undef __FUNCT__ 1286 #define __FUNCT__ "MatView_SeqBAIJ" 1287 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1288 { 1289 PetscErrorCode ierr; 1290 PetscTruth iascii,isbinary,isdraw; 1291 1292 PetscFunctionBegin; 1293 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1294 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1295 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1296 if (iascii){ 1297 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1298 } else if (isbinary) { 1299 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1300 } else if (isdraw) { 1301 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1302 } else { 1303 Mat B; 1304 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1305 ierr = MatView(B,viewer);CHKERRQ(ierr); 1306 ierr = MatDestroy(B);CHKERRQ(ierr); 1307 } 1308 PetscFunctionReturn(0); 1309 } 1310 1311 1312 #undef __FUNCT__ 1313 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1314 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1315 { 1316 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1317 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1318 PetscInt *ai = a->i,*ailen = a->ilen; 1319 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 1320 MatScalar *ap,*aa = a->a,zero = 0.0; 1321 1322 PetscFunctionBegin; 1323 for (k=0; k<m; k++) { /* loop over rows */ 1324 row = im[k]; brow = row/bs; 1325 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 1326 if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1327 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1328 nrow = ailen[brow]; 1329 for (l=0; l<n; l++) { /* loop over columns */ 1330 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 1331 if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1332 col = in[l] ; 1333 bcol = col/bs; 1334 cidx = col%bs; 1335 ridx = row%bs; 1336 high = nrow; 1337 low = 0; /* assume unsorted */ 1338 while (high-low > 5) { 1339 t = (low+high)/2; 1340 if (rp[t] > bcol) high = t; 1341 else low = t; 1342 } 1343 for (i=low; i<high; i++) { 1344 if (rp[i] > bcol) break; 1345 if (rp[i] == bcol) { 1346 *v++ = ap[bs2*i+bs*cidx+ridx]; 1347 goto finished; 1348 } 1349 } 1350 *v++ = zero; 1351 finished:; 1352 } 1353 } 1354 PetscFunctionReturn(0); 1355 } 1356 1357 #if defined(PETSC_USE_MAT_SINGLE) 1358 #undef __FUNCT__ 1359 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1360 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 1361 { 1362 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1363 PetscErrorCode ierr; 1364 PetscInt i,N = m*n*b->bs2; 1365 MatScalar *vsingle; 1366 1367 PetscFunctionBegin; 1368 if (N > b->setvalueslen) { 1369 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 1370 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1371 b->setvalueslen = N; 1372 } 1373 vsingle = b->setvaluescopy; 1374 for (i=0; i<N; i++) { 1375 vsingle[i] = v[i]; 1376 } 1377 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1378 PetscFunctionReturn(0); 1379 } 1380 #endif 1381 1382 1383 #undef __FUNCT__ 1384 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1385 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is) 1386 { 1387 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1388 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1389 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1390 PetscErrorCode ierr; 1391 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 1392 PetscTruth roworiented=a->roworiented; 1393 const MatScalar *value = v; 1394 MatScalar *ap,*aa = a->a,*bap; 1395 1396 PetscFunctionBegin; 1397 if (roworiented) { 1398 stepval = (n-1)*bs; 1399 } else { 1400 stepval = (m-1)*bs; 1401 } 1402 for (k=0; k<m; k++) { /* loop over added rows */ 1403 row = im[k]; 1404 if (row < 0) continue; 1405 #if defined(PETSC_USE_DEBUG) 1406 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1407 #endif 1408 rp = aj + ai[row]; 1409 ap = aa + bs2*ai[row]; 1410 rmax = imax[row]; 1411 nrow = ailen[row]; 1412 low = 0; 1413 high = nrow; 1414 for (l=0; l<n; l++) { /* loop over added columns */ 1415 if (in[l] < 0) continue; 1416 #if defined(PETSC_USE_DEBUG) 1417 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1418 #endif 1419 col = in[l]; 1420 if (roworiented) { 1421 value = v + k*(stepval+bs)*bs + l*bs; 1422 } else { 1423 value = v + l*(stepval+bs)*bs + k*bs; 1424 } 1425 if (col <= lastcol) low = 0; else high = nrow; 1426 lastcol = col; 1427 while (high-low > 7) { 1428 t = (low+high)/2; 1429 if (rp[t] > col) high = t; 1430 else low = t; 1431 } 1432 for (i=low; i<high; i++) { 1433 if (rp[i] > col) break; 1434 if (rp[i] == col) { 1435 bap = ap + bs2*i; 1436 if (roworiented) { 1437 if (is == ADD_VALUES) { 1438 for (ii=0; ii<bs; ii++,value+=stepval) { 1439 for (jj=ii; jj<bs2; jj+=bs) { 1440 bap[jj] += *value++; 1441 } 1442 } 1443 } else { 1444 for (ii=0; ii<bs; ii++,value+=stepval) { 1445 for (jj=ii; jj<bs2; jj+=bs) { 1446 bap[jj] = *value++; 1447 } 1448 } 1449 } 1450 } else { 1451 if (is == ADD_VALUES) { 1452 for (ii=0; ii<bs; ii++,value+=stepval) { 1453 for (jj=0; jj<bs; jj++) { 1454 *bap++ += *value++; 1455 } 1456 } 1457 } else { 1458 for (ii=0; ii<bs; ii++,value+=stepval) { 1459 for (jj=0; jj<bs; jj++) { 1460 *bap++ = *value++; 1461 } 1462 } 1463 } 1464 } 1465 goto noinsert2; 1466 } 1467 } 1468 if (nonew == 1) goto noinsert2; 1469 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1470 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1471 N = nrow++ - 1; high++; 1472 /* shift up all the later entries in this row */ 1473 for (ii=N; ii>=i; ii--) { 1474 rp[ii+1] = rp[ii]; 1475 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1476 } 1477 if (N >= i) { 1478 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1479 } 1480 rp[i] = col; 1481 bap = ap + bs2*i; 1482 if (roworiented) { 1483 for (ii=0; ii<bs; ii++,value+=stepval) { 1484 for (jj=ii; jj<bs2; jj+=bs) { 1485 bap[jj] = *value++; 1486 } 1487 } 1488 } else { 1489 for (ii=0; ii<bs; ii++,value+=stepval) { 1490 for (jj=0; jj<bs; jj++) { 1491 *bap++ = *value++; 1492 } 1493 } 1494 } 1495 noinsert2:; 1496 low = i; 1497 } 1498 ailen[row] = nrow; 1499 } 1500 PetscFunctionReturn(0); 1501 } 1502 1503 #undef __FUNCT__ 1504 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1505 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1506 { 1507 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1508 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1509 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 1510 PetscErrorCode ierr; 1511 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1512 MatScalar *aa = a->a,*ap; 1513 PetscReal ratio=0.6; 1514 1515 PetscFunctionBegin; 1516 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1517 1518 if (m) rmax = ailen[0]; 1519 for (i=1; i<mbs; i++) { 1520 /* move each row back by the amount of empty slots (fshift) before it*/ 1521 fshift += imax[i-1] - ailen[i-1]; 1522 rmax = PetscMax(rmax,ailen[i]); 1523 if (fshift) { 1524 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1525 N = ailen[i]; 1526 for (j=0; j<N; j++) { 1527 ip[j-fshift] = ip[j]; 1528 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1529 } 1530 } 1531 ai[i] = ai[i-1] + ailen[i-1]; 1532 } 1533 if (mbs) { 1534 fshift += imax[mbs-1] - ailen[mbs-1]; 1535 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1536 } 1537 /* reset ilen and imax for each row */ 1538 for (i=0; i<mbs; i++) { 1539 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1540 } 1541 a->nz = ai[mbs]; 1542 1543 /* diagonals may have moved, so kill the diagonal pointers */ 1544 a->idiagvalid = PETSC_FALSE; 1545 if (fshift && a->diag) { 1546 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1547 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1548 a->diag = 0; 1549 } 1550 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1551 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1552 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1553 a->reallocs = 0; 1554 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1555 1556 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1557 if (a->compressedrow.use){ 1558 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1559 } 1560 1561 A->same_nonzero = PETSC_TRUE; 1562 PetscFunctionReturn(0); 1563 } 1564 1565 /* 1566 This function returns an array of flags which indicate the locations of contiguous 1567 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1568 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1569 Assume: sizes should be long enough to hold all the values. 1570 */ 1571 #undef __FUNCT__ 1572 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1573 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1574 { 1575 PetscInt i,j,k,row; 1576 PetscTruth flg; 1577 1578 PetscFunctionBegin; 1579 for (i=0,j=0; i<n; j++) { 1580 row = idx[i]; 1581 if (row%bs!=0) { /* Not the begining of a block */ 1582 sizes[j] = 1; 1583 i++; 1584 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1585 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1586 i++; 1587 } else { /* Begining of the block, so check if the complete block exists */ 1588 flg = PETSC_TRUE; 1589 for (k=1; k<bs; k++) { 1590 if (row+k != idx[i+k]) { /* break in the block */ 1591 flg = PETSC_FALSE; 1592 break; 1593 } 1594 } 1595 if (flg) { /* No break in the bs */ 1596 sizes[j] = bs; 1597 i+= bs; 1598 } else { 1599 sizes[j] = 1; 1600 i++; 1601 } 1602 } 1603 } 1604 *bs_max = j; 1605 PetscFunctionReturn(0); 1606 } 1607 1608 #undef __FUNCT__ 1609 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1610 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 1611 { 1612 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1613 PetscErrorCode ierr; 1614 PetscInt i,j,k,count,*rows; 1615 PetscInt bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max; 1616 PetscScalar zero = 0.0; 1617 MatScalar *aa; 1618 1619 PetscFunctionBegin; 1620 /* Make a copy of the IS and sort it */ 1621 /* allocate memory for rows,sizes */ 1622 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1623 sizes = rows + is_n; 1624 1625 /* copy IS values to rows, and sort them */ 1626 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1627 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1628 if (baij->keepzeroedrows) { 1629 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1630 bs_max = is_n; 1631 A->same_nonzero = PETSC_TRUE; 1632 } else { 1633 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1634 A->same_nonzero = PETSC_FALSE; 1635 } 1636 1637 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1638 row = rows[j]; 1639 if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 1640 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1641 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1642 if (sizes[i] == bs && !baij->keepzeroedrows) { 1643 if (diag != 0.0) { 1644 if (baij->ilen[row/bs] > 0) { 1645 baij->ilen[row/bs] = 1; 1646 baij->j[baij->i[row/bs]] = row/bs; 1647 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1648 } 1649 /* Now insert all the diagonal values for this bs */ 1650 for (k=0; k<bs; k++) { 1651 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 1652 } 1653 } else { /* (diag == 0.0) */ 1654 baij->ilen[row/bs] = 0; 1655 } /* end (diag == 0.0) */ 1656 } else { /* (sizes[i] != bs) */ 1657 #if defined (PETSC_USE_DEBUG) 1658 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 1659 #endif 1660 for (k=0; k<count; k++) { 1661 aa[0] = zero; 1662 aa += bs; 1663 } 1664 if (diag != 0.0) { 1665 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 1666 } 1667 } 1668 } 1669 1670 ierr = PetscFree(rows);CHKERRQ(ierr); 1671 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1672 PetscFunctionReturn(0); 1673 } 1674 1675 #undef __FUNCT__ 1676 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1677 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1678 { 1679 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1680 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 1681 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1682 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 1683 PetscErrorCode ierr; 1684 PetscInt ridx,cidx,bs2=a->bs2; 1685 PetscTruth roworiented=a->roworiented; 1686 MatScalar *ap,value,*aa=a->a,*bap; 1687 1688 PetscFunctionBegin; 1689 for (k=0; k<m; k++) { /* loop over added rows */ 1690 row = im[k]; 1691 brow = row/bs; 1692 if (row < 0) continue; 1693 #if defined(PETSC_USE_DEBUG) 1694 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 1695 #endif 1696 rp = aj + ai[brow]; 1697 ap = aa + bs2*ai[brow]; 1698 rmax = imax[brow]; 1699 nrow = ailen[brow]; 1700 low = 0; 1701 high = nrow; 1702 for (l=0; l<n; l++) { /* loop over added columns */ 1703 if (in[l] < 0) continue; 1704 #if defined(PETSC_USE_DEBUG) 1705 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 1706 #endif 1707 col = in[l]; bcol = col/bs; 1708 ridx = row % bs; cidx = col % bs; 1709 if (roworiented) { 1710 value = v[l + k*n]; 1711 } else { 1712 value = v[k + l*m]; 1713 } 1714 if (col <= lastcol) low = 0; else high = nrow; 1715 lastcol = col; 1716 while (high-low > 7) { 1717 t = (low+high)/2; 1718 if (rp[t] > bcol) high = t; 1719 else low = t; 1720 } 1721 for (i=low; i<high; i++) { 1722 if (rp[i] > bcol) break; 1723 if (rp[i] == bcol) { 1724 bap = ap + bs2*i + bs*cidx + ridx; 1725 if (is == ADD_VALUES) *bap += value; 1726 else *bap = value; 1727 goto noinsert1; 1728 } 1729 } 1730 if (nonew == 1) goto noinsert1; 1731 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1732 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1733 N = nrow++ - 1; high++; 1734 /* shift up all the later entries in this row */ 1735 for (ii=N; ii>=i; ii--) { 1736 rp[ii+1] = rp[ii]; 1737 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1738 } 1739 if (N>=i) { 1740 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1741 } 1742 rp[i] = bcol; 1743 ap[bs2*i + bs*cidx + ridx] = value; 1744 a->nz++; 1745 noinsert1:; 1746 low = i; 1747 } 1748 ailen[brow] = nrow; 1749 } 1750 A->same_nonzero = PETSC_FALSE; 1751 PetscFunctionReturn(0); 1752 } 1753 1754 1755 #undef __FUNCT__ 1756 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1757 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1758 { 1759 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1760 Mat outA; 1761 PetscErrorCode ierr; 1762 PetscTruth row_identity,col_identity; 1763 1764 PetscFunctionBegin; 1765 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1766 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1767 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1768 if (!row_identity || !col_identity) { 1769 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 1770 } 1771 1772 outA = inA; 1773 inA->factor = FACTOR_LU; 1774 1775 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1776 1777 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1778 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 1779 a->row = row; 1780 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1781 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 1782 a->col = col; 1783 1784 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1785 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1786 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1787 1788 /* 1789 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1790 for ILU(0) factorization with natural ordering 1791 */ 1792 if (inA->rmap.bs < 8) { 1793 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1794 } else { 1795 if (!a->solve_work) { 1796 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1797 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1798 } 1799 } 1800 1801 ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1802 1803 PetscFunctionReturn(0); 1804 } 1805 1806 EXTERN_C_BEGIN 1807 #undef __FUNCT__ 1808 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1809 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 1810 { 1811 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1812 PetscInt i,nz,nbs; 1813 1814 PetscFunctionBegin; 1815 nz = baij->maxnz/baij->bs2; 1816 nbs = baij->nbs; 1817 for (i=0; i<nz; i++) { 1818 baij->j[i] = indices[i]; 1819 } 1820 baij->nz = nz; 1821 for (i=0; i<nbs; i++) { 1822 baij->ilen[i] = baij->imax[i]; 1823 } 1824 1825 PetscFunctionReturn(0); 1826 } 1827 EXTERN_C_END 1828 1829 #undef __FUNCT__ 1830 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1831 /*@ 1832 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1833 in the matrix. 1834 1835 Input Parameters: 1836 + mat - the SeqBAIJ matrix 1837 - indices - the column indices 1838 1839 Level: advanced 1840 1841 Notes: 1842 This can be called if you have precomputed the nonzero structure of the 1843 matrix and want to provide it to the matrix object to improve the performance 1844 of the MatSetValues() operation. 1845 1846 You MUST have set the correct numbers of nonzeros per row in the call to 1847 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 1848 1849 MUST be called before any calls to MatSetValues(); 1850 1851 @*/ 1852 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1853 { 1854 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1855 1856 PetscFunctionBegin; 1857 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1858 PetscValidPointer(indices,2); 1859 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1860 if (f) { 1861 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1862 } else { 1863 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 1864 } 1865 PetscFunctionReturn(0); 1866 } 1867 1868 #undef __FUNCT__ 1869 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 1870 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 1871 { 1872 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1873 PetscErrorCode ierr; 1874 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 1875 PetscReal atmp; 1876 PetscScalar *x,zero = 0.0; 1877 MatScalar *aa; 1878 PetscInt ncols,brow,krow,kcol; 1879 1880 PetscFunctionBegin; 1881 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1882 bs = A->rmap.bs; 1883 aa = a->a; 1884 ai = a->i; 1885 aj = a->j; 1886 mbs = a->mbs; 1887 1888 ierr = VecSet(v,zero);CHKERRQ(ierr); 1889 if (idx) { 1890 for (i=0; i<A->rmap.n;i++) idx[i] = 0; 1891 } 1892 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1893 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1894 if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1895 for (i=0; i<mbs; i++) { 1896 ncols = ai[1] - ai[0]; ai++; 1897 brow = bs*i; 1898 for (j=0; j<ncols; j++){ 1899 for (kcol=0; kcol<bs; kcol++){ 1900 for (krow=0; krow<bs; krow++){ 1901 atmp = PetscAbsScalar(*aa);aa++; 1902 row = brow + krow; /* row index */ 1903 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 1904 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 1905 } 1906 } 1907 aj++; 1908 } 1909 } 1910 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1911 PetscFunctionReturn(0); 1912 } 1913 1914 #undef __FUNCT__ 1915 #define __FUNCT__ "MatCopy_SeqBAIJ" 1916 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 1917 { 1918 PetscErrorCode ierr; 1919 1920 PetscFunctionBegin; 1921 /* If the two matrices have the same copy implementation, use fast copy. */ 1922 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1923 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1924 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 1925 1926 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 1927 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1928 } 1929 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 1930 } else { 1931 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1932 } 1933 PetscFunctionReturn(0); 1934 } 1935 1936 #undef __FUNCT__ 1937 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1938 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 1939 { 1940 PetscErrorCode ierr; 1941 1942 PetscFunctionBegin; 1943 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 1944 PetscFunctionReturn(0); 1945 } 1946 1947 #undef __FUNCT__ 1948 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1949 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1950 { 1951 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1952 PetscFunctionBegin; 1953 *array = a->a; 1954 PetscFunctionReturn(0); 1955 } 1956 1957 #undef __FUNCT__ 1958 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1959 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1960 { 1961 PetscFunctionBegin; 1962 PetscFunctionReturn(0); 1963 } 1964 1965 #include "petscblaslapack.h" 1966 #undef __FUNCT__ 1967 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1968 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1969 { 1970 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1971 PetscErrorCode ierr; 1972 PetscInt i,bs=Y->rmap.bs,j,bs2; 1973 PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz; 1974 1975 PetscFunctionBegin; 1976 if (str == SAME_NONZERO_PATTERN) { 1977 PetscScalar alpha = a; 1978 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1979 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1980 if (y->xtoy && y->XtoY != X) { 1981 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1982 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1983 } 1984 if (!y->xtoy) { /* get xtoy */ 1985 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1986 y->XtoY = X; 1987 } 1988 bs2 = bs*bs; 1989 for (i=0; i<x->nz; i++) { 1990 j = 0; 1991 while (j < bs2){ 1992 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 1993 j++; 1994 } 1995 } 1996 ierr = PetscInfo3(0,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 1997 } else { 1998 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1999 } 2000 PetscFunctionReturn(0); 2001 } 2002 2003 #undef __FUNCT__ 2004 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2005 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2006 { 2007 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2008 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2009 PetscScalar *aa = a->a; 2010 2011 PetscFunctionBegin; 2012 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2013 PetscFunctionReturn(0); 2014 } 2015 2016 #undef __FUNCT__ 2017 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2018 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2019 { 2020 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2021 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2022 PetscScalar *aa = a->a; 2023 2024 PetscFunctionBegin; 2025 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2026 PetscFunctionReturn(0); 2027 } 2028 2029 2030 /* -------------------------------------------------------------------*/ 2031 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2032 MatGetRow_SeqBAIJ, 2033 MatRestoreRow_SeqBAIJ, 2034 MatMult_SeqBAIJ_N, 2035 /* 4*/ MatMultAdd_SeqBAIJ_N, 2036 MatMultTranspose_SeqBAIJ, 2037 MatMultTransposeAdd_SeqBAIJ, 2038 MatSolve_SeqBAIJ_N, 2039 0, 2040 0, 2041 /*10*/ 0, 2042 MatLUFactor_SeqBAIJ, 2043 0, 2044 0, 2045 MatTranspose_SeqBAIJ, 2046 /*15*/ MatGetInfo_SeqBAIJ, 2047 MatEqual_SeqBAIJ, 2048 MatGetDiagonal_SeqBAIJ, 2049 MatDiagonalScale_SeqBAIJ, 2050 MatNorm_SeqBAIJ, 2051 /*20*/ 0, 2052 MatAssemblyEnd_SeqBAIJ, 2053 0, 2054 MatSetOption_SeqBAIJ, 2055 MatZeroEntries_SeqBAIJ, 2056 /*25*/ MatZeroRows_SeqBAIJ, 2057 MatLUFactorSymbolic_SeqBAIJ, 2058 MatLUFactorNumeric_SeqBAIJ_N, 2059 MatCholeskyFactorSymbolic_SeqBAIJ, 2060 MatCholeskyFactorNumeric_SeqBAIJ_N, 2061 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2062 MatILUFactorSymbolic_SeqBAIJ, 2063 MatICCFactorSymbolic_SeqBAIJ, 2064 MatGetArray_SeqBAIJ, 2065 MatRestoreArray_SeqBAIJ, 2066 /*35*/ MatDuplicate_SeqBAIJ, 2067 0, 2068 0, 2069 MatILUFactor_SeqBAIJ, 2070 0, 2071 /*40*/ MatAXPY_SeqBAIJ, 2072 MatGetSubMatrices_SeqBAIJ, 2073 MatIncreaseOverlap_SeqBAIJ, 2074 MatGetValues_SeqBAIJ, 2075 MatCopy_SeqBAIJ, 2076 /*45*/ 0, 2077 MatScale_SeqBAIJ, 2078 0, 2079 0, 2080 0, 2081 /*50*/ 0, 2082 MatGetRowIJ_SeqBAIJ, 2083 MatRestoreRowIJ_SeqBAIJ, 2084 0, 2085 0, 2086 /*55*/ 0, 2087 0, 2088 0, 2089 0, 2090 MatSetValuesBlocked_SeqBAIJ, 2091 /*60*/ MatGetSubMatrix_SeqBAIJ, 2092 MatDestroy_SeqBAIJ, 2093 MatView_SeqBAIJ, 2094 0, 2095 0, 2096 /*65*/ 0, 2097 0, 2098 0, 2099 0, 2100 0, 2101 /*70*/ MatGetRowMaxAbs_SeqBAIJ, 2102 MatConvert_Basic, 2103 0, 2104 0, 2105 0, 2106 /*75*/ 0, 2107 0, 2108 0, 2109 0, 2110 0, 2111 /*80*/ 0, 2112 0, 2113 0, 2114 0, 2115 MatLoad_SeqBAIJ, 2116 /*85*/ 0, 2117 0, 2118 0, 2119 0, 2120 0, 2121 /*90*/ 0, 2122 0, 2123 0, 2124 0, 2125 0, 2126 /*95*/ 0, 2127 0, 2128 0, 2129 0, 2130 0, 2131 /*100*/0, 2132 0, 2133 0, 2134 0, 2135 0, 2136 /*105*/0, 2137 MatRealPart_SeqBAIJ, 2138 MatImaginaryPart_SeqBAIJ 2139 }; 2140 2141 EXTERN_C_BEGIN 2142 #undef __FUNCT__ 2143 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2144 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2145 { 2146 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2147 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2148 PetscErrorCode ierr; 2149 2150 PetscFunctionBegin; 2151 if (aij->nonew != 1) { 2152 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2153 } 2154 2155 /* allocate space for values if not already there */ 2156 if (!aij->saved_values) { 2157 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2158 } 2159 2160 /* copy values over */ 2161 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2162 PetscFunctionReturn(0); 2163 } 2164 EXTERN_C_END 2165 2166 EXTERN_C_BEGIN 2167 #undef __FUNCT__ 2168 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2169 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2170 { 2171 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2172 PetscErrorCode ierr; 2173 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2174 2175 PetscFunctionBegin; 2176 if (aij->nonew != 1) { 2177 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2178 } 2179 if (!aij->saved_values) { 2180 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2181 } 2182 2183 /* copy values over */ 2184 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2185 PetscFunctionReturn(0); 2186 } 2187 EXTERN_C_END 2188 2189 EXTERN_C_BEGIN 2190 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2191 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2192 EXTERN_C_END 2193 2194 EXTERN_C_BEGIN 2195 #undef __FUNCT__ 2196 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2197 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2198 { 2199 Mat_SeqBAIJ *b; 2200 PetscErrorCode ierr; 2201 PetscInt i,mbs,nbs,bs2,newbs = bs; 2202 PetscTruth flg,skipallocation = PETSC_FALSE; 2203 2204 PetscFunctionBegin; 2205 2206 if (nz == MAT_SKIP_ALLOCATION) { 2207 skipallocation = PETSC_TRUE; 2208 nz = 0; 2209 } 2210 2211 ierr = PetscOptionsBegin(B->comm,B->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2212 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2213 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2214 2215 if (nnz && newbs != bs) { 2216 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2217 } 2218 bs = newbs; 2219 2220 B->rmap.bs = B->cmap.bs = bs; 2221 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2222 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2223 2224 B->preallocated = PETSC_TRUE; 2225 2226 mbs = B->rmap.n/bs; 2227 nbs = B->cmap.n/bs; 2228 bs2 = bs*bs; 2229 2230 if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) { 2231 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs); 2232 } 2233 2234 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2235 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2236 if (nnz) { 2237 for (i=0; i<mbs; i++) { 2238 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2239 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2240 } 2241 } 2242 2243 b = (Mat_SeqBAIJ*)B->data; 2244 ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2245 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2246 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2247 2248 B->ops->solve = MatSolve_SeqBAIJ_Update; 2249 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2250 if (!flg) { 2251 switch (bs) { 2252 case 1: 2253 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2254 B->ops->mult = MatMult_SeqBAIJ_1; 2255 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2256 break; 2257 case 2: 2258 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2259 B->ops->mult = MatMult_SeqBAIJ_2; 2260 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2261 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2262 break; 2263 case 3: 2264 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2265 B->ops->mult = MatMult_SeqBAIJ_3; 2266 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2267 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2268 break; 2269 case 4: 2270 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2271 B->ops->mult = MatMult_SeqBAIJ_4; 2272 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2273 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2274 break; 2275 case 5: 2276 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2277 B->ops->mult = MatMult_SeqBAIJ_5; 2278 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2279 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2280 break; 2281 case 6: 2282 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2283 B->ops->mult = MatMult_SeqBAIJ_6; 2284 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2285 break; 2286 case 7: 2287 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2288 B->ops->mult = MatMult_SeqBAIJ_7; 2289 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2290 break; 2291 default: 2292 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2293 B->ops->mult = MatMult_SeqBAIJ_N; 2294 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2295 break; 2296 } 2297 } 2298 B->rmap.bs = bs; 2299 b->mbs = mbs; 2300 b->nbs = nbs; 2301 if (!skipallocation) { 2302 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2303 /* b->ilen will count nonzeros in each block row so far. */ 2304 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2305 if (!nnz) { 2306 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2307 else if (nz <= 0) nz = 1; 2308 for (i=0; i<mbs; i++) b->imax[i] = nz; 2309 nz = nz*mbs; 2310 } else { 2311 nz = 0; 2312 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2313 } 2314 2315 /* allocate the matrix space */ 2316 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 2317 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2318 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2319 b->singlemalloc = PETSC_TRUE; 2320 2321 b->i[0] = 0; 2322 for (i=1; i<mbs+1; i++) { 2323 b->i[i] = b->i[i-1] + b->imax[i-1]; 2324 } 2325 b->free_a = PETSC_TRUE; 2326 b->free_ij = PETSC_TRUE; 2327 } else { 2328 b->free_a = PETSC_FALSE; 2329 b->free_ij = PETSC_FALSE; 2330 } 2331 2332 B->rmap.bs = bs; 2333 b->bs2 = bs2; 2334 b->mbs = mbs; 2335 b->nz = 0; 2336 b->maxnz = nz*bs2; 2337 B->info.nz_unneeded = (PetscReal)b->maxnz; 2338 PetscFunctionReturn(0); 2339 } 2340 EXTERN_C_END 2341 2342 /*MC 2343 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2344 block sparse compressed row format. 2345 2346 Options Database Keys: 2347 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2348 2349 Level: beginner 2350 2351 .seealso: MatCreateSeqBAIJ() 2352 M*/ 2353 2354 EXTERN_C_BEGIN 2355 #undef __FUNCT__ 2356 #define __FUNCT__ "MatCreate_SeqBAIJ" 2357 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2358 { 2359 PetscErrorCode ierr; 2360 PetscMPIInt size; 2361 Mat_SeqBAIJ *b; 2362 2363 PetscFunctionBegin; 2364 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 2365 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2366 2367 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2368 B->data = (void*)b; 2369 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2370 B->factor = 0; 2371 B->mapping = 0; 2372 b->row = 0; 2373 b->col = 0; 2374 b->icol = 0; 2375 b->reallocs = 0; 2376 b->saved_values = 0; 2377 #if defined(PETSC_USE_MAT_SINGLE) 2378 b->setvalueslen = 0; 2379 b->setvaluescopy = PETSC_NULL; 2380 #endif 2381 2382 b->sorted = PETSC_FALSE; 2383 b->roworiented = PETSC_TRUE; 2384 b->nonew = 0; 2385 b->diag = 0; 2386 b->solve_work = 0; 2387 b->mult_work = 0; 2388 B->spptr = 0; 2389 B->info.nz_unneeded = (PetscReal)b->maxnz; 2390 b->keepzeroedrows = PETSC_FALSE; 2391 b->xtoy = 0; 2392 b->XtoY = 0; 2393 b->compressedrow.use = PETSC_FALSE; 2394 b->compressedrow.nrows = 0; 2395 b->compressedrow.i = PETSC_NULL; 2396 b->compressedrow.rindex = PETSC_NULL; 2397 b->compressedrow.checked = PETSC_FALSE; 2398 B->same_nonzero = PETSC_FALSE; 2399 2400 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2401 "MatInvertBlockDiagonal_SeqBAIJ", 2402 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2403 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2404 "MatStoreValues_SeqBAIJ", 2405 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2406 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2407 "MatRetrieveValues_SeqBAIJ", 2408 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2409 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2410 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2411 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2412 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2413 "MatConvert_SeqBAIJ_SeqAIJ", 2414 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2415 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2416 "MatConvert_SeqBAIJ_SeqSBAIJ", 2417 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2418 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2419 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2420 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2421 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 2422 PetscFunctionReturn(0); 2423 } 2424 EXTERN_C_END 2425 2426 #undef __FUNCT__ 2427 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2428 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2429 { 2430 Mat C; 2431 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2432 PetscErrorCode ierr; 2433 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2434 2435 PetscFunctionBegin; 2436 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2437 2438 *B = 0; 2439 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 2440 ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 2441 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 2442 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2443 c = (Mat_SeqBAIJ*)C->data; 2444 2445 C->rmap.N = A->rmap.N; 2446 C->cmap.N = A->cmap.N; 2447 C->rmap.bs = A->rmap.bs; 2448 c->bs2 = a->bs2; 2449 c->mbs = a->mbs; 2450 c->nbs = a->nbs; 2451 2452 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 2453 for (i=0; i<mbs; i++) { 2454 c->imax[i] = a->imax[i]; 2455 c->ilen[i] = a->ilen[i]; 2456 } 2457 2458 /* allocate the matrix space */ 2459 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2460 c->singlemalloc = PETSC_TRUE; 2461 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2462 if (mbs > 0) { 2463 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2464 if (cpvalues == MAT_COPY_VALUES) { 2465 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2466 } else { 2467 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2468 } 2469 } 2470 c->sorted = a->sorted; 2471 c->roworiented = a->roworiented; 2472 c->nonew = a->nonew; 2473 2474 if (a->diag) { 2475 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2476 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2477 for (i=0; i<mbs; i++) { 2478 c->diag[i] = a->diag[i]; 2479 } 2480 } else c->diag = 0; 2481 c->nz = a->nz; 2482 c->maxnz = a->maxnz; 2483 c->solve_work = 0; 2484 c->mult_work = 0; 2485 c->free_a = PETSC_TRUE; 2486 c->free_ij = PETSC_TRUE; 2487 C->preallocated = PETSC_TRUE; 2488 C->assembled = PETSC_TRUE; 2489 2490 c->compressedrow.use = a->compressedrow.use; 2491 c->compressedrow.nrows = a->compressedrow.nrows; 2492 c->compressedrow.checked = a->compressedrow.checked; 2493 if ( a->compressedrow.checked && a->compressedrow.use){ 2494 i = a->compressedrow.nrows; 2495 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2496 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2497 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2498 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2499 } else { 2500 c->compressedrow.use = PETSC_FALSE; 2501 c->compressedrow.i = PETSC_NULL; 2502 c->compressedrow.rindex = PETSC_NULL; 2503 } 2504 C->same_nonzero = A->same_nonzero; 2505 *B = C; 2506 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 2507 PetscFunctionReturn(0); 2508 } 2509 2510 #undef __FUNCT__ 2511 #define __FUNCT__ "MatLoad_SeqBAIJ" 2512 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A) 2513 { 2514 Mat_SeqBAIJ *a; 2515 Mat B; 2516 PetscErrorCode ierr; 2517 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2518 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2519 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2520 PetscInt *masked,nmask,tmp,bs2,ishift; 2521 PetscMPIInt size; 2522 int fd; 2523 PetscScalar *aa; 2524 MPI_Comm comm = ((PetscObject)viewer)->comm; 2525 2526 PetscFunctionBegin; 2527 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 2528 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2529 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2530 bs2 = bs*bs; 2531 2532 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2533 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2534 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2535 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2536 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2537 M = header[1]; N = header[2]; nz = header[3]; 2538 2539 if (header[3] < 0) { 2540 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2541 } 2542 2543 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2544 2545 /* 2546 This code adds extra rows to make sure the number of rows is 2547 divisible by the blocksize 2548 */ 2549 mbs = M/bs; 2550 extra_rows = bs - M + bs*(mbs); 2551 if (extra_rows == bs) extra_rows = 0; 2552 else mbs++; 2553 if (extra_rows) { 2554 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2555 } 2556 2557 /* read in row lengths */ 2558 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2559 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2560 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2561 2562 /* read in column indices */ 2563 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2564 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2565 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2566 2567 /* loop over row lengths determining block row lengths */ 2568 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2569 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2570 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2571 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2572 masked = mask + mbs; 2573 rowcount = 0; nzcount = 0; 2574 for (i=0; i<mbs; i++) { 2575 nmask = 0; 2576 for (j=0; j<bs; j++) { 2577 kmax = rowlengths[rowcount]; 2578 for (k=0; k<kmax; k++) { 2579 tmp = jj[nzcount++]/bs; 2580 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2581 } 2582 rowcount++; 2583 } 2584 browlengths[i] += nmask; 2585 /* zero out the mask elements we set */ 2586 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2587 } 2588 2589 /* create our matrix */ 2590 ierr = MatCreate(comm,&B); 2591 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 2592 ierr = MatSetType(B,type);CHKERRQ(ierr); 2593 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 2594 a = (Mat_SeqBAIJ*)B->data; 2595 2596 /* set matrix "i" values */ 2597 a->i[0] = 0; 2598 for (i=1; i<= mbs; i++) { 2599 a->i[i] = a->i[i-1] + browlengths[i-1]; 2600 a->ilen[i-1] = browlengths[i-1]; 2601 } 2602 a->nz = 0; 2603 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2604 2605 /* read in nonzero values */ 2606 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2607 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2608 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2609 2610 /* set "a" and "j" values into matrix */ 2611 nzcount = 0; jcount = 0; 2612 for (i=0; i<mbs; i++) { 2613 nzcountb = nzcount; 2614 nmask = 0; 2615 for (j=0; j<bs; j++) { 2616 kmax = rowlengths[i*bs+j]; 2617 for (k=0; k<kmax; k++) { 2618 tmp = jj[nzcount++]/bs; 2619 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2620 } 2621 } 2622 /* sort the masked values */ 2623 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2624 2625 /* set "j" values into matrix */ 2626 maskcount = 1; 2627 for (j=0; j<nmask; j++) { 2628 a->j[jcount++] = masked[j]; 2629 mask[masked[j]] = maskcount++; 2630 } 2631 /* set "a" values into matrix */ 2632 ishift = bs2*a->i[i]; 2633 for (j=0; j<bs; j++) { 2634 kmax = rowlengths[i*bs+j]; 2635 for (k=0; k<kmax; k++) { 2636 tmp = jj[nzcountb]/bs ; 2637 block = mask[tmp] - 1; 2638 point = jj[nzcountb] - bs*tmp; 2639 idx = ishift + bs2*block + j + bs*point; 2640 a->a[idx] = (MatScalar)aa[nzcountb++]; 2641 } 2642 } 2643 /* zero out the mask elements we set */ 2644 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2645 } 2646 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2647 2648 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2649 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2650 ierr = PetscFree(aa);CHKERRQ(ierr); 2651 ierr = PetscFree(jj);CHKERRQ(ierr); 2652 ierr = PetscFree(mask);CHKERRQ(ierr); 2653 2654 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2655 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2656 ierr = MatView_Private(B);CHKERRQ(ierr); 2657 2658 *A = B; 2659 PetscFunctionReturn(0); 2660 } 2661 2662 #undef __FUNCT__ 2663 #define __FUNCT__ "MatCreateSeqBAIJ" 2664 /*@C 2665 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2666 compressed row) format. For good matrix assembly performance the 2667 user should preallocate the matrix storage by setting the parameter nz 2668 (or the array nnz). By setting these parameters accurately, performance 2669 during matrix assembly can be increased by more than a factor of 50. 2670 2671 Collective on MPI_Comm 2672 2673 Input Parameters: 2674 + comm - MPI communicator, set to PETSC_COMM_SELF 2675 . bs - size of block 2676 . m - number of rows 2677 . n - number of columns 2678 . nz - number of nonzero blocks per block row (same for all rows) 2679 - nnz - array containing the number of nonzero blocks in the various block rows 2680 (possibly different for each block row) or PETSC_NULL 2681 2682 Output Parameter: 2683 . A - the matrix 2684 2685 Options Database Keys: 2686 . -mat_no_unroll - uses code that does not unroll the loops in the 2687 block calculations (much slower) 2688 . -mat_block_size - size of the blocks to use 2689 2690 Level: intermediate 2691 2692 Notes: 2693 The number of rows and columns must be divisible by blocksize. 2694 2695 If the nnz parameter is given then the nz parameter is ignored 2696 2697 A nonzero block is any block that as 1 or more nonzeros in it 2698 2699 The block AIJ format is fully compatible with standard Fortran 77 2700 storage. That is, the stored row and column indices can begin at 2701 either one (as in Fortran) or zero. See the users' manual for details. 2702 2703 Specify the preallocated storage with either nz or nnz (not both). 2704 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2705 allocation. For additional details, see the users manual chapter on 2706 matrices. 2707 2708 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2709 @*/ 2710 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 2711 { 2712 PetscErrorCode ierr; 2713 2714 PetscFunctionBegin; 2715 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2716 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2717 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2718 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 2719 PetscFunctionReturn(0); 2720 } 2721 2722 #undef __FUNCT__ 2723 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2724 /*@C 2725 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2726 per row in the matrix. For good matrix assembly performance the 2727 user should preallocate the matrix storage by setting the parameter nz 2728 (or the array nnz). By setting these parameters accurately, performance 2729 during matrix assembly can be increased by more than a factor of 50. 2730 2731 Collective on MPI_Comm 2732 2733 Input Parameters: 2734 + A - the matrix 2735 . bs - size of block 2736 . nz - number of block nonzeros per block row (same for all rows) 2737 - nnz - array containing the number of block nonzeros in the various block rows 2738 (possibly different for each block row) or PETSC_NULL 2739 2740 Options Database Keys: 2741 . -mat_no_unroll - uses code that does not unroll the loops in the 2742 block calculations (much slower) 2743 . -mat_block_size - size of the blocks to use 2744 2745 Level: intermediate 2746 2747 Notes: 2748 If the nnz parameter is given then the nz parameter is ignored 2749 2750 The block AIJ format is fully compatible with standard Fortran 77 2751 storage. That is, the stored row and column indices can begin at 2752 either one (as in Fortran) or zero. See the users' manual for details. 2753 2754 Specify the preallocated storage with either nz or nnz (not both). 2755 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2756 allocation. For additional details, see the users manual chapter on 2757 matrices. 2758 2759 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2760 @*/ 2761 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 2762 { 2763 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 2764 2765 PetscFunctionBegin; 2766 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2767 if (f) { 2768 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2769 } 2770 PetscFunctionReturn(0); 2771 } 2772 2773 #undef __FUNCT__ 2774 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 2775 /*@ 2776 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 2777 (upper triangular entries in CSR format) provided by the user. 2778 2779 Collective on MPI_Comm 2780 2781 Input Parameters: 2782 + comm - must be an MPI communicator of size 1 2783 . bs - size of block 2784 . m - number of rows 2785 . n - number of columns 2786 . i - row indices 2787 . j - column indices 2788 - a - matrix values 2789 2790 Output Parameter: 2791 . mat - the matrix 2792 2793 Level: intermediate 2794 2795 Notes: 2796 The i, j, and a arrays are not copied by this routine, the user must free these arrays 2797 once the matrix is destroyed 2798 2799 You cannot set new nonzero locations into this matrix, that will generate an error. 2800 2801 The i and j indices are 0 based 2802 2803 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 2804 2805 @*/ 2806 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 2807 { 2808 PetscErrorCode ierr; 2809 PetscInt ii; 2810 Mat_SeqBAIJ *baij; 2811 2812 PetscFunctionBegin; 2813 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 2814 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2815 2816 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2817 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 2818 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 2819 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 2820 baij = (Mat_SeqBAIJ*)(*mat)->data; 2821 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 2822 2823 baij->i = i; 2824 baij->j = j; 2825 baij->a = a; 2826 baij->singlemalloc = PETSC_FALSE; 2827 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 2828 baij->free_a = PETSC_FALSE; 2829 baij->free_ij = PETSC_FALSE; 2830 2831 for (ii=0; ii<m; ii++) { 2832 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 2833 #if defined(PETSC_USE_DEBUG) 2834 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 2835 #endif 2836 } 2837 #if defined(PETSC_USE_DEBUG) 2838 for (ii=0; ii<baij->i[m]; ii++) { 2839 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 2840 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 2841 } 2842 #endif 2843 2844 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2845 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2846 PetscFunctionReturn(0); 2847 } 2848