1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 15 /*@C 16 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 17 18 Collective on Mat 19 20 Input Parameters: 21 . mat - the matrix 22 23 Level: advanced 24 @*/ 25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 26 { 27 PetscErrorCode ierr,(*f)(Mat); 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 31 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 32 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 33 34 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 35 if (f) { 36 ierr = (*f)(mat);CHKERRQ(ierr); 37 } else { 38 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 39 } 40 PetscFunctionReturn(0); 41 } 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 47 { 48 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 49 PetscErrorCode ierr; 50 PetscInt *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs; 51 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 52 53 PetscFunctionBegin; 54 if (a->idiagvalid) PetscFunctionReturn(0); 55 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56 diag_offset = a->diag; 57 if (!a->idiag) { 58 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59 } 60 diag = a->idiag; 61 mdiag = a->idiag+bs*bs*mbs; 62 /* factor and invert each block */ 63 switch (bs){ 64 case 2: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 4*diag_offset[i]; 67 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 68 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 69 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 70 diag += 4; 71 mdiag += 4; 72 } 73 break; 74 case 3: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 9*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79 diag[8] = odiag[8]; 80 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 81 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 82 mdiag[8] = odiag[8]; 83 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 84 diag += 9; 85 mdiag += 9; 86 } 87 break; 88 case 4: 89 for (i=0; i<mbs; i++) { 90 odiag = v + 16*diag_offset[i]; 91 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 92 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 94 diag += 16; 95 mdiag += 16; 96 } 97 break; 98 case 5: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 25*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 104 diag += 25; 105 mdiag += 25; 106 } 107 break; 108 default: 109 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 110 } 111 a->idiagvalid = PETSC_TRUE; 112 PetscFunctionReturn(0); 113 } 114 EXTERN_C_END 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 119 { 120 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 121 PetscScalar *x,x1,x2,s1,s2; 122 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 123 PetscErrorCode ierr; 124 PetscInt m = a->mbs,i,i2,nz,idx; 125 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 126 127 PetscFunctionBegin; 128 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 129 its = its*lits; 130 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 131 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 132 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 133 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 134 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 135 136 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 137 138 diag = a->diag; 139 idiag = a->idiag; 140 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 141 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 142 143 if (flag & SOR_ZERO_INITIAL_GUESS) { 144 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 145 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 146 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 147 i2 = 2; 148 idiag += 4; 149 for (i=1; i<m; i++) { 150 v = aa + 4*ai[i]; 151 vi = aj + ai[i]; 152 nz = diag[i] - ai[i]; 153 s1 = b[i2]; s2 = b[i2+1]; 154 while (nz--) { 155 idx = 2*(*vi++); 156 x1 = x[idx]; x2 = x[1+idx]; 157 s1 -= v[0]*x1 + v[2]*x2; 158 s2 -= v[1]*x1 + v[3]*x2; 159 v += 4; 160 } 161 x[i2] = idiag[0]*s1 + idiag[2]*s2; 162 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 163 idiag += 4; 164 i2 += 2; 165 } 166 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 167 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 168 } 169 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 170 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 171 i2 = 0; 172 mdiag = a->idiag+4*a->mbs; 173 for (i=0; i<m; i++) { 174 x1 = x[i2]; x2 = x[i2+1]; 175 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 176 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 177 mdiag += 4; 178 i2 += 2; 179 } 180 ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 181 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 182 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 183 } 184 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 185 idiag = a->idiag+4*a->mbs - 4; 186 i2 = 2*m - 2; 187 x1 = x[i2]; x2 = x[i2+1]; 188 x[i2] = idiag[0]*x1 + idiag[2]*x2; 189 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 190 idiag -= 4; 191 i2 -= 2; 192 for (i=m-2; i>=0; i--) { 193 v = aa + 4*(diag[i]+1); 194 vi = aj + diag[i] + 1; 195 nz = ai[i+1] - diag[i] - 1; 196 s1 = x[i2]; s2 = x[i2+1]; 197 while (nz--) { 198 idx = 2*(*vi++); 199 x1 = x[idx]; x2 = x[1+idx]; 200 s1 -= v[0]*x1 + v[2]*x2; 201 s2 -= v[1]*x1 + v[3]*x2; 202 v += 4; 203 } 204 x[i2] = idiag[0]*s1 + idiag[2]*s2; 205 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 206 idiag -= 4; 207 i2 -= 2; 208 } 209 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 210 } 211 } else { 212 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 213 } 214 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 215 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 216 PetscFunctionReturn(0); 217 } 218 219 #undef __FUNCT__ 220 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 221 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 222 { 223 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 224 PetscScalar *x,x1,x2,x3,s1,s2,s3; 225 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 226 PetscErrorCode ierr; 227 PetscInt m = a->mbs,i,i2,nz,idx; 228 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 229 230 PetscFunctionBegin; 231 its = its*lits; 232 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 233 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 234 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 235 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 236 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 237 238 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 239 240 diag = a->diag; 241 idiag = a->idiag; 242 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 243 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 244 245 if (flag & SOR_ZERO_INITIAL_GUESS) { 246 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 247 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 248 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 249 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 250 i2 = 3; 251 idiag += 9; 252 for (i=1; i<m; i++) { 253 v = aa + 9*ai[i]; 254 vi = aj + ai[i]; 255 nz = diag[i] - ai[i]; 256 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 257 while (nz--) { 258 idx = 3*(*vi++); 259 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 260 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 261 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 262 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 263 v += 9; 264 } 265 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 266 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 267 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 268 idiag += 9; 269 i2 += 3; 270 } 271 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 272 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 273 } 274 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 275 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 276 i2 = 0; 277 mdiag = a->idiag+9*a->mbs; 278 for (i=0; i<m; i++) { 279 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 280 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 281 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 282 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 283 mdiag += 9; 284 i2 += 3; 285 } 286 ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 287 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 288 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 289 } 290 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 291 idiag = a->idiag+9*a->mbs - 9; 292 i2 = 3*m - 3; 293 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 294 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 295 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 296 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 297 idiag -= 9; 298 i2 -= 3; 299 for (i=m-2; i>=0; i--) { 300 v = aa + 9*(diag[i]+1); 301 vi = aj + diag[i] + 1; 302 nz = ai[i+1] - diag[i] - 1; 303 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 304 while (nz--) { 305 idx = 3*(*vi++); 306 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 307 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 308 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 309 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 310 v += 9; 311 } 312 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 313 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 314 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 315 idiag -= 9; 316 i2 -= 3; 317 } 318 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 319 } 320 } else { 321 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 322 } 323 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 324 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 325 PetscFunctionReturn(0); 326 } 327 328 #undef __FUNCT__ 329 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 330 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 331 { 332 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 333 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 334 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 335 PetscErrorCode ierr; 336 PetscInt m = a->mbs,i,i2,nz,idx; 337 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 338 339 PetscFunctionBegin; 340 its = its*lits; 341 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 342 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 343 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 344 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 345 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 346 347 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 348 349 diag = a->diag; 350 idiag = a->idiag; 351 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 352 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 353 354 if (flag & SOR_ZERO_INITIAL_GUESS) { 355 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 356 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 357 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 358 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 359 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 360 i2 = 4; 361 idiag += 16; 362 for (i=1; i<m; i++) { 363 v = aa + 16*ai[i]; 364 vi = aj + ai[i]; 365 nz = diag[i] - ai[i]; 366 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 367 while (nz--) { 368 idx = 4*(*vi++); 369 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 370 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 371 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 372 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 373 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 374 v += 16; 375 } 376 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 377 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 378 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 379 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 380 idiag += 16; 381 i2 += 4; 382 } 383 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 384 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 385 } 386 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 387 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 388 i2 = 0; 389 mdiag = a->idiag+16*a->mbs; 390 for (i=0; i<m; i++) { 391 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 392 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 393 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 394 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 395 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 396 mdiag += 16; 397 i2 += 4; 398 } 399 ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 400 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 401 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 402 } 403 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 404 idiag = a->idiag+16*a->mbs - 16; 405 i2 = 4*m - 4; 406 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 407 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 408 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 409 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 410 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 411 idiag -= 16; 412 i2 -= 4; 413 for (i=m-2; i>=0; i--) { 414 v = aa + 16*(diag[i]+1); 415 vi = aj + diag[i] + 1; 416 nz = ai[i+1] - diag[i] - 1; 417 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 418 while (nz--) { 419 idx = 4*(*vi++); 420 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 421 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 422 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 423 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 424 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 425 v += 16; 426 } 427 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 428 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 429 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 430 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 431 idiag -= 16; 432 i2 -= 4; 433 } 434 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 435 } 436 } else { 437 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 438 } 439 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 440 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 441 PetscFunctionReturn(0); 442 } 443 444 #undef __FUNCT__ 445 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 446 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 447 { 448 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 449 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 450 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 451 PetscErrorCode ierr; 452 PetscInt m = a->mbs,i,i2,nz,idx; 453 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 454 455 PetscFunctionBegin; 456 its = its*lits; 457 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 458 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 459 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 460 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 461 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 462 463 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 464 465 diag = a->diag; 466 idiag = a->idiag; 467 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 468 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 469 470 if (flag & SOR_ZERO_INITIAL_GUESS) { 471 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 472 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 473 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 474 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 475 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 476 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 477 i2 = 5; 478 idiag += 25; 479 for (i=1; i<m; i++) { 480 v = aa + 25*ai[i]; 481 vi = aj + ai[i]; 482 nz = diag[i] - ai[i]; 483 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 484 while (nz--) { 485 idx = 5*(*vi++); 486 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 487 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 488 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 489 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 490 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 491 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 492 v += 25; 493 } 494 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 495 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 496 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 497 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 498 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 499 idiag += 25; 500 i2 += 5; 501 } 502 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 503 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 504 } 505 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 506 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 507 i2 = 0; 508 mdiag = a->idiag+25*a->mbs; 509 for (i=0; i<m; i++) { 510 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 511 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 512 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 513 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 514 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 515 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 516 mdiag += 25; 517 i2 += 5; 518 } 519 ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 520 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 521 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 522 } 523 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 524 idiag = a->idiag+25*a->mbs - 25; 525 i2 = 5*m - 5; 526 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 527 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 528 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 529 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 530 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 531 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 532 idiag -= 25; 533 i2 -= 5; 534 for (i=m-2; i>=0; i--) { 535 v = aa + 25*(diag[i]+1); 536 vi = aj + diag[i] + 1; 537 nz = ai[i+1] - diag[i] - 1; 538 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 539 while (nz--) { 540 idx = 5*(*vi++); 541 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 542 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 543 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 544 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 545 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 546 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 547 v += 25; 548 } 549 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 550 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 551 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 552 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 553 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 554 idiag -= 25; 555 i2 -= 5; 556 } 557 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 558 } 559 } else { 560 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 561 } 562 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 563 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 564 PetscFunctionReturn(0); 565 } 566 567 /* 568 Special version for direct calls from Fortran (Used in PETSc-fun3d) 569 */ 570 #if defined(PETSC_HAVE_FORTRAN_CAPS) 571 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 572 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 573 #define matsetvaluesblocked4_ matsetvaluesblocked4 574 #endif 575 576 EXTERN_C_BEGIN 577 #undef __FUNCT__ 578 #define __FUNCT__ "matsetvaluesblocked4_" 579 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 580 { 581 Mat A = *AA; 582 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 583 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 584 PetscInt *ai=a->i,*ailen=a->ilen; 585 PetscInt *aj=a->j,stepval,lastcol = -1; 586 const PetscScalar *value = v; 587 MatScalar *ap,*aa = a->a,*bap; 588 589 PetscFunctionBegin; 590 if (A->rmap.bs != 4) SETERRABORT(A->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 591 stepval = (n-1)*4; 592 for (k=0; k<m; k++) { /* loop over added rows */ 593 row = im[k]; 594 rp = aj + ai[row]; 595 ap = aa + 16*ai[row]; 596 nrow = ailen[row]; 597 low = 0; 598 high = nrow; 599 for (l=0; l<n; l++) { /* loop over added columns */ 600 col = in[l]; 601 if (col <= lastcol) low = 0; else high = nrow; 602 lastcol = col; 603 value = v + k*(stepval+4 + l)*4; 604 while (high-low > 7) { 605 t = (low+high)/2; 606 if (rp[t] > col) high = t; 607 else low = t; 608 } 609 for (i=low; i<high; i++) { 610 if (rp[i] > col) break; 611 if (rp[i] == col) { 612 bap = ap + 16*i; 613 for (ii=0; ii<4; ii++,value+=stepval) { 614 for (jj=ii; jj<16; jj+=4) { 615 bap[jj] += *value++; 616 } 617 } 618 goto noinsert2; 619 } 620 } 621 N = nrow++ - 1; 622 high++; /* added new column index thus must search to one higher than before */ 623 /* shift up all the later entries in this row */ 624 for (ii=N; ii>=i; ii--) { 625 rp[ii+1] = rp[ii]; 626 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 627 } 628 if (N >= i) { 629 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 630 } 631 rp[i] = col; 632 bap = ap + 16*i; 633 for (ii=0; ii<4; ii++,value+=stepval) { 634 for (jj=ii; jj<16; jj+=4) { 635 bap[jj] = *value++; 636 } 637 } 638 noinsert2:; 639 low = i; 640 } 641 ailen[row] = nrow; 642 } 643 PetscFunctionReturnVoid(); 644 } 645 EXTERN_C_END 646 647 #if defined(PETSC_HAVE_FORTRAN_CAPS) 648 #define matsetvalues4_ MATSETVALUES4 649 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 650 #define matsetvalues4_ matsetvalues4 651 #endif 652 653 EXTERN_C_BEGIN 654 #undef __FUNCT__ 655 #define __FUNCT__ "MatSetValues4_" 656 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 657 { 658 Mat A = *AA; 659 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 660 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 661 PetscInt *ai=a->i,*ailen=a->ilen; 662 PetscInt *aj=a->j,brow,bcol; 663 PetscInt ridx,cidx,lastcol = -1; 664 MatScalar *ap,value,*aa=a->a,*bap; 665 666 PetscFunctionBegin; 667 for (k=0; k<m; k++) { /* loop over added rows */ 668 row = im[k]; brow = row/4; 669 rp = aj + ai[brow]; 670 ap = aa + 16*ai[brow]; 671 nrow = ailen[brow]; 672 low = 0; 673 high = nrow; 674 for (l=0; l<n; l++) { /* loop over added columns */ 675 col = in[l]; bcol = col/4; 676 ridx = row % 4; cidx = col % 4; 677 value = v[l + k*n]; 678 if (col <= lastcol) low = 0; else high = nrow; 679 lastcol = col; 680 while (high-low > 7) { 681 t = (low+high)/2; 682 if (rp[t] > bcol) high = t; 683 else low = t; 684 } 685 for (i=low; i<high; i++) { 686 if (rp[i] > bcol) break; 687 if (rp[i] == bcol) { 688 bap = ap + 16*i + 4*cidx + ridx; 689 *bap += value; 690 goto noinsert1; 691 } 692 } 693 N = nrow++ - 1; 694 high++; /* added new column thus must search to one higher than before */ 695 /* shift up all the later entries in this row */ 696 for (ii=N; ii>=i; ii--) { 697 rp[ii+1] = rp[ii]; 698 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 699 } 700 if (N>=i) { 701 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 702 } 703 rp[i] = bcol; 704 ap[16*i + 4*cidx + ridx] = value; 705 noinsert1:; 706 low = i; 707 } 708 ailen[brow] = nrow; 709 } 710 PetscFunctionReturnVoid(); 711 } 712 EXTERN_C_END 713 714 /* UGLY, ugly, ugly 715 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 716 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 717 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 718 converts the entries into single precision and then calls ..._MatScalar() to put them 719 into the single precision data structures. 720 */ 721 #if defined(PETSC_USE_MAT_SINGLE) 722 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 723 #else 724 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 725 #endif 726 727 #define CHUNKSIZE 10 728 729 /* 730 Checks for missing diagonals 731 */ 732 #undef __FUNCT__ 733 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 734 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A) 735 { 736 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 737 PetscErrorCode ierr; 738 PetscInt *diag,*jj = a->j,i; 739 740 PetscFunctionBegin; 741 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 742 diag = a->diag; 743 for (i=0; i<a->mbs; i++) { 744 if (jj[diag[i]] != i) { 745 SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i); 746 } 747 } 748 PetscFunctionReturn(0); 749 } 750 751 #undef __FUNCT__ 752 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 753 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 754 { 755 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 756 PetscErrorCode ierr; 757 PetscInt i,j,m = a->mbs; 758 759 PetscFunctionBegin; 760 if (!a->diag) { 761 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 762 } 763 for (i=0; i<m; i++) { 764 a->diag[i] = a->i[i+1]; 765 for (j=a->i[i]; j<a->i[i+1]; j++) { 766 if (a->j[j] == i) { 767 a->diag[i] = j; 768 break; 769 } 770 } 771 } 772 PetscFunctionReturn(0); 773 } 774 775 776 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 777 778 #undef __FUNCT__ 779 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 780 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 781 { 782 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 783 PetscErrorCode ierr; 784 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs; 785 PetscInt *tia, *tja; 786 787 PetscFunctionBegin; 788 *nn = n; 789 if (!ia) PetscFunctionReturn(0); 790 if (symmetric) { 791 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 792 } else { 793 tia = a->i; tja = a->j; 794 } 795 796 if (!blockcompressed) { 797 /* malloc & create the natural set of indices */ 798 ierr = PetscMalloc2((n+1)*bs,PetscInt,ia,nz*bs,PetscInt,ja);CHKERRQ(ierr); 799 for (i=0; i<n+1; i++) { 800 for (j=0; j<bs; j++) { 801 *ia[i*bs+j] = tia[i]*bs+j; 802 } 803 } 804 for (i=0; i<nz; i++) { 805 for (j=0; j<bs; j++) { 806 *ja[i*bs+j] = tia[i]*bs+j; 807 } 808 } 809 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 810 ierr = PetscFree(tia);CHKERRQ(ierr); 811 ierr = PetscFree(tja);CHKERRQ(ierr); 812 } 813 } else { 814 *ia = tia; 815 *ja = tja; 816 } 817 if (oshift == 1) { 818 for (i=0; i<nz; i++) (*ja)[i]++; 819 for (i=0; i<n+1; i++) (*ia)[i]++; 820 } 821 PetscFunctionReturn(0); 822 } 823 824 #undef __FUNCT__ 825 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 826 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 827 { 828 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 829 PetscErrorCode ierr; 830 PetscInt i,n = a->mbs,nz = a->i[n]; 831 832 PetscFunctionBegin; 833 if (!ia) PetscFunctionReturn(0); 834 if (!blockcompressed) { 835 ierr = PetscFree2(*ia,*ja);CHKERRQ(ierr); 836 }else if (symmetric) { 837 ierr = PetscFree(*ia);CHKERRQ(ierr); 838 ierr = PetscFree(*ja);CHKERRQ(ierr); 839 } else if (oshift == 1) { /* blockcompressed */ 840 for (i=0; i<nz; i++) a->j[i]--; 841 for (i=0; i<n+1; i++) a->i[i]--; 842 } 843 PetscFunctionReturn(0); 844 } 845 846 #undef __FUNCT__ 847 #define __FUNCT__ "MatDestroy_SeqBAIJ" 848 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 849 { 850 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 851 PetscErrorCode ierr; 852 853 PetscFunctionBegin; 854 #if defined(PETSC_USE_LOG) 855 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz); 856 #endif 857 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 858 if (a->row) { 859 ierr = ISDestroy(a->row);CHKERRQ(ierr); 860 } 861 if (a->col) { 862 ierr = ISDestroy(a->col);CHKERRQ(ierr); 863 } 864 ierr = PetscFree(a->diag);CHKERRQ(ierr); 865 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 866 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 867 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 868 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 869 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 870 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 871 #if defined(PETSC_USE_MAT_SINGLE) 872 ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr); 873 #endif 874 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 875 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 876 877 ierr = PetscFree(a);CHKERRQ(ierr); 878 879 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 880 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 881 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 882 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 883 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 884 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 885 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 886 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 887 PetscFunctionReturn(0); 888 } 889 890 #undef __FUNCT__ 891 #define __FUNCT__ "MatSetOption_SeqBAIJ" 892 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg) 893 { 894 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 895 PetscErrorCode ierr; 896 897 PetscFunctionBegin; 898 switch (op) { 899 case MAT_ROW_ORIENTED: 900 a->roworiented = flg; 901 break; 902 case MAT_KEEP_ZEROED_ROWS: 903 a->keepzeroedrows = flg; 904 break; 905 case MAT_NEW_NONZERO_LOCATIONS: 906 a->nonew = (flg ? 0 : 1); 907 break; 908 case MAT_NEW_NONZERO_LOCATION_ERR: 909 a->nonew = (flg ? -1 : 0); 910 break; 911 case MAT_NEW_NONZERO_ALLOCATION_ERR: 912 a->nonew = (flg ? -2 : 0); 913 break; 914 case MAT_NEW_DIAGONALS: 915 case MAT_IGNORE_OFF_PROC_ENTRIES: 916 case MAT_USE_HASH_TABLE: 917 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 918 break; 919 case MAT_SYMMETRIC: 920 case MAT_STRUCTURALLY_SYMMETRIC: 921 case MAT_HERMITIAN: 922 case MAT_SYMMETRY_ETERNAL: 923 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 924 break; 925 default: 926 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 927 } 928 PetscFunctionReturn(0); 929 } 930 931 #undef __FUNCT__ 932 #define __FUNCT__ "MatGetRow_SeqBAIJ" 933 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 934 { 935 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 936 PetscErrorCode ierr; 937 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 938 MatScalar *aa,*aa_i; 939 PetscScalar *v_i; 940 941 PetscFunctionBegin; 942 bs = A->rmap.bs; 943 ai = a->i; 944 aj = a->j; 945 aa = a->a; 946 bs2 = a->bs2; 947 948 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 949 950 bn = row/bs; /* Block number */ 951 bp = row % bs; /* Block Position */ 952 M = ai[bn+1] - ai[bn]; 953 *nz = bs*M; 954 955 if (v) { 956 *v = 0; 957 if (*nz) { 958 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 959 for (i=0; i<M; i++) { /* for each block in the block row */ 960 v_i = *v + i*bs; 961 aa_i = aa + bs2*(ai[bn] + i); 962 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 963 } 964 } 965 } 966 967 if (idx) { 968 *idx = 0; 969 if (*nz) { 970 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 971 for (i=0; i<M; i++) { /* for each block in the block row */ 972 idx_i = *idx + i*bs; 973 itmp = bs*aj[ai[bn] + i]; 974 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 975 } 976 } 977 } 978 PetscFunctionReturn(0); 979 } 980 981 #undef __FUNCT__ 982 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 983 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 984 { 985 PetscErrorCode ierr; 986 987 PetscFunctionBegin; 988 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 989 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 990 PetscFunctionReturn(0); 991 } 992 993 #undef __FUNCT__ 994 #define __FUNCT__ "MatTranspose_SeqBAIJ" 995 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 996 { 997 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 998 Mat C; 999 PetscErrorCode ierr; 1000 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1001 PetscInt *rows,*cols,bs2=a->bs2; 1002 PetscScalar *array; 1003 1004 PetscFunctionBegin; 1005 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 1006 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1007 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1008 1009 #if defined(PETSC_USE_MAT_SINGLE) 1010 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 1011 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 1012 #else 1013 array = a->a; 1014 #endif 1015 1016 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1017 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 1018 ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr); 1019 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1020 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1021 ierr = PetscFree(col);CHKERRQ(ierr); 1022 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1023 cols = rows + bs; 1024 for (i=0; i<mbs; i++) { 1025 cols[0] = i*bs; 1026 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1027 len = ai[i+1] - ai[i]; 1028 for (j=0; j<len; j++) { 1029 rows[0] = (*aj++)*bs; 1030 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1031 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1032 array += bs2; 1033 } 1034 } 1035 ierr = PetscFree(rows);CHKERRQ(ierr); 1036 #if defined(PETSC_USE_MAT_SINGLE) 1037 ierr = PetscFree(array); 1038 #endif 1039 1040 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1041 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1042 1043 if (B) { 1044 *B = C; 1045 } else { 1046 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1047 } 1048 PetscFunctionReturn(0); 1049 } 1050 1051 #undef __FUNCT__ 1052 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1053 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1054 { 1055 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1056 PetscErrorCode ierr; 1057 PetscInt i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2; 1058 int fd; 1059 PetscScalar *aa; 1060 FILE *file; 1061 1062 PetscFunctionBegin; 1063 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1064 ierr = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1065 col_lens[0] = MAT_FILE_COOKIE; 1066 1067 col_lens[1] = A->rmap.N; 1068 col_lens[2] = A->cmap.n; 1069 col_lens[3] = a->nz*bs2; 1070 1071 /* store lengths of each row and write (including header) to file */ 1072 count = 0; 1073 for (i=0; i<a->mbs; i++) { 1074 for (j=0; j<bs; j++) { 1075 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1076 } 1077 } 1078 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1079 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1080 1081 /* store column indices (zero start index) */ 1082 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1083 count = 0; 1084 for (i=0; i<a->mbs; i++) { 1085 for (j=0; j<bs; j++) { 1086 for (k=a->i[i]; k<a->i[i+1]; k++) { 1087 for (l=0; l<bs; l++) { 1088 jj[count++] = bs*a->j[k] + l; 1089 } 1090 } 1091 } 1092 } 1093 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1094 ierr = PetscFree(jj);CHKERRQ(ierr); 1095 1096 /* store nonzero values */ 1097 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1098 count = 0; 1099 for (i=0; i<a->mbs; i++) { 1100 for (j=0; j<bs; j++) { 1101 for (k=a->i[i]; k<a->i[i+1]; k++) { 1102 for (l=0; l<bs; l++) { 1103 aa[count++] = a->a[bs2*k + l*bs + j]; 1104 } 1105 } 1106 } 1107 } 1108 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1109 ierr = PetscFree(aa);CHKERRQ(ierr); 1110 1111 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1112 if (file) { 1113 fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs); 1114 } 1115 PetscFunctionReturn(0); 1116 } 1117 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1120 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1121 { 1122 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1123 PetscErrorCode ierr; 1124 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 1125 PetscViewerFormat format; 1126 1127 PetscFunctionBegin; 1128 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1129 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1130 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1131 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1132 Mat aij; 1133 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1134 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1135 ierr = MatDestroy(aij);CHKERRQ(ierr); 1136 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1137 PetscFunctionReturn(0); 1138 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1139 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1140 for (i=0; i<a->mbs; i++) { 1141 for (j=0; j<bs; j++) { 1142 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1143 for (k=a->i[i]; k<a->i[i+1]; k++) { 1144 for (l=0; l<bs; l++) { 1145 #if defined(PETSC_USE_COMPLEX) 1146 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1147 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1148 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1149 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1150 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1151 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1152 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1153 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1154 } 1155 #else 1156 if (a->a[bs2*k + l*bs + j] != 0.0) { 1157 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1158 } 1159 #endif 1160 } 1161 } 1162 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1163 } 1164 } 1165 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1166 } else { 1167 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1168 for (i=0; i<a->mbs; i++) { 1169 for (j=0; j<bs; j++) { 1170 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1171 for (k=a->i[i]; k<a->i[i+1]; k++) { 1172 for (l=0; l<bs; l++) { 1173 #if defined(PETSC_USE_COMPLEX) 1174 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1175 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1176 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1177 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1178 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1179 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1180 } else { 1181 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1182 } 1183 #else 1184 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1185 #endif 1186 } 1187 } 1188 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1189 } 1190 } 1191 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1192 } 1193 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1194 PetscFunctionReturn(0); 1195 } 1196 1197 #undef __FUNCT__ 1198 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1199 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1200 { 1201 Mat A = (Mat) Aa; 1202 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1203 PetscErrorCode ierr; 1204 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 1205 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1206 MatScalar *aa; 1207 PetscViewer viewer; 1208 1209 PetscFunctionBegin; 1210 1211 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1212 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1213 1214 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1215 1216 /* loop over matrix elements drawing boxes */ 1217 color = PETSC_DRAW_BLUE; 1218 for (i=0,row=0; i<mbs; i++,row+=bs) { 1219 for (j=a->i[i]; j<a->i[i+1]; j++) { 1220 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1221 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1222 aa = a->a + j*bs2; 1223 for (k=0; k<bs; k++) { 1224 for (l=0; l<bs; l++) { 1225 if (PetscRealPart(*aa++) >= 0.) continue; 1226 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1227 } 1228 } 1229 } 1230 } 1231 color = PETSC_DRAW_CYAN; 1232 for (i=0,row=0; i<mbs; i++,row+=bs) { 1233 for (j=a->i[i]; j<a->i[i+1]; j++) { 1234 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1235 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1236 aa = a->a + j*bs2; 1237 for (k=0; k<bs; k++) { 1238 for (l=0; l<bs; l++) { 1239 if (PetscRealPart(*aa++) != 0.) continue; 1240 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1241 } 1242 } 1243 } 1244 } 1245 1246 color = PETSC_DRAW_RED; 1247 for (i=0,row=0; i<mbs; i++,row+=bs) { 1248 for (j=a->i[i]; j<a->i[i+1]; j++) { 1249 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1250 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1251 aa = a->a + j*bs2; 1252 for (k=0; k<bs; k++) { 1253 for (l=0; l<bs; l++) { 1254 if (PetscRealPart(*aa++) <= 0.) continue; 1255 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1256 } 1257 } 1258 } 1259 } 1260 PetscFunctionReturn(0); 1261 } 1262 1263 #undef __FUNCT__ 1264 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1265 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1266 { 1267 PetscErrorCode ierr; 1268 PetscReal xl,yl,xr,yr,w,h; 1269 PetscDraw draw; 1270 PetscTruth isnull; 1271 1272 PetscFunctionBegin; 1273 1274 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1275 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1276 1277 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1278 xr = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 1279 xr += w; yr += h; xl = -w; yl = -h; 1280 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1281 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1282 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1283 PetscFunctionReturn(0); 1284 } 1285 1286 #undef __FUNCT__ 1287 #define __FUNCT__ "MatView_SeqBAIJ" 1288 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1289 { 1290 PetscErrorCode ierr; 1291 PetscTruth iascii,isbinary,isdraw; 1292 1293 PetscFunctionBegin; 1294 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1295 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1296 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1297 if (iascii){ 1298 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1299 } else if (isbinary) { 1300 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1301 } else if (isdraw) { 1302 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1303 } else { 1304 Mat B; 1305 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1306 ierr = MatView(B,viewer);CHKERRQ(ierr); 1307 ierr = MatDestroy(B);CHKERRQ(ierr); 1308 } 1309 PetscFunctionReturn(0); 1310 } 1311 1312 1313 #undef __FUNCT__ 1314 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1315 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1316 { 1317 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1318 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1319 PetscInt *ai = a->i,*ailen = a->ilen; 1320 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 1321 MatScalar *ap,*aa = a->a; 1322 1323 PetscFunctionBegin; 1324 for (k=0; k<m; k++) { /* loop over rows */ 1325 row = im[k]; brow = row/bs; 1326 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1327 if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1328 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1329 nrow = ailen[brow]; 1330 for (l=0; l<n; l++) { /* loop over columns */ 1331 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1332 if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1333 col = in[l] ; 1334 bcol = col/bs; 1335 cidx = col%bs; 1336 ridx = row%bs; 1337 high = nrow; 1338 low = 0; /* assume unsorted */ 1339 while (high-low > 5) { 1340 t = (low+high)/2; 1341 if (rp[t] > bcol) high = t; 1342 else low = t; 1343 } 1344 for (i=low; i<high; i++) { 1345 if (rp[i] > bcol) break; 1346 if (rp[i] == bcol) { 1347 *v++ = ap[bs2*i+bs*cidx+ridx]; 1348 goto finished; 1349 } 1350 } 1351 *v++ = 0.0; 1352 finished:; 1353 } 1354 } 1355 PetscFunctionReturn(0); 1356 } 1357 1358 #if defined(PETSC_USE_MAT_SINGLE) 1359 #undef __FUNCT__ 1360 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1361 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 1362 { 1363 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1364 PetscErrorCode ierr; 1365 PetscInt i,N = m*n*b->bs2; 1366 MatScalar *vsingle; 1367 1368 PetscFunctionBegin; 1369 if (N > b->setvalueslen) { 1370 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 1371 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1372 b->setvalueslen = N; 1373 } 1374 vsingle = b->setvaluescopy; 1375 for (i=0; i<N; i++) { 1376 vsingle[i] = v[i]; 1377 } 1378 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1379 PetscFunctionReturn(0); 1380 } 1381 #endif 1382 1383 1384 #undef __FUNCT__ 1385 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1386 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is) 1387 { 1388 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1389 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1390 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1391 PetscErrorCode ierr; 1392 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 1393 PetscTruth roworiented=a->roworiented; 1394 const MatScalar *value = v; 1395 MatScalar *ap,*aa = a->a,*bap; 1396 1397 PetscFunctionBegin; 1398 if (roworiented) { 1399 stepval = (n-1)*bs; 1400 } else { 1401 stepval = (m-1)*bs; 1402 } 1403 for (k=0; k<m; k++) { /* loop over added rows */ 1404 row = im[k]; 1405 if (row < 0) continue; 1406 #if defined(PETSC_USE_DEBUG) 1407 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1408 #endif 1409 rp = aj + ai[row]; 1410 ap = aa + bs2*ai[row]; 1411 rmax = imax[row]; 1412 nrow = ailen[row]; 1413 low = 0; 1414 high = nrow; 1415 for (l=0; l<n; l++) { /* loop over added columns */ 1416 if (in[l] < 0) continue; 1417 #if defined(PETSC_USE_DEBUG) 1418 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1419 #endif 1420 col = in[l]; 1421 if (roworiented) { 1422 value = v + k*(stepval+bs)*bs + l*bs; 1423 } else { 1424 value = v + l*(stepval+bs)*bs + k*bs; 1425 } 1426 if (col <= lastcol) low = 0; else high = nrow; 1427 lastcol = col; 1428 while (high-low > 7) { 1429 t = (low+high)/2; 1430 if (rp[t] > col) high = t; 1431 else low = t; 1432 } 1433 for (i=low; i<high; i++) { 1434 if (rp[i] > col) break; 1435 if (rp[i] == col) { 1436 bap = ap + bs2*i; 1437 if (roworiented) { 1438 if (is == ADD_VALUES) { 1439 for (ii=0; ii<bs; ii++,value+=stepval) { 1440 for (jj=ii; jj<bs2; jj+=bs) { 1441 bap[jj] += *value++; 1442 } 1443 } 1444 } else { 1445 for (ii=0; ii<bs; ii++,value+=stepval) { 1446 for (jj=ii; jj<bs2; jj+=bs) { 1447 bap[jj] = *value++; 1448 } 1449 } 1450 } 1451 } else { 1452 if (is == ADD_VALUES) { 1453 for (ii=0; ii<bs; ii++,value+=stepval) { 1454 for (jj=0; jj<bs; jj++) { 1455 *bap++ += *value++; 1456 } 1457 } 1458 } else { 1459 for (ii=0; ii<bs; ii++,value+=stepval) { 1460 for (jj=0; jj<bs; jj++) { 1461 *bap++ = *value++; 1462 } 1463 } 1464 } 1465 } 1466 goto noinsert2; 1467 } 1468 } 1469 if (nonew == 1) goto noinsert2; 1470 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1471 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1472 N = nrow++ - 1; high++; 1473 /* shift up all the later entries in this row */ 1474 for (ii=N; ii>=i; ii--) { 1475 rp[ii+1] = rp[ii]; 1476 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1477 } 1478 if (N >= i) { 1479 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1480 } 1481 rp[i] = col; 1482 bap = ap + bs2*i; 1483 if (roworiented) { 1484 for (ii=0; ii<bs; ii++,value+=stepval) { 1485 for (jj=ii; jj<bs2; jj+=bs) { 1486 bap[jj] = *value++; 1487 } 1488 } 1489 } else { 1490 for (ii=0; ii<bs; ii++,value+=stepval) { 1491 for (jj=0; jj<bs; jj++) { 1492 *bap++ = *value++; 1493 } 1494 } 1495 } 1496 noinsert2:; 1497 low = i; 1498 } 1499 ailen[row] = nrow; 1500 } 1501 PetscFunctionReturn(0); 1502 } 1503 1504 #undef __FUNCT__ 1505 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1506 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1507 { 1508 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1509 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1510 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 1511 PetscErrorCode ierr; 1512 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1513 MatScalar *aa = a->a,*ap; 1514 PetscReal ratio=0.6; 1515 1516 PetscFunctionBegin; 1517 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1518 1519 if (m) rmax = ailen[0]; 1520 for (i=1; i<mbs; i++) { 1521 /* move each row back by the amount of empty slots (fshift) before it*/ 1522 fshift += imax[i-1] - ailen[i-1]; 1523 rmax = PetscMax(rmax,ailen[i]); 1524 if (fshift) { 1525 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1526 N = ailen[i]; 1527 for (j=0; j<N; j++) { 1528 ip[j-fshift] = ip[j]; 1529 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1530 } 1531 } 1532 ai[i] = ai[i-1] + ailen[i-1]; 1533 } 1534 if (mbs) { 1535 fshift += imax[mbs-1] - ailen[mbs-1]; 1536 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1537 } 1538 /* reset ilen and imax for each row */ 1539 for (i=0; i<mbs; i++) { 1540 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1541 } 1542 a->nz = ai[mbs]; 1543 1544 /* diagonals may have moved, so kill the diagonal pointers */ 1545 a->idiagvalid = PETSC_FALSE; 1546 if (fshift && a->diag) { 1547 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1548 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1549 a->diag = 0; 1550 } 1551 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1552 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1553 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1554 a->reallocs = 0; 1555 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1556 1557 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1558 if (a->compressedrow.use){ 1559 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1560 } 1561 1562 A->same_nonzero = PETSC_TRUE; 1563 PetscFunctionReturn(0); 1564 } 1565 1566 /* 1567 This function returns an array of flags which indicate the locations of contiguous 1568 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1569 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1570 Assume: sizes should be long enough to hold all the values. 1571 */ 1572 #undef __FUNCT__ 1573 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1574 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1575 { 1576 PetscInt i,j,k,row; 1577 PetscTruth flg; 1578 1579 PetscFunctionBegin; 1580 for (i=0,j=0; i<n; j++) { 1581 row = idx[i]; 1582 if (row%bs!=0) { /* Not the begining of a block */ 1583 sizes[j] = 1; 1584 i++; 1585 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1586 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1587 i++; 1588 } else { /* Begining of the block, so check if the complete block exists */ 1589 flg = PETSC_TRUE; 1590 for (k=1; k<bs; k++) { 1591 if (row+k != idx[i+k]) { /* break in the block */ 1592 flg = PETSC_FALSE; 1593 break; 1594 } 1595 } 1596 if (flg) { /* No break in the bs */ 1597 sizes[j] = bs; 1598 i+= bs; 1599 } else { 1600 sizes[j] = 1; 1601 i++; 1602 } 1603 } 1604 } 1605 *bs_max = j; 1606 PetscFunctionReturn(0); 1607 } 1608 1609 #undef __FUNCT__ 1610 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1611 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 1612 { 1613 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1614 PetscErrorCode ierr; 1615 PetscInt i,j,k,count,*rows; 1616 PetscInt bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max; 1617 PetscScalar zero = 0.0; 1618 MatScalar *aa; 1619 1620 PetscFunctionBegin; 1621 /* Make a copy of the IS and sort it */ 1622 /* allocate memory for rows,sizes */ 1623 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1624 sizes = rows + is_n; 1625 1626 /* copy IS values to rows, and sort them */ 1627 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1628 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1629 if (baij->keepzeroedrows) { 1630 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1631 bs_max = is_n; 1632 A->same_nonzero = PETSC_TRUE; 1633 } else { 1634 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1635 A->same_nonzero = PETSC_FALSE; 1636 } 1637 1638 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1639 row = rows[j]; 1640 if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 1641 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1642 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1643 if (sizes[i] == bs && !baij->keepzeroedrows) { 1644 if (diag != 0.0) { 1645 if (baij->ilen[row/bs] > 0) { 1646 baij->ilen[row/bs] = 1; 1647 baij->j[baij->i[row/bs]] = row/bs; 1648 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1649 } 1650 /* Now insert all the diagonal values for this bs */ 1651 for (k=0; k<bs; k++) { 1652 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 1653 } 1654 } else { /* (diag == 0.0) */ 1655 baij->ilen[row/bs] = 0; 1656 } /* end (diag == 0.0) */ 1657 } else { /* (sizes[i] != bs) */ 1658 #if defined (PETSC_USE_DEBUG) 1659 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 1660 #endif 1661 for (k=0; k<count; k++) { 1662 aa[0] = zero; 1663 aa += bs; 1664 } 1665 if (diag != 0.0) { 1666 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 1667 } 1668 } 1669 } 1670 1671 ierr = PetscFree(rows);CHKERRQ(ierr); 1672 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1673 PetscFunctionReturn(0); 1674 } 1675 1676 #undef __FUNCT__ 1677 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1678 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1679 { 1680 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1681 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 1682 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1683 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 1684 PetscErrorCode ierr; 1685 PetscInt ridx,cidx,bs2=a->bs2; 1686 PetscTruth roworiented=a->roworiented; 1687 MatScalar *ap,value,*aa=a->a,*bap; 1688 1689 PetscFunctionBegin; 1690 for (k=0; k<m; k++) { /* loop over added rows */ 1691 row = im[k]; 1692 brow = row/bs; 1693 if (row < 0) continue; 1694 #if defined(PETSC_USE_DEBUG) 1695 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 1696 #endif 1697 rp = aj + ai[brow]; 1698 ap = aa + bs2*ai[brow]; 1699 rmax = imax[brow]; 1700 nrow = ailen[brow]; 1701 low = 0; 1702 high = nrow; 1703 for (l=0; l<n; l++) { /* loop over added columns */ 1704 if (in[l] < 0) continue; 1705 #if defined(PETSC_USE_DEBUG) 1706 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 1707 #endif 1708 col = in[l]; bcol = col/bs; 1709 ridx = row % bs; cidx = col % bs; 1710 if (roworiented) { 1711 value = v[l + k*n]; 1712 } else { 1713 value = v[k + l*m]; 1714 } 1715 if (col <= lastcol) low = 0; else high = nrow; 1716 lastcol = col; 1717 while (high-low > 7) { 1718 t = (low+high)/2; 1719 if (rp[t] > bcol) high = t; 1720 else low = t; 1721 } 1722 for (i=low; i<high; i++) { 1723 if (rp[i] > bcol) break; 1724 if (rp[i] == bcol) { 1725 bap = ap + bs2*i + bs*cidx + ridx; 1726 if (is == ADD_VALUES) *bap += value; 1727 else *bap = value; 1728 goto noinsert1; 1729 } 1730 } 1731 if (nonew == 1) goto noinsert1; 1732 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1733 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1734 N = nrow++ - 1; high++; 1735 /* shift up all the later entries in this row */ 1736 for (ii=N; ii>=i; ii--) { 1737 rp[ii+1] = rp[ii]; 1738 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1739 } 1740 if (N>=i) { 1741 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1742 } 1743 rp[i] = bcol; 1744 ap[bs2*i + bs*cidx + ridx] = value; 1745 a->nz++; 1746 noinsert1:; 1747 low = i; 1748 } 1749 ailen[brow] = nrow; 1750 } 1751 A->same_nonzero = PETSC_FALSE; 1752 PetscFunctionReturn(0); 1753 } 1754 1755 1756 #undef __FUNCT__ 1757 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1758 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1759 { 1760 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1761 Mat outA; 1762 PetscErrorCode ierr; 1763 PetscTruth row_identity,col_identity; 1764 1765 PetscFunctionBegin; 1766 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1767 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1768 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1769 if (!row_identity || !col_identity) { 1770 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 1771 } 1772 1773 outA = inA; 1774 inA->factor = FACTOR_LU; 1775 1776 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1777 1778 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1779 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 1780 a->row = row; 1781 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1782 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 1783 a->col = col; 1784 1785 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1786 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1787 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1788 1789 /* 1790 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1791 for ILU(0) factorization with natural ordering 1792 */ 1793 if (inA->rmap.bs < 8) { 1794 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1795 } else { 1796 if (!a->solve_work) { 1797 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1798 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1799 } 1800 } 1801 1802 ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1803 1804 PetscFunctionReturn(0); 1805 } 1806 1807 EXTERN_C_BEGIN 1808 #undef __FUNCT__ 1809 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1810 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 1811 { 1812 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1813 PetscInt i,nz,nbs; 1814 1815 PetscFunctionBegin; 1816 nz = baij->maxnz/baij->bs2; 1817 nbs = baij->nbs; 1818 for (i=0; i<nz; i++) { 1819 baij->j[i] = indices[i]; 1820 } 1821 baij->nz = nz; 1822 for (i=0; i<nbs; i++) { 1823 baij->ilen[i] = baij->imax[i]; 1824 } 1825 1826 PetscFunctionReturn(0); 1827 } 1828 EXTERN_C_END 1829 1830 #undef __FUNCT__ 1831 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1832 /*@ 1833 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1834 in the matrix. 1835 1836 Input Parameters: 1837 + mat - the SeqBAIJ matrix 1838 - indices - the column indices 1839 1840 Level: advanced 1841 1842 Notes: 1843 This can be called if you have precomputed the nonzero structure of the 1844 matrix and want to provide it to the matrix object to improve the performance 1845 of the MatSetValues() operation. 1846 1847 You MUST have set the correct numbers of nonzeros per row in the call to 1848 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 1849 1850 MUST be called before any calls to MatSetValues(); 1851 1852 @*/ 1853 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1854 { 1855 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1856 1857 PetscFunctionBegin; 1858 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1859 PetscValidPointer(indices,2); 1860 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1861 if (f) { 1862 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1863 } else { 1864 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 1865 } 1866 PetscFunctionReturn(0); 1867 } 1868 1869 #undef __FUNCT__ 1870 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 1871 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 1872 { 1873 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1874 PetscErrorCode ierr; 1875 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 1876 PetscReal atmp; 1877 PetscScalar *x,zero = 0.0; 1878 MatScalar *aa; 1879 PetscInt ncols,brow,krow,kcol; 1880 1881 PetscFunctionBegin; 1882 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1883 bs = A->rmap.bs; 1884 aa = a->a; 1885 ai = a->i; 1886 aj = a->j; 1887 mbs = a->mbs; 1888 1889 ierr = VecSet(v,zero);CHKERRQ(ierr); 1890 if (idx) { 1891 for (i=0; i<A->rmap.n;i++) idx[i] = 0; 1892 } 1893 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1894 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1895 if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1896 for (i=0; i<mbs; i++) { 1897 ncols = ai[1] - ai[0]; ai++; 1898 brow = bs*i; 1899 for (j=0; j<ncols; j++){ 1900 for (kcol=0; kcol<bs; kcol++){ 1901 for (krow=0; krow<bs; krow++){ 1902 atmp = PetscAbsScalar(*aa);aa++; 1903 row = brow + krow; /* row index */ 1904 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 1905 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 1906 } 1907 } 1908 aj++; 1909 } 1910 } 1911 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1912 PetscFunctionReturn(0); 1913 } 1914 1915 #undef __FUNCT__ 1916 #define __FUNCT__ "MatCopy_SeqBAIJ" 1917 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 1918 { 1919 PetscErrorCode ierr; 1920 1921 PetscFunctionBegin; 1922 /* If the two matrices have the same copy implementation, use fast copy. */ 1923 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1924 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1925 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 1926 1927 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 1928 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1929 } 1930 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 1931 } else { 1932 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1933 } 1934 PetscFunctionReturn(0); 1935 } 1936 1937 #undef __FUNCT__ 1938 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1939 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 1940 { 1941 PetscErrorCode ierr; 1942 1943 PetscFunctionBegin; 1944 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 1945 PetscFunctionReturn(0); 1946 } 1947 1948 #undef __FUNCT__ 1949 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1950 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1951 { 1952 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1953 PetscFunctionBegin; 1954 *array = a->a; 1955 PetscFunctionReturn(0); 1956 } 1957 1958 #undef __FUNCT__ 1959 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1960 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1961 { 1962 PetscFunctionBegin; 1963 PetscFunctionReturn(0); 1964 } 1965 1966 #include "petscblaslapack.h" 1967 #undef __FUNCT__ 1968 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1969 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1970 { 1971 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1972 PetscErrorCode ierr; 1973 PetscInt i,bs=Y->rmap.bs,j,bs2; 1974 PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz; 1975 1976 PetscFunctionBegin; 1977 if (str == SAME_NONZERO_PATTERN) { 1978 PetscScalar alpha = a; 1979 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1980 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1981 if (y->xtoy && y->XtoY != X) { 1982 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1983 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1984 } 1985 if (!y->xtoy) { /* get xtoy */ 1986 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1987 y->XtoY = X; 1988 } 1989 bs2 = bs*bs; 1990 for (i=0; i<x->nz; i++) { 1991 j = 0; 1992 while (j < bs2){ 1993 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 1994 j++; 1995 } 1996 } 1997 ierr = PetscInfo3(0,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 1998 } else { 1999 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2000 } 2001 PetscFunctionReturn(0); 2002 } 2003 2004 #undef __FUNCT__ 2005 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2006 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2007 { 2008 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2009 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2010 PetscScalar *aa = a->a; 2011 2012 PetscFunctionBegin; 2013 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2014 PetscFunctionReturn(0); 2015 } 2016 2017 #undef __FUNCT__ 2018 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2019 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2020 { 2021 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2022 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2023 PetscScalar *aa = a->a; 2024 2025 PetscFunctionBegin; 2026 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2027 PetscFunctionReturn(0); 2028 } 2029 2030 2031 /* -------------------------------------------------------------------*/ 2032 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2033 MatGetRow_SeqBAIJ, 2034 MatRestoreRow_SeqBAIJ, 2035 MatMult_SeqBAIJ_N, 2036 /* 4*/ MatMultAdd_SeqBAIJ_N, 2037 MatMultTranspose_SeqBAIJ, 2038 MatMultTransposeAdd_SeqBAIJ, 2039 MatSolve_SeqBAIJ_N, 2040 0, 2041 0, 2042 /*10*/ 0, 2043 MatLUFactor_SeqBAIJ, 2044 0, 2045 0, 2046 MatTranspose_SeqBAIJ, 2047 /*15*/ MatGetInfo_SeqBAIJ, 2048 MatEqual_SeqBAIJ, 2049 MatGetDiagonal_SeqBAIJ, 2050 MatDiagonalScale_SeqBAIJ, 2051 MatNorm_SeqBAIJ, 2052 /*20*/ 0, 2053 MatAssemblyEnd_SeqBAIJ, 2054 0, 2055 MatSetOption_SeqBAIJ, 2056 MatZeroEntries_SeqBAIJ, 2057 /*25*/ MatZeroRows_SeqBAIJ, 2058 MatLUFactorSymbolic_SeqBAIJ, 2059 MatLUFactorNumeric_SeqBAIJ_N, 2060 MatCholeskyFactorSymbolic_SeqBAIJ, 2061 MatCholeskyFactorNumeric_SeqBAIJ_N, 2062 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2063 MatILUFactorSymbolic_SeqBAIJ, 2064 MatICCFactorSymbolic_SeqBAIJ, 2065 MatGetArray_SeqBAIJ, 2066 MatRestoreArray_SeqBAIJ, 2067 /*35*/ MatDuplicate_SeqBAIJ, 2068 0, 2069 0, 2070 MatILUFactor_SeqBAIJ, 2071 0, 2072 /*40*/ MatAXPY_SeqBAIJ, 2073 MatGetSubMatrices_SeqBAIJ, 2074 MatIncreaseOverlap_SeqBAIJ, 2075 MatGetValues_SeqBAIJ, 2076 MatCopy_SeqBAIJ, 2077 /*45*/ 0, 2078 MatScale_SeqBAIJ, 2079 0, 2080 0, 2081 0, 2082 /*50*/ 0, 2083 MatGetRowIJ_SeqBAIJ, 2084 MatRestoreRowIJ_SeqBAIJ, 2085 0, 2086 0, 2087 /*55*/ 0, 2088 0, 2089 0, 2090 0, 2091 MatSetValuesBlocked_SeqBAIJ, 2092 /*60*/ MatGetSubMatrix_SeqBAIJ, 2093 MatDestroy_SeqBAIJ, 2094 MatView_SeqBAIJ, 2095 0, 2096 0, 2097 /*65*/ 0, 2098 0, 2099 0, 2100 0, 2101 0, 2102 /*70*/ MatGetRowMaxAbs_SeqBAIJ, 2103 MatConvert_Basic, 2104 0, 2105 0, 2106 0, 2107 /*75*/ 0, 2108 0, 2109 0, 2110 0, 2111 0, 2112 /*80*/ 0, 2113 0, 2114 0, 2115 0, 2116 MatLoad_SeqBAIJ, 2117 /*85*/ 0, 2118 0, 2119 0, 2120 0, 2121 0, 2122 /*90*/ 0, 2123 0, 2124 0, 2125 0, 2126 0, 2127 /*95*/ 0, 2128 0, 2129 0, 2130 0, 2131 0, 2132 /*100*/0, 2133 0, 2134 0, 2135 0, 2136 0, 2137 /*105*/0, 2138 MatRealPart_SeqBAIJ, 2139 MatImaginaryPart_SeqBAIJ 2140 }; 2141 2142 EXTERN_C_BEGIN 2143 #undef __FUNCT__ 2144 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2145 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2146 { 2147 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2148 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2149 PetscErrorCode ierr; 2150 2151 PetscFunctionBegin; 2152 if (aij->nonew != 1) { 2153 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2154 } 2155 2156 /* allocate space for values if not already there */ 2157 if (!aij->saved_values) { 2158 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2159 } 2160 2161 /* copy values over */ 2162 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2163 PetscFunctionReturn(0); 2164 } 2165 EXTERN_C_END 2166 2167 EXTERN_C_BEGIN 2168 #undef __FUNCT__ 2169 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2170 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2171 { 2172 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2173 PetscErrorCode ierr; 2174 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2175 2176 PetscFunctionBegin; 2177 if (aij->nonew != 1) { 2178 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2179 } 2180 if (!aij->saved_values) { 2181 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2182 } 2183 2184 /* copy values over */ 2185 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2186 PetscFunctionReturn(0); 2187 } 2188 EXTERN_C_END 2189 2190 EXTERN_C_BEGIN 2191 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2192 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2193 EXTERN_C_END 2194 2195 EXTERN_C_BEGIN 2196 #undef __FUNCT__ 2197 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2198 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2199 { 2200 Mat_SeqBAIJ *b; 2201 PetscErrorCode ierr; 2202 PetscInt i,mbs,nbs,bs2,newbs = bs; 2203 PetscTruth flg,skipallocation = PETSC_FALSE; 2204 2205 PetscFunctionBegin; 2206 2207 if (nz == MAT_SKIP_ALLOCATION) { 2208 skipallocation = PETSC_TRUE; 2209 nz = 0; 2210 } 2211 2212 ierr = PetscOptionsBegin(B->comm,B->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2213 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2214 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2215 2216 if (nnz && newbs != bs) { 2217 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2218 } 2219 bs = newbs; 2220 2221 B->rmap.bs = B->cmap.bs = bs; 2222 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2223 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2224 2225 B->preallocated = PETSC_TRUE; 2226 2227 mbs = B->rmap.n/bs; 2228 nbs = B->cmap.n/bs; 2229 bs2 = bs*bs; 2230 2231 if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) { 2232 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs); 2233 } 2234 2235 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2236 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2237 if (nnz) { 2238 for (i=0; i<mbs; i++) { 2239 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2240 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2241 } 2242 } 2243 2244 b = (Mat_SeqBAIJ*)B->data; 2245 ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2246 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2247 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2248 2249 B->ops->solve = MatSolve_SeqBAIJ_Update; 2250 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2251 if (!flg) { 2252 switch (bs) { 2253 case 1: 2254 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2255 B->ops->mult = MatMult_SeqBAIJ_1; 2256 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2257 break; 2258 case 2: 2259 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2260 B->ops->mult = MatMult_SeqBAIJ_2; 2261 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2262 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2263 break; 2264 case 3: 2265 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2266 B->ops->mult = MatMult_SeqBAIJ_3; 2267 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2268 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2269 break; 2270 case 4: 2271 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2272 B->ops->mult = MatMult_SeqBAIJ_4; 2273 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2274 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2275 break; 2276 case 5: 2277 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2278 B->ops->mult = MatMult_SeqBAIJ_5; 2279 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2280 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2281 break; 2282 case 6: 2283 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2284 B->ops->mult = MatMult_SeqBAIJ_6; 2285 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2286 break; 2287 case 7: 2288 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2289 B->ops->mult = MatMult_SeqBAIJ_7; 2290 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2291 break; 2292 default: 2293 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2294 B->ops->mult = MatMult_SeqBAIJ_N; 2295 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2296 break; 2297 } 2298 } 2299 B->rmap.bs = bs; 2300 b->mbs = mbs; 2301 b->nbs = nbs; 2302 if (!skipallocation) { 2303 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2304 /* b->ilen will count nonzeros in each block row so far. */ 2305 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2306 if (!nnz) { 2307 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2308 else if (nz <= 0) nz = 1; 2309 for (i=0; i<mbs; i++) b->imax[i] = nz; 2310 nz = nz*mbs; 2311 } else { 2312 nz = 0; 2313 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2314 } 2315 2316 /* allocate the matrix space */ 2317 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 2318 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2319 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2320 b->singlemalloc = PETSC_TRUE; 2321 2322 b->i[0] = 0; 2323 for (i=1; i<mbs+1; i++) { 2324 b->i[i] = b->i[i-1] + b->imax[i-1]; 2325 } 2326 b->free_a = PETSC_TRUE; 2327 b->free_ij = PETSC_TRUE; 2328 } else { 2329 b->free_a = PETSC_FALSE; 2330 b->free_ij = PETSC_FALSE; 2331 } 2332 2333 B->rmap.bs = bs; 2334 b->bs2 = bs2; 2335 b->mbs = mbs; 2336 b->nz = 0; 2337 b->maxnz = nz*bs2; 2338 B->info.nz_unneeded = (PetscReal)b->maxnz; 2339 PetscFunctionReturn(0); 2340 } 2341 EXTERN_C_END 2342 2343 /*MC 2344 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2345 block sparse compressed row format. 2346 2347 Options Database Keys: 2348 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2349 2350 Level: beginner 2351 2352 .seealso: MatCreateSeqBAIJ() 2353 M*/ 2354 2355 EXTERN_C_BEGIN 2356 #undef __FUNCT__ 2357 #define __FUNCT__ "MatCreate_SeqBAIJ" 2358 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2359 { 2360 PetscErrorCode ierr; 2361 PetscMPIInt size; 2362 Mat_SeqBAIJ *b; 2363 2364 PetscFunctionBegin; 2365 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 2366 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2367 2368 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2369 B->data = (void*)b; 2370 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2371 B->factor = 0; 2372 B->mapping = 0; 2373 b->row = 0; 2374 b->col = 0; 2375 b->icol = 0; 2376 b->reallocs = 0; 2377 b->saved_values = 0; 2378 #if defined(PETSC_USE_MAT_SINGLE) 2379 b->setvalueslen = 0; 2380 b->setvaluescopy = PETSC_NULL; 2381 #endif 2382 2383 b->roworiented = PETSC_TRUE; 2384 b->nonew = 0; 2385 b->diag = 0; 2386 b->solve_work = 0; 2387 b->mult_work = 0; 2388 B->spptr = 0; 2389 B->info.nz_unneeded = (PetscReal)b->maxnz; 2390 b->keepzeroedrows = PETSC_FALSE; 2391 b->xtoy = 0; 2392 b->XtoY = 0; 2393 b->compressedrow.use = PETSC_FALSE; 2394 b->compressedrow.nrows = 0; 2395 b->compressedrow.i = PETSC_NULL; 2396 b->compressedrow.rindex = PETSC_NULL; 2397 b->compressedrow.checked = PETSC_FALSE; 2398 B->same_nonzero = PETSC_FALSE; 2399 2400 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2401 "MatInvertBlockDiagonal_SeqBAIJ", 2402 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2403 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2404 "MatStoreValues_SeqBAIJ", 2405 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2406 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2407 "MatRetrieveValues_SeqBAIJ", 2408 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2409 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2410 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2411 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2412 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2413 "MatConvert_SeqBAIJ_SeqAIJ", 2414 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2415 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2416 "MatConvert_SeqBAIJ_SeqSBAIJ", 2417 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2418 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2419 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2420 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2421 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 2422 PetscFunctionReturn(0); 2423 } 2424 EXTERN_C_END 2425 2426 #undef __FUNCT__ 2427 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2428 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2429 { 2430 Mat C; 2431 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2432 PetscErrorCode ierr; 2433 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2434 2435 PetscFunctionBegin; 2436 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2437 2438 *B = 0; 2439 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 2440 ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 2441 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 2442 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2443 c = (Mat_SeqBAIJ*)C->data; 2444 2445 C->rmap.N = A->rmap.N; 2446 C->cmap.N = A->cmap.N; 2447 C->rmap.bs = A->rmap.bs; 2448 c->bs2 = a->bs2; 2449 c->mbs = a->mbs; 2450 c->nbs = a->nbs; 2451 2452 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 2453 for (i=0; i<mbs; i++) { 2454 c->imax[i] = a->imax[i]; 2455 c->ilen[i] = a->ilen[i]; 2456 } 2457 2458 /* allocate the matrix space */ 2459 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2460 c->singlemalloc = PETSC_TRUE; 2461 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2462 if (mbs > 0) { 2463 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2464 if (cpvalues == MAT_COPY_VALUES) { 2465 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2466 } else { 2467 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2468 } 2469 } 2470 c->roworiented = a->roworiented; 2471 c->nonew = a->nonew; 2472 2473 if (a->diag) { 2474 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2475 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2476 for (i=0; i<mbs; i++) { 2477 c->diag[i] = a->diag[i]; 2478 } 2479 } else c->diag = 0; 2480 c->nz = a->nz; 2481 c->maxnz = a->maxnz; 2482 c->solve_work = 0; 2483 c->mult_work = 0; 2484 c->free_a = PETSC_TRUE; 2485 c->free_ij = PETSC_TRUE; 2486 C->preallocated = PETSC_TRUE; 2487 C->assembled = PETSC_TRUE; 2488 2489 c->compressedrow.use = a->compressedrow.use; 2490 c->compressedrow.nrows = a->compressedrow.nrows; 2491 c->compressedrow.checked = a->compressedrow.checked; 2492 if ( a->compressedrow.checked && a->compressedrow.use){ 2493 i = a->compressedrow.nrows; 2494 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2495 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2496 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2497 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2498 } else { 2499 c->compressedrow.use = PETSC_FALSE; 2500 c->compressedrow.i = PETSC_NULL; 2501 c->compressedrow.rindex = PETSC_NULL; 2502 } 2503 C->same_nonzero = A->same_nonzero; 2504 *B = C; 2505 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 2506 PetscFunctionReturn(0); 2507 } 2508 2509 #undef __FUNCT__ 2510 #define __FUNCT__ "MatLoad_SeqBAIJ" 2511 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A) 2512 { 2513 Mat_SeqBAIJ *a; 2514 Mat B; 2515 PetscErrorCode ierr; 2516 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2517 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2518 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2519 PetscInt *masked,nmask,tmp,bs2,ishift; 2520 PetscMPIInt size; 2521 int fd; 2522 PetscScalar *aa; 2523 MPI_Comm comm = ((PetscObject)viewer)->comm; 2524 2525 PetscFunctionBegin; 2526 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 2527 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2528 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2529 bs2 = bs*bs; 2530 2531 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2532 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2533 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2534 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2535 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2536 M = header[1]; N = header[2]; nz = header[3]; 2537 2538 if (header[3] < 0) { 2539 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2540 } 2541 2542 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2543 2544 /* 2545 This code adds extra rows to make sure the number of rows is 2546 divisible by the blocksize 2547 */ 2548 mbs = M/bs; 2549 extra_rows = bs - M + bs*(mbs); 2550 if (extra_rows == bs) extra_rows = 0; 2551 else mbs++; 2552 if (extra_rows) { 2553 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2554 } 2555 2556 /* read in row lengths */ 2557 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2558 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2559 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2560 2561 /* read in column indices */ 2562 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2563 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2564 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2565 2566 /* loop over row lengths determining block row lengths */ 2567 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2568 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2569 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2570 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2571 masked = mask + mbs; 2572 rowcount = 0; nzcount = 0; 2573 for (i=0; i<mbs; i++) { 2574 nmask = 0; 2575 for (j=0; j<bs; j++) { 2576 kmax = rowlengths[rowcount]; 2577 for (k=0; k<kmax; k++) { 2578 tmp = jj[nzcount++]/bs; 2579 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2580 } 2581 rowcount++; 2582 } 2583 browlengths[i] += nmask; 2584 /* zero out the mask elements we set */ 2585 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2586 } 2587 2588 /* create our matrix */ 2589 ierr = MatCreate(comm,&B); 2590 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 2591 ierr = MatSetType(B,type);CHKERRQ(ierr); 2592 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 2593 a = (Mat_SeqBAIJ*)B->data; 2594 2595 /* set matrix "i" values */ 2596 a->i[0] = 0; 2597 for (i=1; i<= mbs; i++) { 2598 a->i[i] = a->i[i-1] + browlengths[i-1]; 2599 a->ilen[i-1] = browlengths[i-1]; 2600 } 2601 a->nz = 0; 2602 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2603 2604 /* read in nonzero values */ 2605 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2606 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2607 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2608 2609 /* set "a" and "j" values into matrix */ 2610 nzcount = 0; jcount = 0; 2611 for (i=0; i<mbs; i++) { 2612 nzcountb = nzcount; 2613 nmask = 0; 2614 for (j=0; j<bs; j++) { 2615 kmax = rowlengths[i*bs+j]; 2616 for (k=0; k<kmax; k++) { 2617 tmp = jj[nzcount++]/bs; 2618 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2619 } 2620 } 2621 /* sort the masked values */ 2622 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2623 2624 /* set "j" values into matrix */ 2625 maskcount = 1; 2626 for (j=0; j<nmask; j++) { 2627 a->j[jcount++] = masked[j]; 2628 mask[masked[j]] = maskcount++; 2629 } 2630 /* set "a" values into matrix */ 2631 ishift = bs2*a->i[i]; 2632 for (j=0; j<bs; j++) { 2633 kmax = rowlengths[i*bs+j]; 2634 for (k=0; k<kmax; k++) { 2635 tmp = jj[nzcountb]/bs ; 2636 block = mask[tmp] - 1; 2637 point = jj[nzcountb] - bs*tmp; 2638 idx = ishift + bs2*block + j + bs*point; 2639 a->a[idx] = (MatScalar)aa[nzcountb++]; 2640 } 2641 } 2642 /* zero out the mask elements we set */ 2643 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2644 } 2645 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2646 2647 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2648 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2649 ierr = PetscFree(aa);CHKERRQ(ierr); 2650 ierr = PetscFree(jj);CHKERRQ(ierr); 2651 ierr = PetscFree(mask);CHKERRQ(ierr); 2652 2653 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2654 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2655 ierr = MatView_Private(B);CHKERRQ(ierr); 2656 2657 *A = B; 2658 PetscFunctionReturn(0); 2659 } 2660 2661 #undef __FUNCT__ 2662 #define __FUNCT__ "MatCreateSeqBAIJ" 2663 /*@C 2664 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2665 compressed row) format. For good matrix assembly performance the 2666 user should preallocate the matrix storage by setting the parameter nz 2667 (or the array nnz). By setting these parameters accurately, performance 2668 during matrix assembly can be increased by more than a factor of 50. 2669 2670 Collective on MPI_Comm 2671 2672 Input Parameters: 2673 + comm - MPI communicator, set to PETSC_COMM_SELF 2674 . bs - size of block 2675 . m - number of rows 2676 . n - number of columns 2677 . nz - number of nonzero blocks per block row (same for all rows) 2678 - nnz - array containing the number of nonzero blocks in the various block rows 2679 (possibly different for each block row) or PETSC_NULL 2680 2681 Output Parameter: 2682 . A - the matrix 2683 2684 Options Database Keys: 2685 . -mat_no_unroll - uses code that does not unroll the loops in the 2686 block calculations (much slower) 2687 . -mat_block_size - size of the blocks to use 2688 2689 Level: intermediate 2690 2691 Notes: 2692 The number of rows and columns must be divisible by blocksize. 2693 2694 If the nnz parameter is given then the nz parameter is ignored 2695 2696 A nonzero block is any block that as 1 or more nonzeros in it 2697 2698 The block AIJ format is fully compatible with standard Fortran 77 2699 storage. That is, the stored row and column indices can begin at 2700 either one (as in Fortran) or zero. See the users' manual for details. 2701 2702 Specify the preallocated storage with either nz or nnz (not both). 2703 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2704 allocation. For additional details, see the users manual chapter on 2705 matrices. 2706 2707 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2708 @*/ 2709 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 2710 { 2711 PetscErrorCode ierr; 2712 2713 PetscFunctionBegin; 2714 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2715 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2716 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2717 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 2718 PetscFunctionReturn(0); 2719 } 2720 2721 #undef __FUNCT__ 2722 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2723 /*@C 2724 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2725 per row in the matrix. For good matrix assembly performance the 2726 user should preallocate the matrix storage by setting the parameter nz 2727 (or the array nnz). By setting these parameters accurately, performance 2728 during matrix assembly can be increased by more than a factor of 50. 2729 2730 Collective on MPI_Comm 2731 2732 Input Parameters: 2733 + A - the matrix 2734 . bs - size of block 2735 . nz - number of block nonzeros per block row (same for all rows) 2736 - nnz - array containing the number of block nonzeros in the various block rows 2737 (possibly different for each block row) or PETSC_NULL 2738 2739 Options Database Keys: 2740 . -mat_no_unroll - uses code that does not unroll the loops in the 2741 block calculations (much slower) 2742 . -mat_block_size - size of the blocks to use 2743 2744 Level: intermediate 2745 2746 Notes: 2747 If the nnz parameter is given then the nz parameter is ignored 2748 2749 The block AIJ format is fully compatible with standard Fortran 77 2750 storage. That is, the stored row and column indices can begin at 2751 either one (as in Fortran) or zero. See the users' manual for details. 2752 2753 Specify the preallocated storage with either nz or nnz (not both). 2754 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2755 allocation. For additional details, see the users manual chapter on 2756 matrices. 2757 2758 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2759 @*/ 2760 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 2761 { 2762 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 2763 2764 PetscFunctionBegin; 2765 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2766 if (f) { 2767 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2768 } 2769 PetscFunctionReturn(0); 2770 } 2771 2772 #undef __FUNCT__ 2773 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 2774 /*@ 2775 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 2776 (upper triangular entries in CSR format) provided by the user. 2777 2778 Collective on MPI_Comm 2779 2780 Input Parameters: 2781 + comm - must be an MPI communicator of size 1 2782 . bs - size of block 2783 . m - number of rows 2784 . n - number of columns 2785 . i - row indices 2786 . j - column indices 2787 - a - matrix values 2788 2789 Output Parameter: 2790 . mat - the matrix 2791 2792 Level: intermediate 2793 2794 Notes: 2795 The i, j, and a arrays are not copied by this routine, the user must free these arrays 2796 once the matrix is destroyed 2797 2798 You cannot set new nonzero locations into this matrix, that will generate an error. 2799 2800 The i and j indices are 0 based 2801 2802 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 2803 2804 @*/ 2805 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 2806 { 2807 PetscErrorCode ierr; 2808 PetscInt ii; 2809 Mat_SeqBAIJ *baij; 2810 2811 PetscFunctionBegin; 2812 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 2813 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2814 2815 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2816 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 2817 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 2818 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 2819 baij = (Mat_SeqBAIJ*)(*mat)->data; 2820 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 2821 2822 baij->i = i; 2823 baij->j = j; 2824 baij->a = a; 2825 baij->singlemalloc = PETSC_FALSE; 2826 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 2827 baij->free_a = PETSC_FALSE; 2828 baij->free_ij = PETSC_FALSE; 2829 2830 for (ii=0; ii<m; ii++) { 2831 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 2832 #if defined(PETSC_USE_DEBUG) 2833 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 2834 #endif 2835 } 2836 #if defined(PETSC_USE_DEBUG) 2837 for (ii=0; ii<baij->i[m]; ii++) { 2838 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 2839 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 2840 } 2841 #endif 2842 2843 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2844 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2845 PetscFunctionReturn(0); 2846 } 2847