1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 15 /*@C 16 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 17 18 Collective on Mat 19 20 Input Parameters: 21 . mat - the matrix 22 23 Level: advanced 24 @*/ 25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 26 { 27 PetscErrorCode ierr,(*f)(Mat); 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 31 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 32 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 33 34 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 35 if (f) { 36 ierr = (*f)(mat);CHKERRQ(ierr); 37 } else { 38 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 39 } 40 PetscFunctionReturn(0); 41 } 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 47 { 48 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 49 PetscErrorCode ierr; 50 PetscInt *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs; 51 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 52 53 PetscFunctionBegin; 54 if (a->idiagvalid) PetscFunctionReturn(0); 55 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56 diag_offset = a->diag; 57 if (!a->idiag) { 58 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59 } 60 diag = a->idiag; 61 mdiag = a->idiag+bs*bs*mbs; 62 /* factor and invert each block */ 63 switch (bs){ 64 case 2: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 4*diag_offset[i]; 67 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 68 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 69 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 70 diag += 4; 71 mdiag += 4; 72 } 73 break; 74 case 3: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 9*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79 diag[8] = odiag[8]; 80 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 81 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 82 mdiag[8] = odiag[8]; 83 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 84 diag += 9; 85 mdiag += 9; 86 } 87 break; 88 case 4: 89 for (i=0; i<mbs; i++) { 90 odiag = v + 16*diag_offset[i]; 91 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 92 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 94 diag += 16; 95 mdiag += 16; 96 } 97 break; 98 case 5: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 25*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 104 diag += 25; 105 mdiag += 25; 106 } 107 break; 108 default: 109 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 110 } 111 a->idiagvalid = PETSC_TRUE; 112 PetscFunctionReturn(0); 113 } 114 EXTERN_C_END 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 119 { 120 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 121 PetscScalar *x,x1,x2,s1,s2; 122 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 123 PetscErrorCode ierr; 124 PetscInt m = a->mbs,i,i2,nz,idx; 125 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 126 127 PetscFunctionBegin; 128 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 129 its = its*lits; 130 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 131 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 132 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 133 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 134 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 135 136 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 137 138 diag = a->diag; 139 idiag = a->idiag; 140 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 141 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 142 143 if (flag & SOR_ZERO_INITIAL_GUESS) { 144 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 145 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 146 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 147 i2 = 2; 148 idiag += 4; 149 for (i=1; i<m; i++) { 150 v = aa + 4*ai[i]; 151 vi = aj + ai[i]; 152 nz = diag[i] - ai[i]; 153 s1 = b[i2]; s2 = b[i2+1]; 154 while (nz--) { 155 idx = 2*(*vi++); 156 x1 = x[idx]; x2 = x[1+idx]; 157 s1 -= v[0]*x1 + v[2]*x2; 158 s2 -= v[1]*x1 + v[3]*x2; 159 v += 4; 160 } 161 x[i2] = idiag[0]*s1 + idiag[2]*s2; 162 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 163 idiag += 4; 164 i2 += 2; 165 } 166 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 167 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 168 } 169 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 170 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 171 i2 = 0; 172 mdiag = a->idiag+4*a->mbs; 173 for (i=0; i<m; i++) { 174 x1 = x[i2]; x2 = x[i2+1]; 175 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 176 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 177 mdiag += 4; 178 i2 += 2; 179 } 180 ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 181 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 182 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 183 } 184 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 185 idiag = a->idiag+4*a->mbs - 4; 186 i2 = 2*m - 2; 187 x1 = x[i2]; x2 = x[i2+1]; 188 x[i2] = idiag[0]*x1 + idiag[2]*x2; 189 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 190 idiag -= 4; 191 i2 -= 2; 192 for (i=m-2; i>=0; i--) { 193 v = aa + 4*(diag[i]+1); 194 vi = aj + diag[i] + 1; 195 nz = ai[i+1] - diag[i] - 1; 196 s1 = x[i2]; s2 = x[i2+1]; 197 while (nz--) { 198 idx = 2*(*vi++); 199 x1 = x[idx]; x2 = x[1+idx]; 200 s1 -= v[0]*x1 + v[2]*x2; 201 s2 -= v[1]*x1 + v[3]*x2; 202 v += 4; 203 } 204 x[i2] = idiag[0]*s1 + idiag[2]*s2; 205 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 206 idiag -= 4; 207 i2 -= 2; 208 } 209 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 210 } 211 } else { 212 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 213 } 214 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 215 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 216 PetscFunctionReturn(0); 217 } 218 219 #undef __FUNCT__ 220 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 221 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 222 { 223 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 224 PetscScalar *x,x1,x2,x3,s1,s2,s3; 225 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 226 PetscErrorCode ierr; 227 PetscInt m = a->mbs,i,i2,nz,idx; 228 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 229 230 PetscFunctionBegin; 231 its = its*lits; 232 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 233 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 234 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 235 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 236 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 237 238 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 239 240 diag = a->diag; 241 idiag = a->idiag; 242 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 243 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 244 245 if (flag & SOR_ZERO_INITIAL_GUESS) { 246 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 247 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 248 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 249 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 250 i2 = 3; 251 idiag += 9; 252 for (i=1; i<m; i++) { 253 v = aa + 9*ai[i]; 254 vi = aj + ai[i]; 255 nz = diag[i] - ai[i]; 256 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 257 while (nz--) { 258 idx = 3*(*vi++); 259 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 260 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 261 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 262 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 263 v += 9; 264 } 265 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 266 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 267 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 268 idiag += 9; 269 i2 += 3; 270 } 271 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 272 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 273 } 274 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 275 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 276 i2 = 0; 277 mdiag = a->idiag+9*a->mbs; 278 for (i=0; i<m; i++) { 279 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 280 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 281 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 282 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 283 mdiag += 9; 284 i2 += 3; 285 } 286 ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 287 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 288 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 289 } 290 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 291 idiag = a->idiag+9*a->mbs - 9; 292 i2 = 3*m - 3; 293 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 294 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 295 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 296 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 297 idiag -= 9; 298 i2 -= 3; 299 for (i=m-2; i>=0; i--) { 300 v = aa + 9*(diag[i]+1); 301 vi = aj + diag[i] + 1; 302 nz = ai[i+1] - diag[i] - 1; 303 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 304 while (nz--) { 305 idx = 3*(*vi++); 306 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 307 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 308 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 309 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 310 v += 9; 311 } 312 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 313 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 314 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 315 idiag -= 9; 316 i2 -= 3; 317 } 318 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 319 } 320 } else { 321 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 322 } 323 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 324 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 325 PetscFunctionReturn(0); 326 } 327 328 #undef __FUNCT__ 329 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 330 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 331 { 332 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 333 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 334 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 335 PetscErrorCode ierr; 336 PetscInt m = a->mbs,i,i2,nz,idx; 337 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 338 339 PetscFunctionBegin; 340 its = its*lits; 341 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 342 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 343 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 344 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 345 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 346 347 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 348 349 diag = a->diag; 350 idiag = a->idiag; 351 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 352 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 353 354 if (flag & SOR_ZERO_INITIAL_GUESS) { 355 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 356 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 357 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 358 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 359 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 360 i2 = 4; 361 idiag += 16; 362 for (i=1; i<m; i++) { 363 v = aa + 16*ai[i]; 364 vi = aj + ai[i]; 365 nz = diag[i] - ai[i]; 366 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 367 while (nz--) { 368 idx = 4*(*vi++); 369 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 370 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 371 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 372 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 373 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 374 v += 16; 375 } 376 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 377 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 378 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 379 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 380 idiag += 16; 381 i2 += 4; 382 } 383 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 384 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 385 } 386 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 387 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 388 i2 = 0; 389 mdiag = a->idiag+16*a->mbs; 390 for (i=0; i<m; i++) { 391 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 392 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 393 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 394 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 395 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 396 mdiag += 16; 397 i2 += 4; 398 } 399 ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 400 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 401 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 402 } 403 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 404 idiag = a->idiag+16*a->mbs - 16; 405 i2 = 4*m - 4; 406 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 407 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 408 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 409 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 410 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 411 idiag -= 16; 412 i2 -= 4; 413 for (i=m-2; i>=0; i--) { 414 v = aa + 16*(diag[i]+1); 415 vi = aj + diag[i] + 1; 416 nz = ai[i+1] - diag[i] - 1; 417 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 418 while (nz--) { 419 idx = 4*(*vi++); 420 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 421 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 422 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 423 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 424 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 425 v += 16; 426 } 427 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 428 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 429 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 430 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 431 idiag -= 16; 432 i2 -= 4; 433 } 434 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 435 } 436 } else { 437 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 438 } 439 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 440 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 441 PetscFunctionReturn(0); 442 } 443 444 #undef __FUNCT__ 445 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 446 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 447 { 448 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 449 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 450 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 451 PetscErrorCode ierr; 452 PetscInt m = a->mbs,i,i2,nz,idx; 453 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 454 455 PetscFunctionBegin; 456 its = its*lits; 457 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 458 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 459 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 460 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 461 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 462 463 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 464 465 diag = a->diag; 466 idiag = a->idiag; 467 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 468 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 469 470 if (flag & SOR_ZERO_INITIAL_GUESS) { 471 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 472 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 473 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 474 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 475 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 476 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 477 i2 = 5; 478 idiag += 25; 479 for (i=1; i<m; i++) { 480 v = aa + 25*ai[i]; 481 vi = aj + ai[i]; 482 nz = diag[i] - ai[i]; 483 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 484 while (nz--) { 485 idx = 5*(*vi++); 486 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 487 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 488 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 489 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 490 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 491 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 492 v += 25; 493 } 494 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 495 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 496 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 497 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 498 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 499 idiag += 25; 500 i2 += 5; 501 } 502 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 503 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 504 } 505 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 506 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 507 i2 = 0; 508 mdiag = a->idiag+25*a->mbs; 509 for (i=0; i<m; i++) { 510 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 511 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 512 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 513 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 514 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 515 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 516 mdiag += 25; 517 i2 += 5; 518 } 519 ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 520 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 521 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 522 } 523 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 524 idiag = a->idiag+25*a->mbs - 25; 525 i2 = 5*m - 5; 526 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 527 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 528 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 529 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 530 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 531 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 532 idiag -= 25; 533 i2 -= 5; 534 for (i=m-2; i>=0; i--) { 535 v = aa + 25*(diag[i]+1); 536 vi = aj + diag[i] + 1; 537 nz = ai[i+1] - diag[i] - 1; 538 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 539 while (nz--) { 540 idx = 5*(*vi++); 541 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 542 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 543 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 544 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 545 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 546 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 547 v += 25; 548 } 549 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 550 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 551 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 552 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 553 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 554 idiag -= 25; 555 i2 -= 5; 556 } 557 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 558 } 559 } else { 560 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 561 } 562 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 563 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 564 PetscFunctionReturn(0); 565 } 566 567 /* 568 Special version for direct calls from Fortran (Used in PETSc-fun3d) 569 */ 570 #if defined(PETSC_HAVE_FORTRAN_CAPS) 571 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 572 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 573 #define matsetvaluesblocked4_ matsetvaluesblocked4 574 #endif 575 576 EXTERN_C_BEGIN 577 #undef __FUNCT__ 578 #define __FUNCT__ "matsetvaluesblocked4_" 579 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 580 { 581 Mat A = *AA; 582 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 583 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 584 PetscInt *ai=a->i,*ailen=a->ilen; 585 PetscInt *aj=a->j,stepval,lastcol = -1; 586 const PetscScalar *value = v; 587 MatScalar *ap,*aa = a->a,*bap; 588 589 PetscFunctionBegin; 590 if (A->rmap.bs != 4) SETERRABORT(A->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 591 stepval = (n-1)*4; 592 for (k=0; k<m; k++) { /* loop over added rows */ 593 row = im[k]; 594 rp = aj + ai[row]; 595 ap = aa + 16*ai[row]; 596 nrow = ailen[row]; 597 low = 0; 598 high = nrow; 599 for (l=0; l<n; l++) { /* loop over added columns */ 600 col = in[l]; 601 if (col <= lastcol) low = 0; else high = nrow; 602 lastcol = col; 603 value = v + k*(stepval+4 + l)*4; 604 while (high-low > 7) { 605 t = (low+high)/2; 606 if (rp[t] > col) high = t; 607 else low = t; 608 } 609 for (i=low; i<high; i++) { 610 if (rp[i] > col) break; 611 if (rp[i] == col) { 612 bap = ap + 16*i; 613 for (ii=0; ii<4; ii++,value+=stepval) { 614 for (jj=ii; jj<16; jj+=4) { 615 bap[jj] += *value++; 616 } 617 } 618 goto noinsert2; 619 } 620 } 621 N = nrow++ - 1; 622 high++; /* added new column index thus must search to one higher than before */ 623 /* shift up all the later entries in this row */ 624 for (ii=N; ii>=i; ii--) { 625 rp[ii+1] = rp[ii]; 626 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 627 } 628 if (N >= i) { 629 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 630 } 631 rp[i] = col; 632 bap = ap + 16*i; 633 for (ii=0; ii<4; ii++,value+=stepval) { 634 for (jj=ii; jj<16; jj+=4) { 635 bap[jj] = *value++; 636 } 637 } 638 noinsert2:; 639 low = i; 640 } 641 ailen[row] = nrow; 642 } 643 PetscFunctionReturnVoid(); 644 } 645 EXTERN_C_END 646 647 #if defined(PETSC_HAVE_FORTRAN_CAPS) 648 #define matsetvalues4_ MATSETVALUES4 649 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 650 #define matsetvalues4_ matsetvalues4 651 #endif 652 653 EXTERN_C_BEGIN 654 #undef __FUNCT__ 655 #define __FUNCT__ "MatSetValues4_" 656 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 657 { 658 Mat A = *AA; 659 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 660 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 661 PetscInt *ai=a->i,*ailen=a->ilen; 662 PetscInt *aj=a->j,brow,bcol; 663 PetscInt ridx,cidx,lastcol = -1; 664 MatScalar *ap,value,*aa=a->a,*bap; 665 666 PetscFunctionBegin; 667 for (k=0; k<m; k++) { /* loop over added rows */ 668 row = im[k]; brow = row/4; 669 rp = aj + ai[brow]; 670 ap = aa + 16*ai[brow]; 671 nrow = ailen[brow]; 672 low = 0; 673 high = nrow; 674 for (l=0; l<n; l++) { /* loop over added columns */ 675 col = in[l]; bcol = col/4; 676 ridx = row % 4; cidx = col % 4; 677 value = v[l + k*n]; 678 if (col <= lastcol) low = 0; else high = nrow; 679 lastcol = col; 680 while (high-low > 7) { 681 t = (low+high)/2; 682 if (rp[t] > bcol) high = t; 683 else low = t; 684 } 685 for (i=low; i<high; i++) { 686 if (rp[i] > bcol) break; 687 if (rp[i] == bcol) { 688 bap = ap + 16*i + 4*cidx + ridx; 689 *bap += value; 690 goto noinsert1; 691 } 692 } 693 N = nrow++ - 1; 694 high++; /* added new column thus must search to one higher than before */ 695 /* shift up all the later entries in this row */ 696 for (ii=N; ii>=i; ii--) { 697 rp[ii+1] = rp[ii]; 698 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 699 } 700 if (N>=i) { 701 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 702 } 703 rp[i] = bcol; 704 ap[16*i + 4*cidx + ridx] = value; 705 noinsert1:; 706 low = i; 707 } 708 ailen[brow] = nrow; 709 } 710 PetscFunctionReturnVoid(); 711 } 712 EXTERN_C_END 713 714 /* UGLY, ugly, ugly 715 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 716 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 717 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 718 converts the entries into single precision and then calls ..._MatScalar() to put them 719 into the single precision data structures. 720 */ 721 #if defined(PETSC_USE_MAT_SINGLE) 722 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 723 #else 724 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 725 #endif 726 727 #define CHUNKSIZE 10 728 729 /* 730 Checks for missing diagonals 731 */ 732 #undef __FUNCT__ 733 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 734 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A) 735 { 736 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 737 PetscErrorCode ierr; 738 PetscInt *diag,*jj = a->j,i; 739 740 PetscFunctionBegin; 741 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 742 diag = a->diag; 743 for (i=0; i<a->mbs; i++) { 744 if (jj[diag[i]] != i) { 745 SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i); 746 } 747 } 748 PetscFunctionReturn(0); 749 } 750 751 #undef __FUNCT__ 752 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 753 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 754 { 755 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 756 PetscErrorCode ierr; 757 PetscInt i,j,m = a->mbs; 758 759 PetscFunctionBegin; 760 if (!a->diag) { 761 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 762 } 763 for (i=0; i<m; i++) { 764 a->diag[i] = a->i[i+1]; 765 for (j=a->i[i]; j<a->i[i+1]; j++) { 766 if (a->j[j] == i) { 767 a->diag[i] = j; 768 break; 769 } 770 } 771 } 772 PetscFunctionReturn(0); 773 } 774 775 776 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 777 778 #undef __FUNCT__ 779 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 780 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 781 { 782 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 783 PetscErrorCode ierr; 784 PetscInt n = a->mbs,i; 785 786 PetscFunctionBegin; 787 *nn = n; 788 if (!ia) PetscFunctionReturn(0); 789 if (symmetric) { 790 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 791 } else if (oshift == 1) { 792 /* temporarily add 1 to i and j indices */ 793 PetscInt nz = a->i[n]; 794 for (i=0; i<nz; i++) a->j[i]++; 795 for (i=0; i<n+1; i++) a->i[i]++; 796 *ia = a->i; *ja = a->j; 797 } else { 798 *ia = a->i; *ja = a->j; 799 } 800 801 PetscFunctionReturn(0); 802 } 803 804 #undef __FUNCT__ 805 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 806 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 807 { 808 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 809 PetscErrorCode ierr; 810 PetscInt i,n = a->mbs; 811 812 PetscFunctionBegin; 813 if (!ia) PetscFunctionReturn(0); 814 if (symmetric) { 815 ierr = PetscFree(*ia);CHKERRQ(ierr); 816 ierr = PetscFree(*ja);CHKERRQ(ierr); 817 } else if (oshift == 1) { 818 PetscInt nz = a->i[n]-1; 819 for (i=0; i<nz; i++) a->j[i]--; 820 for (i=0; i<n+1; i++) a->i[i]--; 821 } 822 PetscFunctionReturn(0); 823 } 824 825 #undef __FUNCT__ 826 #define __FUNCT__ "MatDestroy_SeqBAIJ" 827 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 828 { 829 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 830 PetscErrorCode ierr; 831 832 PetscFunctionBegin; 833 #if defined(PETSC_USE_LOG) 834 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz); 835 #endif 836 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 837 if (a->row) { 838 ierr = ISDestroy(a->row);CHKERRQ(ierr); 839 } 840 if (a->col) { 841 ierr = ISDestroy(a->col);CHKERRQ(ierr); 842 } 843 ierr = PetscFree(a->diag);CHKERRQ(ierr); 844 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 845 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 846 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 847 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 848 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 849 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 850 #if defined(PETSC_USE_MAT_SINGLE) 851 ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr); 852 #endif 853 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 854 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 855 856 ierr = PetscFree(a);CHKERRQ(ierr); 857 858 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 859 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 860 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 861 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 862 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 863 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 864 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 865 PetscFunctionReturn(0); 866 } 867 868 #undef __FUNCT__ 869 #define __FUNCT__ "MatSetOption_SeqBAIJ" 870 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op) 871 { 872 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 873 PetscErrorCode ierr; 874 875 PetscFunctionBegin; 876 switch (op) { 877 case MAT_ROW_ORIENTED: 878 a->roworiented = PETSC_TRUE; 879 break; 880 case MAT_COLUMN_ORIENTED: 881 a->roworiented = PETSC_FALSE; 882 break; 883 case MAT_COLUMNS_SORTED: 884 a->sorted = PETSC_TRUE; 885 break; 886 case MAT_COLUMNS_UNSORTED: 887 a->sorted = PETSC_FALSE; 888 break; 889 case MAT_KEEP_ZEROED_ROWS: 890 a->keepzeroedrows = PETSC_TRUE; 891 break; 892 case MAT_NO_NEW_NONZERO_LOCATIONS: 893 a->nonew = 1; 894 break; 895 case MAT_NEW_NONZERO_LOCATION_ERR: 896 a->nonew = -1; 897 break; 898 case MAT_NEW_NONZERO_ALLOCATION_ERR: 899 a->nonew = -2; 900 break; 901 case MAT_YES_NEW_NONZERO_LOCATIONS: 902 a->nonew = 0; 903 break; 904 case MAT_ROWS_SORTED: 905 case MAT_ROWS_UNSORTED: 906 case MAT_YES_NEW_DIAGONALS: 907 case MAT_IGNORE_OFF_PROC_ENTRIES: 908 case MAT_USE_HASH_TABLE: 909 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 910 break; 911 case MAT_NO_NEW_DIAGONALS: 912 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 913 case MAT_SYMMETRIC: 914 case MAT_STRUCTURALLY_SYMMETRIC: 915 case MAT_NOT_SYMMETRIC: 916 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 917 case MAT_HERMITIAN: 918 case MAT_NOT_HERMITIAN: 919 case MAT_SYMMETRY_ETERNAL: 920 case MAT_NOT_SYMMETRY_ETERNAL: 921 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 922 break; 923 default: 924 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 925 } 926 PetscFunctionReturn(0); 927 } 928 929 #undef __FUNCT__ 930 #define __FUNCT__ "MatGetRow_SeqBAIJ" 931 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 932 { 933 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 934 PetscErrorCode ierr; 935 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 936 MatScalar *aa,*aa_i; 937 PetscScalar *v_i; 938 939 PetscFunctionBegin; 940 bs = A->rmap.bs; 941 ai = a->i; 942 aj = a->j; 943 aa = a->a; 944 bs2 = a->bs2; 945 946 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 947 948 bn = row/bs; /* Block number */ 949 bp = row % bs; /* Block Position */ 950 M = ai[bn+1] - ai[bn]; 951 *nz = bs*M; 952 953 if (v) { 954 *v = 0; 955 if (*nz) { 956 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 957 for (i=0; i<M; i++) { /* for each block in the block row */ 958 v_i = *v + i*bs; 959 aa_i = aa + bs2*(ai[bn] + i); 960 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 961 } 962 } 963 } 964 965 if (idx) { 966 *idx = 0; 967 if (*nz) { 968 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 969 for (i=0; i<M; i++) { /* for each block in the block row */ 970 idx_i = *idx + i*bs; 971 itmp = bs*aj[ai[bn] + i]; 972 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 973 } 974 } 975 } 976 PetscFunctionReturn(0); 977 } 978 979 #undef __FUNCT__ 980 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 981 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 982 { 983 PetscErrorCode ierr; 984 985 PetscFunctionBegin; 986 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 987 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 988 PetscFunctionReturn(0); 989 } 990 991 #undef __FUNCT__ 992 #define __FUNCT__ "MatTranspose_SeqBAIJ" 993 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 994 { 995 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 996 Mat C; 997 PetscErrorCode ierr; 998 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col; 999 PetscInt *rows,*cols,bs2=a->bs2; 1000 PetscScalar *array; 1001 1002 PetscFunctionBegin; 1003 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 1004 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1005 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1006 1007 #if defined(PETSC_USE_MAT_SINGLE) 1008 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 1009 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 1010 #else 1011 array = a->a; 1012 #endif 1013 1014 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1015 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 1016 ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr); 1017 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1018 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1019 ierr = PetscFree(col);CHKERRQ(ierr); 1020 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1021 cols = rows + bs; 1022 for (i=0; i<mbs; i++) { 1023 cols[0] = i*bs; 1024 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1025 len = ai[i+1] - ai[i]; 1026 for (j=0; j<len; j++) { 1027 rows[0] = (*aj++)*bs; 1028 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1029 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1030 array += bs2; 1031 } 1032 } 1033 ierr = PetscFree(rows);CHKERRQ(ierr); 1034 #if defined(PETSC_USE_MAT_SINGLE) 1035 ierr = PetscFree(array); 1036 #endif 1037 1038 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1039 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1040 1041 if (B) { 1042 *B = C; 1043 } else { 1044 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1045 } 1046 PetscFunctionReturn(0); 1047 } 1048 1049 #undef __FUNCT__ 1050 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1051 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1052 { 1053 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1054 PetscErrorCode ierr; 1055 PetscInt i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2; 1056 int fd; 1057 PetscScalar *aa; 1058 FILE *file; 1059 1060 PetscFunctionBegin; 1061 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1062 ierr = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1063 col_lens[0] = MAT_FILE_COOKIE; 1064 1065 col_lens[1] = A->rmap.N; 1066 col_lens[2] = A->cmap.n; 1067 col_lens[3] = a->nz*bs2; 1068 1069 /* store lengths of each row and write (including header) to file */ 1070 count = 0; 1071 for (i=0; i<a->mbs; i++) { 1072 for (j=0; j<bs; j++) { 1073 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1074 } 1075 } 1076 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1077 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1078 1079 /* store column indices (zero start index) */ 1080 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1081 count = 0; 1082 for (i=0; i<a->mbs; i++) { 1083 for (j=0; j<bs; j++) { 1084 for (k=a->i[i]; k<a->i[i+1]; k++) { 1085 for (l=0; l<bs; l++) { 1086 jj[count++] = bs*a->j[k] + l; 1087 } 1088 } 1089 } 1090 } 1091 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1092 ierr = PetscFree(jj);CHKERRQ(ierr); 1093 1094 /* store nonzero values */ 1095 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1096 count = 0; 1097 for (i=0; i<a->mbs; i++) { 1098 for (j=0; j<bs; j++) { 1099 for (k=a->i[i]; k<a->i[i+1]; k++) { 1100 for (l=0; l<bs; l++) { 1101 aa[count++] = a->a[bs2*k + l*bs + j]; 1102 } 1103 } 1104 } 1105 } 1106 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1107 ierr = PetscFree(aa);CHKERRQ(ierr); 1108 1109 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1110 if (file) { 1111 fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs); 1112 } 1113 PetscFunctionReturn(0); 1114 } 1115 1116 #undef __FUNCT__ 1117 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1118 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1119 { 1120 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1121 PetscErrorCode ierr; 1122 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 1123 PetscViewerFormat format; 1124 1125 PetscFunctionBegin; 1126 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1127 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1128 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1129 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1130 Mat aij; 1131 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1132 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1133 ierr = MatDestroy(aij);CHKERRQ(ierr); 1134 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1135 PetscFunctionReturn(0); 1136 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1137 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1138 for (i=0; i<a->mbs; i++) { 1139 for (j=0; j<bs; j++) { 1140 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1141 for (k=a->i[i]; k<a->i[i+1]; k++) { 1142 for (l=0; l<bs; l++) { 1143 #if defined(PETSC_USE_COMPLEX) 1144 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1145 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1146 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1147 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1148 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1149 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1150 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1151 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1152 } 1153 #else 1154 if (a->a[bs2*k + l*bs + j] != 0.0) { 1155 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1156 } 1157 #endif 1158 } 1159 } 1160 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1161 } 1162 } 1163 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1164 } else { 1165 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1166 for (i=0; i<a->mbs; i++) { 1167 for (j=0; j<bs; j++) { 1168 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1169 for (k=a->i[i]; k<a->i[i+1]; k++) { 1170 for (l=0; l<bs; l++) { 1171 #if defined(PETSC_USE_COMPLEX) 1172 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1173 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1174 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1175 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1176 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1177 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1178 } else { 1179 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1180 } 1181 #else 1182 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1183 #endif 1184 } 1185 } 1186 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1187 } 1188 } 1189 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1190 } 1191 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1192 PetscFunctionReturn(0); 1193 } 1194 1195 #undef __FUNCT__ 1196 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1197 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1198 { 1199 Mat A = (Mat) Aa; 1200 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1201 PetscErrorCode ierr; 1202 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 1203 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1204 MatScalar *aa; 1205 PetscViewer viewer; 1206 1207 PetscFunctionBegin; 1208 1209 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1210 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1211 1212 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1213 1214 /* loop over matrix elements drawing boxes */ 1215 color = PETSC_DRAW_BLUE; 1216 for (i=0,row=0; i<mbs; i++,row+=bs) { 1217 for (j=a->i[i]; j<a->i[i+1]; j++) { 1218 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1219 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1220 aa = a->a + j*bs2; 1221 for (k=0; k<bs; k++) { 1222 for (l=0; l<bs; l++) { 1223 if (PetscRealPart(*aa++) >= 0.) continue; 1224 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1225 } 1226 } 1227 } 1228 } 1229 color = PETSC_DRAW_CYAN; 1230 for (i=0,row=0; i<mbs; i++,row+=bs) { 1231 for (j=a->i[i]; j<a->i[i+1]; j++) { 1232 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1233 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1234 aa = a->a + j*bs2; 1235 for (k=0; k<bs; k++) { 1236 for (l=0; l<bs; l++) { 1237 if (PetscRealPart(*aa++) != 0.) continue; 1238 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1239 } 1240 } 1241 } 1242 } 1243 1244 color = PETSC_DRAW_RED; 1245 for (i=0,row=0; i<mbs; i++,row+=bs) { 1246 for (j=a->i[i]; j<a->i[i+1]; j++) { 1247 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1248 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1249 aa = a->a + j*bs2; 1250 for (k=0; k<bs; k++) { 1251 for (l=0; l<bs; l++) { 1252 if (PetscRealPart(*aa++) <= 0.) continue; 1253 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1254 } 1255 } 1256 } 1257 } 1258 PetscFunctionReturn(0); 1259 } 1260 1261 #undef __FUNCT__ 1262 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1263 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1264 { 1265 PetscErrorCode ierr; 1266 PetscReal xl,yl,xr,yr,w,h; 1267 PetscDraw draw; 1268 PetscTruth isnull; 1269 1270 PetscFunctionBegin; 1271 1272 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1273 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1274 1275 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1276 xr = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 1277 xr += w; yr += h; xl = -w; yl = -h; 1278 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1279 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1280 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1281 PetscFunctionReturn(0); 1282 } 1283 1284 #undef __FUNCT__ 1285 #define __FUNCT__ "MatView_SeqBAIJ" 1286 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1287 { 1288 PetscErrorCode ierr; 1289 PetscTruth iascii,isbinary,isdraw; 1290 1291 PetscFunctionBegin; 1292 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1293 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1294 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1295 if (iascii){ 1296 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1297 } else if (isbinary) { 1298 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1299 } else if (isdraw) { 1300 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1301 } else { 1302 Mat B; 1303 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1304 ierr = MatView(B,viewer);CHKERRQ(ierr); 1305 ierr = MatDestroy(B);CHKERRQ(ierr); 1306 } 1307 PetscFunctionReturn(0); 1308 } 1309 1310 1311 #undef __FUNCT__ 1312 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1313 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1314 { 1315 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1316 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1317 PetscInt *ai = a->i,*ailen = a->ilen; 1318 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 1319 MatScalar *ap,*aa = a->a,zero = 0.0; 1320 1321 PetscFunctionBegin; 1322 for (k=0; k<m; k++) { /* loop over rows */ 1323 row = im[k]; brow = row/bs; 1324 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 1325 if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1326 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1327 nrow = ailen[brow]; 1328 for (l=0; l<n; l++) { /* loop over columns */ 1329 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 1330 if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1331 col = in[l] ; 1332 bcol = col/bs; 1333 cidx = col%bs; 1334 ridx = row%bs; 1335 high = nrow; 1336 low = 0; /* assume unsorted */ 1337 while (high-low > 5) { 1338 t = (low+high)/2; 1339 if (rp[t] > bcol) high = t; 1340 else low = t; 1341 } 1342 for (i=low; i<high; i++) { 1343 if (rp[i] > bcol) break; 1344 if (rp[i] == bcol) { 1345 *v++ = ap[bs2*i+bs*cidx+ridx]; 1346 goto finished; 1347 } 1348 } 1349 *v++ = zero; 1350 finished:; 1351 } 1352 } 1353 PetscFunctionReturn(0); 1354 } 1355 1356 #if defined(PETSC_USE_MAT_SINGLE) 1357 #undef __FUNCT__ 1358 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1359 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 1360 { 1361 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1362 PetscErrorCode ierr; 1363 PetscInt i,N = m*n*b->bs2; 1364 MatScalar *vsingle; 1365 1366 PetscFunctionBegin; 1367 if (N > b->setvalueslen) { 1368 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 1369 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1370 b->setvalueslen = N; 1371 } 1372 vsingle = b->setvaluescopy; 1373 for (i=0; i<N; i++) { 1374 vsingle[i] = v[i]; 1375 } 1376 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1377 PetscFunctionReturn(0); 1378 } 1379 #endif 1380 1381 1382 #undef __FUNCT__ 1383 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1384 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is) 1385 { 1386 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1387 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1388 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1389 PetscErrorCode ierr; 1390 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 1391 PetscTruth roworiented=a->roworiented; 1392 const MatScalar *value = v; 1393 MatScalar *ap,*aa = a->a,*bap; 1394 1395 PetscFunctionBegin; 1396 if (roworiented) { 1397 stepval = (n-1)*bs; 1398 } else { 1399 stepval = (m-1)*bs; 1400 } 1401 for (k=0; k<m; k++) { /* loop over added rows */ 1402 row = im[k]; 1403 if (row < 0) continue; 1404 #if defined(PETSC_USE_DEBUG) 1405 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1406 #endif 1407 rp = aj + ai[row]; 1408 ap = aa + bs2*ai[row]; 1409 rmax = imax[row]; 1410 nrow = ailen[row]; 1411 low = 0; 1412 high = nrow; 1413 for (l=0; l<n; l++) { /* loop over added columns */ 1414 if (in[l] < 0) continue; 1415 #if defined(PETSC_USE_DEBUG) 1416 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1417 #endif 1418 col = in[l]; 1419 if (roworiented) { 1420 value = v + k*(stepval+bs)*bs + l*bs; 1421 } else { 1422 value = v + l*(stepval+bs)*bs + k*bs; 1423 } 1424 if (col <= lastcol) low = 0; else high = nrow; 1425 lastcol = col; 1426 while (high-low > 7) { 1427 t = (low+high)/2; 1428 if (rp[t] > col) high = t; 1429 else low = t; 1430 } 1431 for (i=low; i<high; i++) { 1432 if (rp[i] > col) break; 1433 if (rp[i] == col) { 1434 bap = ap + bs2*i; 1435 if (roworiented) { 1436 if (is == ADD_VALUES) { 1437 for (ii=0; ii<bs; ii++,value+=stepval) { 1438 for (jj=ii; jj<bs2; jj+=bs) { 1439 bap[jj] += *value++; 1440 } 1441 } 1442 } else { 1443 for (ii=0; ii<bs; ii++,value+=stepval) { 1444 for (jj=ii; jj<bs2; jj+=bs) { 1445 bap[jj] = *value++; 1446 } 1447 } 1448 } 1449 } else { 1450 if (is == ADD_VALUES) { 1451 for (ii=0; ii<bs; ii++,value+=stepval) { 1452 for (jj=0; jj<bs; jj++) { 1453 *bap++ += *value++; 1454 } 1455 } 1456 } else { 1457 for (ii=0; ii<bs; ii++,value+=stepval) { 1458 for (jj=0; jj<bs; jj++) { 1459 *bap++ = *value++; 1460 } 1461 } 1462 } 1463 } 1464 goto noinsert2; 1465 } 1466 } 1467 if (nonew == 1) goto noinsert2; 1468 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1469 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1470 N = nrow++ - 1; high++; 1471 /* shift up all the later entries in this row */ 1472 for (ii=N; ii>=i; ii--) { 1473 rp[ii+1] = rp[ii]; 1474 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1475 } 1476 if (N >= i) { 1477 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1478 } 1479 rp[i] = col; 1480 bap = ap + bs2*i; 1481 if (roworiented) { 1482 for (ii=0; ii<bs; ii++,value+=stepval) { 1483 for (jj=ii; jj<bs2; jj+=bs) { 1484 bap[jj] = *value++; 1485 } 1486 } 1487 } else { 1488 for (ii=0; ii<bs; ii++,value+=stepval) { 1489 for (jj=0; jj<bs; jj++) { 1490 *bap++ = *value++; 1491 } 1492 } 1493 } 1494 noinsert2:; 1495 low = i; 1496 } 1497 ailen[row] = nrow; 1498 } 1499 PetscFunctionReturn(0); 1500 } 1501 1502 #undef __FUNCT__ 1503 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1504 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1505 { 1506 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1507 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1508 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 1509 PetscErrorCode ierr; 1510 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1511 MatScalar *aa = a->a,*ap; 1512 PetscReal ratio=0.6; 1513 1514 PetscFunctionBegin; 1515 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1516 1517 if (m) rmax = ailen[0]; 1518 for (i=1; i<mbs; i++) { 1519 /* move each row back by the amount of empty slots (fshift) before it*/ 1520 fshift += imax[i-1] - ailen[i-1]; 1521 rmax = PetscMax(rmax,ailen[i]); 1522 if (fshift) { 1523 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1524 N = ailen[i]; 1525 for (j=0; j<N; j++) { 1526 ip[j-fshift] = ip[j]; 1527 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1528 } 1529 } 1530 ai[i] = ai[i-1] + ailen[i-1]; 1531 } 1532 if (mbs) { 1533 fshift += imax[mbs-1] - ailen[mbs-1]; 1534 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1535 } 1536 /* reset ilen and imax for each row */ 1537 for (i=0; i<mbs; i++) { 1538 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1539 } 1540 a->nz = ai[mbs]; 1541 1542 /* diagonals may have moved, so kill the diagonal pointers */ 1543 a->idiagvalid = PETSC_FALSE; 1544 if (fshift && a->diag) { 1545 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1546 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1547 a->diag = 0; 1548 } 1549 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1550 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1551 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1552 a->reallocs = 0; 1553 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1554 1555 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1556 if (a->compressedrow.use){ 1557 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1558 } 1559 1560 A->same_nonzero = PETSC_TRUE; 1561 PetscFunctionReturn(0); 1562 } 1563 1564 /* 1565 This function returns an array of flags which indicate the locations of contiguous 1566 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1567 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1568 Assume: sizes should be long enough to hold all the values. 1569 */ 1570 #undef __FUNCT__ 1571 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1572 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1573 { 1574 PetscInt i,j,k,row; 1575 PetscTruth flg; 1576 1577 PetscFunctionBegin; 1578 for (i=0,j=0; i<n; j++) { 1579 row = idx[i]; 1580 if (row%bs!=0) { /* Not the begining of a block */ 1581 sizes[j] = 1; 1582 i++; 1583 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1584 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1585 i++; 1586 } else { /* Begining of the block, so check if the complete block exists */ 1587 flg = PETSC_TRUE; 1588 for (k=1; k<bs; k++) { 1589 if (row+k != idx[i+k]) { /* break in the block */ 1590 flg = PETSC_FALSE; 1591 break; 1592 } 1593 } 1594 if (flg) { /* No break in the bs */ 1595 sizes[j] = bs; 1596 i+= bs; 1597 } else { 1598 sizes[j] = 1; 1599 i++; 1600 } 1601 } 1602 } 1603 *bs_max = j; 1604 PetscFunctionReturn(0); 1605 } 1606 1607 #undef __FUNCT__ 1608 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1609 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 1610 { 1611 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1612 PetscErrorCode ierr; 1613 PetscInt i,j,k,count,*rows; 1614 PetscInt bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max; 1615 PetscScalar zero = 0.0; 1616 MatScalar *aa; 1617 1618 PetscFunctionBegin; 1619 /* Make a copy of the IS and sort it */ 1620 /* allocate memory for rows,sizes */ 1621 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1622 sizes = rows + is_n; 1623 1624 /* copy IS values to rows, and sort them */ 1625 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1626 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1627 if (baij->keepzeroedrows) { 1628 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1629 bs_max = is_n; 1630 A->same_nonzero = PETSC_TRUE; 1631 } else { 1632 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1633 A->same_nonzero = PETSC_FALSE; 1634 } 1635 1636 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1637 row = rows[j]; 1638 if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 1639 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1640 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1641 if (sizes[i] == bs && !baij->keepzeroedrows) { 1642 if (diag != 0.0) { 1643 if (baij->ilen[row/bs] > 0) { 1644 baij->ilen[row/bs] = 1; 1645 baij->j[baij->i[row/bs]] = row/bs; 1646 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1647 } 1648 /* Now insert all the diagonal values for this bs */ 1649 for (k=0; k<bs; k++) { 1650 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 1651 } 1652 } else { /* (diag == 0.0) */ 1653 baij->ilen[row/bs] = 0; 1654 } /* end (diag == 0.0) */ 1655 } else { /* (sizes[i] != bs) */ 1656 #if defined (PETSC_USE_DEBUG) 1657 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 1658 #endif 1659 for (k=0; k<count; k++) { 1660 aa[0] = zero; 1661 aa += bs; 1662 } 1663 if (diag != 0.0) { 1664 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 1665 } 1666 } 1667 } 1668 1669 ierr = PetscFree(rows);CHKERRQ(ierr); 1670 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1671 PetscFunctionReturn(0); 1672 } 1673 1674 #undef __FUNCT__ 1675 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1676 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1677 { 1678 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1679 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 1680 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1681 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 1682 PetscErrorCode ierr; 1683 PetscInt ridx,cidx,bs2=a->bs2; 1684 PetscTruth roworiented=a->roworiented; 1685 MatScalar *ap,value,*aa=a->a,*bap; 1686 1687 PetscFunctionBegin; 1688 for (k=0; k<m; k++) { /* loop over added rows */ 1689 row = im[k]; 1690 brow = row/bs; 1691 if (row < 0) continue; 1692 #if defined(PETSC_USE_DEBUG) 1693 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 1694 #endif 1695 rp = aj + ai[brow]; 1696 ap = aa + bs2*ai[brow]; 1697 rmax = imax[brow]; 1698 nrow = ailen[brow]; 1699 low = 0; 1700 high = nrow; 1701 for (l=0; l<n; l++) { /* loop over added columns */ 1702 if (in[l] < 0) continue; 1703 #if defined(PETSC_USE_DEBUG) 1704 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 1705 #endif 1706 col = in[l]; bcol = col/bs; 1707 ridx = row % bs; cidx = col % bs; 1708 if (roworiented) { 1709 value = v[l + k*n]; 1710 } else { 1711 value = v[k + l*m]; 1712 } 1713 if (col <= lastcol) low = 0; else high = nrow; 1714 lastcol = col; 1715 while (high-low > 7) { 1716 t = (low+high)/2; 1717 if (rp[t] > bcol) high = t; 1718 else low = t; 1719 } 1720 for (i=low; i<high; i++) { 1721 if (rp[i] > bcol) break; 1722 if (rp[i] == bcol) { 1723 bap = ap + bs2*i + bs*cidx + ridx; 1724 if (is == ADD_VALUES) *bap += value; 1725 else *bap = value; 1726 goto noinsert1; 1727 } 1728 } 1729 if (nonew == 1) goto noinsert1; 1730 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1731 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1732 N = nrow++ - 1; high++; 1733 /* shift up all the later entries in this row */ 1734 for (ii=N; ii>=i; ii--) { 1735 rp[ii+1] = rp[ii]; 1736 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1737 } 1738 if (N>=i) { 1739 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1740 } 1741 rp[i] = bcol; 1742 ap[bs2*i + bs*cidx + ridx] = value; 1743 a->nz++; 1744 noinsert1:; 1745 low = i; 1746 } 1747 ailen[brow] = nrow; 1748 } 1749 A->same_nonzero = PETSC_FALSE; 1750 PetscFunctionReturn(0); 1751 } 1752 1753 1754 #undef __FUNCT__ 1755 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1756 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1757 { 1758 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1759 Mat outA; 1760 PetscErrorCode ierr; 1761 PetscTruth row_identity,col_identity; 1762 1763 PetscFunctionBegin; 1764 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1765 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1766 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1767 if (!row_identity || !col_identity) { 1768 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 1769 } 1770 1771 outA = inA; 1772 inA->factor = FACTOR_LU; 1773 1774 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1775 1776 a->row = row; 1777 a->col = col; 1778 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1779 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1780 1781 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1782 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1783 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1784 1785 /* 1786 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1787 for ILU(0) factorization with natural ordering 1788 */ 1789 if (inA->rmap.bs < 8) { 1790 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1791 } else { 1792 if (!a->solve_work) { 1793 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1794 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1795 } 1796 } 1797 1798 ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1799 1800 PetscFunctionReturn(0); 1801 } 1802 1803 EXTERN_C_BEGIN 1804 #undef __FUNCT__ 1805 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1806 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 1807 { 1808 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1809 PetscInt i,nz,nbs; 1810 1811 PetscFunctionBegin; 1812 nz = baij->maxnz/baij->bs2; 1813 nbs = baij->nbs; 1814 for (i=0; i<nz; i++) { 1815 baij->j[i] = indices[i]; 1816 } 1817 baij->nz = nz; 1818 for (i=0; i<nbs; i++) { 1819 baij->ilen[i] = baij->imax[i]; 1820 } 1821 1822 PetscFunctionReturn(0); 1823 } 1824 EXTERN_C_END 1825 1826 #undef __FUNCT__ 1827 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1828 /*@ 1829 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1830 in the matrix. 1831 1832 Input Parameters: 1833 + mat - the SeqBAIJ matrix 1834 - indices - the column indices 1835 1836 Level: advanced 1837 1838 Notes: 1839 This can be called if you have precomputed the nonzero structure of the 1840 matrix and want to provide it to the matrix object to improve the performance 1841 of the MatSetValues() operation. 1842 1843 You MUST have set the correct numbers of nonzeros per row in the call to 1844 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 1845 1846 MUST be called before any calls to MatSetValues(); 1847 1848 @*/ 1849 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1850 { 1851 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1852 1853 PetscFunctionBegin; 1854 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1855 PetscValidPointer(indices,2); 1856 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1857 if (f) { 1858 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1859 } else { 1860 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 1861 } 1862 PetscFunctionReturn(0); 1863 } 1864 1865 #undef __FUNCT__ 1866 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1867 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1868 { 1869 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1870 PetscErrorCode ierr; 1871 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 1872 PetscReal atmp; 1873 PetscScalar *x,zero = 0.0; 1874 MatScalar *aa; 1875 PetscInt ncols,brow,krow,kcol; 1876 1877 PetscFunctionBegin; 1878 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1879 bs = A->rmap.bs; 1880 aa = a->a; 1881 ai = a->i; 1882 aj = a->j; 1883 mbs = a->mbs; 1884 1885 ierr = VecSet(v,zero);CHKERRQ(ierr); 1886 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1887 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1888 if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1889 for (i=0; i<mbs; i++) { 1890 ncols = ai[1] - ai[0]; ai++; 1891 brow = bs*i; 1892 for (j=0; j<ncols; j++){ 1893 /* bcol = bs*(*aj); */ 1894 for (kcol=0; kcol<bs; kcol++){ 1895 for (krow=0; krow<bs; krow++){ 1896 atmp = PetscAbsScalar(*aa); aa++; 1897 row = brow + krow; /* row index */ 1898 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 1899 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1900 } 1901 } 1902 aj++; 1903 } 1904 } 1905 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1906 PetscFunctionReturn(0); 1907 } 1908 1909 #undef __FUNCT__ 1910 #define __FUNCT__ "MatCopy_SeqBAIJ" 1911 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 1912 { 1913 PetscErrorCode ierr; 1914 1915 PetscFunctionBegin; 1916 /* If the two matrices have the same copy implementation, use fast copy. */ 1917 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1918 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1919 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 1920 1921 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 1922 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1923 } 1924 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 1925 } else { 1926 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1927 } 1928 PetscFunctionReturn(0); 1929 } 1930 1931 #undef __FUNCT__ 1932 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1933 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 1934 { 1935 PetscErrorCode ierr; 1936 1937 PetscFunctionBegin; 1938 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 1939 PetscFunctionReturn(0); 1940 } 1941 1942 #undef __FUNCT__ 1943 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1944 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1945 { 1946 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1947 PetscFunctionBegin; 1948 *array = a->a; 1949 PetscFunctionReturn(0); 1950 } 1951 1952 #undef __FUNCT__ 1953 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1954 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1955 { 1956 PetscFunctionBegin; 1957 PetscFunctionReturn(0); 1958 } 1959 1960 #include "petscblaslapack.h" 1961 #undef __FUNCT__ 1962 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1963 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1964 { 1965 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1966 PetscErrorCode ierr; 1967 PetscInt i,bs=Y->rmap.bs,j,bs2; 1968 PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz; 1969 1970 PetscFunctionBegin; 1971 if (str == SAME_NONZERO_PATTERN) { 1972 PetscScalar alpha = a; 1973 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1974 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1975 if (y->xtoy && y->XtoY != X) { 1976 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1977 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1978 } 1979 if (!y->xtoy) { /* get xtoy */ 1980 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1981 y->XtoY = X; 1982 } 1983 bs2 = bs*bs; 1984 for (i=0; i<x->nz; i++) { 1985 j = 0; 1986 while (j < bs2){ 1987 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 1988 j++; 1989 } 1990 } 1991 ierr = PetscInfo3(0,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 1992 } else { 1993 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1994 } 1995 PetscFunctionReturn(0); 1996 } 1997 1998 #undef __FUNCT__ 1999 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2000 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2001 { 2002 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2003 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2004 PetscScalar *aa = a->a; 2005 2006 PetscFunctionBegin; 2007 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2008 PetscFunctionReturn(0); 2009 } 2010 2011 #undef __FUNCT__ 2012 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2013 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2014 { 2015 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2016 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2017 PetscScalar *aa = a->a; 2018 2019 PetscFunctionBegin; 2020 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2021 PetscFunctionReturn(0); 2022 } 2023 2024 2025 /* -------------------------------------------------------------------*/ 2026 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2027 MatGetRow_SeqBAIJ, 2028 MatRestoreRow_SeqBAIJ, 2029 MatMult_SeqBAIJ_N, 2030 /* 4*/ MatMultAdd_SeqBAIJ_N, 2031 MatMultTranspose_SeqBAIJ, 2032 MatMultTransposeAdd_SeqBAIJ, 2033 MatSolve_SeqBAIJ_N, 2034 0, 2035 0, 2036 /*10*/ 0, 2037 MatLUFactor_SeqBAIJ, 2038 0, 2039 0, 2040 MatTranspose_SeqBAIJ, 2041 /*15*/ MatGetInfo_SeqBAIJ, 2042 MatEqual_SeqBAIJ, 2043 MatGetDiagonal_SeqBAIJ, 2044 MatDiagonalScale_SeqBAIJ, 2045 MatNorm_SeqBAIJ, 2046 /*20*/ 0, 2047 MatAssemblyEnd_SeqBAIJ, 2048 0, 2049 MatSetOption_SeqBAIJ, 2050 MatZeroEntries_SeqBAIJ, 2051 /*25*/ MatZeroRows_SeqBAIJ, 2052 MatLUFactorSymbolic_SeqBAIJ, 2053 MatLUFactorNumeric_SeqBAIJ_N, 2054 MatCholeskyFactorSymbolic_SeqBAIJ, 2055 MatCholeskyFactorNumeric_SeqBAIJ_N, 2056 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2057 MatILUFactorSymbolic_SeqBAIJ, 2058 MatICCFactorSymbolic_SeqBAIJ, 2059 MatGetArray_SeqBAIJ, 2060 MatRestoreArray_SeqBAIJ, 2061 /*35*/ MatDuplicate_SeqBAIJ, 2062 0, 2063 0, 2064 MatILUFactor_SeqBAIJ, 2065 0, 2066 /*40*/ MatAXPY_SeqBAIJ, 2067 MatGetSubMatrices_SeqBAIJ, 2068 MatIncreaseOverlap_SeqBAIJ, 2069 MatGetValues_SeqBAIJ, 2070 MatCopy_SeqBAIJ, 2071 /*45*/ 0, 2072 MatScale_SeqBAIJ, 2073 0, 2074 0, 2075 0, 2076 /*50*/ 0, 2077 MatGetRowIJ_SeqBAIJ, 2078 MatRestoreRowIJ_SeqBAIJ, 2079 0, 2080 0, 2081 /*55*/ 0, 2082 0, 2083 0, 2084 0, 2085 MatSetValuesBlocked_SeqBAIJ, 2086 /*60*/ MatGetSubMatrix_SeqBAIJ, 2087 MatDestroy_SeqBAIJ, 2088 MatView_SeqBAIJ, 2089 0, 2090 0, 2091 /*65*/ 0, 2092 0, 2093 0, 2094 0, 2095 0, 2096 /*70*/ MatGetRowMax_SeqBAIJ, 2097 MatConvert_Basic, 2098 0, 2099 0, 2100 0, 2101 /*75*/ 0, 2102 0, 2103 0, 2104 0, 2105 0, 2106 /*80*/ 0, 2107 0, 2108 0, 2109 0, 2110 MatLoad_SeqBAIJ, 2111 /*85*/ 0, 2112 0, 2113 0, 2114 0, 2115 0, 2116 /*90*/ 0, 2117 0, 2118 0, 2119 0, 2120 0, 2121 /*95*/ 0, 2122 0, 2123 0, 2124 0, 2125 0, 2126 /*100*/0, 2127 0, 2128 0, 2129 0, 2130 0, 2131 /*105*/0, 2132 MatRealPart_SeqBAIJ, 2133 MatImaginaryPart_SeqBAIJ 2134 }; 2135 2136 EXTERN_C_BEGIN 2137 #undef __FUNCT__ 2138 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2139 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2140 { 2141 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2142 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2143 PetscErrorCode ierr; 2144 2145 PetscFunctionBegin; 2146 if (aij->nonew != 1) { 2147 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2148 } 2149 2150 /* allocate space for values if not already there */ 2151 if (!aij->saved_values) { 2152 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2153 } 2154 2155 /* copy values over */ 2156 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2157 PetscFunctionReturn(0); 2158 } 2159 EXTERN_C_END 2160 2161 EXTERN_C_BEGIN 2162 #undef __FUNCT__ 2163 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2164 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2165 { 2166 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2167 PetscErrorCode ierr; 2168 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2169 2170 PetscFunctionBegin; 2171 if (aij->nonew != 1) { 2172 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2173 } 2174 if (!aij->saved_values) { 2175 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2176 } 2177 2178 /* copy values over */ 2179 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2180 PetscFunctionReturn(0); 2181 } 2182 EXTERN_C_END 2183 2184 EXTERN_C_BEGIN 2185 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2186 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2187 EXTERN_C_END 2188 2189 EXTERN_C_BEGIN 2190 #undef __FUNCT__ 2191 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2192 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2193 { 2194 Mat_SeqBAIJ *b; 2195 PetscErrorCode ierr; 2196 PetscInt i,mbs,nbs,bs2,newbs = bs; 2197 PetscTruth flg,skipallocation = PETSC_FALSE; 2198 2199 PetscFunctionBegin; 2200 2201 if (nz == MAT_SKIP_ALLOCATION) { 2202 skipallocation = PETSC_TRUE; 2203 nz = 0; 2204 } 2205 2206 ierr = PetscOptionsBegin(B->comm,B->prefix,"Options for SEQBAIJ matrix","Mat");CHKERRQ(ierr); 2207 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2208 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2209 2210 if (nnz && newbs != bs) { 2211 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2212 } 2213 bs = newbs; 2214 2215 B->rmap.bs = B->cmap.bs = bs; 2216 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 2217 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 2218 2219 B->preallocated = PETSC_TRUE; 2220 2221 mbs = B->rmap.n/bs; 2222 nbs = B->cmap.n/bs; 2223 bs2 = bs*bs; 2224 2225 if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) { 2226 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs); 2227 } 2228 2229 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2230 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2231 if (nnz) { 2232 for (i=0; i<mbs; i++) { 2233 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2234 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2235 } 2236 } 2237 2238 b = (Mat_SeqBAIJ*)B->data; 2239 ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Options for SEQBAIJ matrix","Mat");CHKERRQ(ierr); 2240 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2241 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2242 2243 B->ops->solve = MatSolve_SeqBAIJ_Update; 2244 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2245 if (!flg) { 2246 switch (bs) { 2247 case 1: 2248 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2249 B->ops->mult = MatMult_SeqBAIJ_1; 2250 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2251 break; 2252 case 2: 2253 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2254 B->ops->mult = MatMult_SeqBAIJ_2; 2255 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2256 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2257 break; 2258 case 3: 2259 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2260 B->ops->mult = MatMult_SeqBAIJ_3; 2261 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2262 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2263 break; 2264 case 4: 2265 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2266 B->ops->mult = MatMult_SeqBAIJ_4; 2267 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2268 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2269 break; 2270 case 5: 2271 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2272 B->ops->mult = MatMult_SeqBAIJ_5; 2273 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2274 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2275 break; 2276 case 6: 2277 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2278 B->ops->mult = MatMult_SeqBAIJ_6; 2279 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2280 break; 2281 case 7: 2282 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2283 B->ops->mult = MatMult_SeqBAIJ_7; 2284 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2285 break; 2286 default: 2287 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2288 B->ops->mult = MatMult_SeqBAIJ_N; 2289 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2290 break; 2291 } 2292 } 2293 B->rmap.bs = bs; 2294 b->mbs = mbs; 2295 b->nbs = nbs; 2296 if (!skipallocation) { 2297 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2298 /* b->ilen will count nonzeros in each block row so far. */ 2299 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2300 if (!nnz) { 2301 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2302 else if (nz <= 0) nz = 1; 2303 for (i=0; i<mbs; i++) b->imax[i] = nz; 2304 nz = nz*mbs; 2305 } else { 2306 nz = 0; 2307 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2308 } 2309 2310 /* allocate the matrix space */ 2311 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 2312 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2313 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2314 b->singlemalloc = PETSC_TRUE; 2315 2316 b->i[0] = 0; 2317 for (i=1; i<mbs+1; i++) { 2318 b->i[i] = b->i[i-1] + b->imax[i-1]; 2319 } 2320 b->free_a = PETSC_TRUE; 2321 b->free_ij = PETSC_TRUE; 2322 } else { 2323 b->free_a = PETSC_FALSE; 2324 b->free_ij = PETSC_FALSE; 2325 } 2326 2327 B->rmap.bs = bs; 2328 b->bs2 = bs2; 2329 b->mbs = mbs; 2330 b->nz = 0; 2331 b->maxnz = nz*bs2; 2332 B->info.nz_unneeded = (PetscReal)b->maxnz; 2333 PetscFunctionReturn(0); 2334 } 2335 EXTERN_C_END 2336 2337 /*MC 2338 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2339 block sparse compressed row format. 2340 2341 Options Database Keys: 2342 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2343 2344 Level: beginner 2345 2346 .seealso: MatCreateSeqBAIJ() 2347 M*/ 2348 2349 EXTERN_C_BEGIN 2350 #undef __FUNCT__ 2351 #define __FUNCT__ "MatCreate_SeqBAIJ" 2352 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2353 { 2354 PetscErrorCode ierr; 2355 PetscMPIInt size; 2356 Mat_SeqBAIJ *b; 2357 2358 PetscFunctionBegin; 2359 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 2360 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2361 2362 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2363 B->data = (void*)b; 2364 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2365 B->factor = 0; 2366 B->mapping = 0; 2367 b->row = 0; 2368 b->col = 0; 2369 b->icol = 0; 2370 b->reallocs = 0; 2371 b->saved_values = 0; 2372 #if defined(PETSC_USE_MAT_SINGLE) 2373 b->setvalueslen = 0; 2374 b->setvaluescopy = PETSC_NULL; 2375 #endif 2376 2377 b->sorted = PETSC_FALSE; 2378 b->roworiented = PETSC_TRUE; 2379 b->nonew = 0; 2380 b->diag = 0; 2381 b->solve_work = 0; 2382 b->mult_work = 0; 2383 B->spptr = 0; 2384 B->info.nz_unneeded = (PetscReal)b->maxnz; 2385 b->keepzeroedrows = PETSC_FALSE; 2386 b->xtoy = 0; 2387 b->XtoY = 0; 2388 b->compressedrow.use = PETSC_FALSE; 2389 b->compressedrow.nrows = 0; 2390 b->compressedrow.i = PETSC_NULL; 2391 b->compressedrow.rindex = PETSC_NULL; 2392 b->compressedrow.checked = PETSC_FALSE; 2393 B->same_nonzero = PETSC_FALSE; 2394 2395 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2396 "MatInvertBlockDiagonal_SeqBAIJ", 2397 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2398 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2399 "MatStoreValues_SeqBAIJ", 2400 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2401 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2402 "MatRetrieveValues_SeqBAIJ", 2403 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2404 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2405 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2406 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2407 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2408 "MatConvert_SeqBAIJ_SeqAIJ", 2409 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2410 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2411 "MatConvert_SeqBAIJ_SeqSBAIJ", 2412 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2413 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2414 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2415 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2416 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 2417 PetscFunctionReturn(0); 2418 } 2419 EXTERN_C_END 2420 2421 #undef __FUNCT__ 2422 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2423 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2424 { 2425 Mat C; 2426 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2427 PetscErrorCode ierr; 2428 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2429 2430 PetscFunctionBegin; 2431 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2432 2433 *B = 0; 2434 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 2435 ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 2436 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 2437 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2438 c = (Mat_SeqBAIJ*)C->data; 2439 2440 C->rmap.N = A->rmap.N; 2441 C->cmap.N = A->cmap.N; 2442 C->rmap.bs = A->rmap.bs; 2443 c->bs2 = a->bs2; 2444 c->mbs = a->mbs; 2445 c->nbs = a->nbs; 2446 2447 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 2448 for (i=0; i<mbs; i++) { 2449 c->imax[i] = a->imax[i]; 2450 c->ilen[i] = a->ilen[i]; 2451 } 2452 2453 /* allocate the matrix space */ 2454 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2455 c->singlemalloc = PETSC_TRUE; 2456 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2457 if (mbs > 0) { 2458 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2459 if (cpvalues == MAT_COPY_VALUES) { 2460 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2461 } else { 2462 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2463 } 2464 } 2465 c->sorted = a->sorted; 2466 c->roworiented = a->roworiented; 2467 c->nonew = a->nonew; 2468 2469 if (a->diag) { 2470 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2471 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2472 for (i=0; i<mbs; i++) { 2473 c->diag[i] = a->diag[i]; 2474 } 2475 } else c->diag = 0; 2476 c->nz = a->nz; 2477 c->maxnz = a->maxnz; 2478 c->solve_work = 0; 2479 c->mult_work = 0; 2480 c->free_a = PETSC_TRUE; 2481 c->free_ij = PETSC_TRUE; 2482 C->preallocated = PETSC_TRUE; 2483 C->assembled = PETSC_TRUE; 2484 2485 c->compressedrow.use = a->compressedrow.use; 2486 c->compressedrow.nrows = a->compressedrow.nrows; 2487 c->compressedrow.checked = a->compressedrow.checked; 2488 if ( a->compressedrow.checked && a->compressedrow.use){ 2489 i = a->compressedrow.nrows; 2490 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2491 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2492 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2493 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2494 } else { 2495 c->compressedrow.use = PETSC_FALSE; 2496 c->compressedrow.i = PETSC_NULL; 2497 c->compressedrow.rindex = PETSC_NULL; 2498 } 2499 C->same_nonzero = A->same_nonzero; 2500 *B = C; 2501 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 2502 PetscFunctionReturn(0); 2503 } 2504 2505 #undef __FUNCT__ 2506 #define __FUNCT__ "MatLoad_SeqBAIJ" 2507 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A) 2508 { 2509 Mat_SeqBAIJ *a; 2510 Mat B; 2511 PetscErrorCode ierr; 2512 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2513 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2514 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2515 PetscInt *masked,nmask,tmp,bs2,ishift; 2516 PetscMPIInt size; 2517 int fd; 2518 PetscScalar *aa; 2519 MPI_Comm comm = ((PetscObject)viewer)->comm; 2520 2521 PetscFunctionBegin; 2522 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 2523 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2524 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2525 bs2 = bs*bs; 2526 2527 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2528 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2529 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2530 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2531 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2532 M = header[1]; N = header[2]; nz = header[3]; 2533 2534 if (header[3] < 0) { 2535 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2536 } 2537 2538 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2539 2540 /* 2541 This code adds extra rows to make sure the number of rows is 2542 divisible by the blocksize 2543 */ 2544 mbs = M/bs; 2545 extra_rows = bs - M + bs*(mbs); 2546 if (extra_rows == bs) extra_rows = 0; 2547 else mbs++; 2548 if (extra_rows) { 2549 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2550 } 2551 2552 /* read in row lengths */ 2553 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2554 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2555 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2556 2557 /* read in column indices */ 2558 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2559 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2560 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2561 2562 /* loop over row lengths determining block row lengths */ 2563 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2564 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2565 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2566 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2567 masked = mask + mbs; 2568 rowcount = 0; nzcount = 0; 2569 for (i=0; i<mbs; i++) { 2570 nmask = 0; 2571 for (j=0; j<bs; j++) { 2572 kmax = rowlengths[rowcount]; 2573 for (k=0; k<kmax; k++) { 2574 tmp = jj[nzcount++]/bs; 2575 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2576 } 2577 rowcount++; 2578 } 2579 browlengths[i] += nmask; 2580 /* zero out the mask elements we set */ 2581 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2582 } 2583 2584 /* create our matrix */ 2585 ierr = MatCreate(comm,&B); 2586 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 2587 ierr = MatSetType(B,type);CHKERRQ(ierr); 2588 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 2589 a = (Mat_SeqBAIJ*)B->data; 2590 2591 /* set matrix "i" values */ 2592 a->i[0] = 0; 2593 for (i=1; i<= mbs; i++) { 2594 a->i[i] = a->i[i-1] + browlengths[i-1]; 2595 a->ilen[i-1] = browlengths[i-1]; 2596 } 2597 a->nz = 0; 2598 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2599 2600 /* read in nonzero values */ 2601 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2602 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2603 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2604 2605 /* set "a" and "j" values into matrix */ 2606 nzcount = 0; jcount = 0; 2607 for (i=0; i<mbs; i++) { 2608 nzcountb = nzcount; 2609 nmask = 0; 2610 for (j=0; j<bs; j++) { 2611 kmax = rowlengths[i*bs+j]; 2612 for (k=0; k<kmax; k++) { 2613 tmp = jj[nzcount++]/bs; 2614 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2615 } 2616 } 2617 /* sort the masked values */ 2618 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2619 2620 /* set "j" values into matrix */ 2621 maskcount = 1; 2622 for (j=0; j<nmask; j++) { 2623 a->j[jcount++] = masked[j]; 2624 mask[masked[j]] = maskcount++; 2625 } 2626 /* set "a" values into matrix */ 2627 ishift = bs2*a->i[i]; 2628 for (j=0; j<bs; j++) { 2629 kmax = rowlengths[i*bs+j]; 2630 for (k=0; k<kmax; k++) { 2631 tmp = jj[nzcountb]/bs ; 2632 block = mask[tmp] - 1; 2633 point = jj[nzcountb] - bs*tmp; 2634 idx = ishift + bs2*block + j + bs*point; 2635 a->a[idx] = (MatScalar)aa[nzcountb++]; 2636 } 2637 } 2638 /* zero out the mask elements we set */ 2639 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2640 } 2641 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2642 2643 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2644 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2645 ierr = PetscFree(aa);CHKERRQ(ierr); 2646 ierr = PetscFree(jj);CHKERRQ(ierr); 2647 ierr = PetscFree(mask);CHKERRQ(ierr); 2648 2649 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2650 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2651 ierr = MatView_Private(B);CHKERRQ(ierr); 2652 2653 *A = B; 2654 PetscFunctionReturn(0); 2655 } 2656 2657 #undef __FUNCT__ 2658 #define __FUNCT__ "MatCreateSeqBAIJ" 2659 /*@C 2660 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2661 compressed row) format. For good matrix assembly performance the 2662 user should preallocate the matrix storage by setting the parameter nz 2663 (or the array nnz). By setting these parameters accurately, performance 2664 during matrix assembly can be increased by more than a factor of 50. 2665 2666 Collective on MPI_Comm 2667 2668 Input Parameters: 2669 + comm - MPI communicator, set to PETSC_COMM_SELF 2670 . bs - size of block 2671 . m - number of rows 2672 . n - number of columns 2673 . nz - number of nonzero blocks per block row (same for all rows) 2674 - nnz - array containing the number of nonzero blocks in the various block rows 2675 (possibly different for each block row) or PETSC_NULL 2676 2677 Output Parameter: 2678 . A - the matrix 2679 2680 Options Database Keys: 2681 . -mat_no_unroll - uses code that does not unroll the loops in the 2682 block calculations (much slower) 2683 . -mat_block_size - size of the blocks to use 2684 2685 Level: intermediate 2686 2687 Notes: 2688 The number of rows and columns must be divisible by blocksize. 2689 2690 If the nnz parameter is given then the nz parameter is ignored 2691 2692 A nonzero block is any block that as 1 or more nonzeros in it 2693 2694 The block AIJ format is fully compatible with standard Fortran 77 2695 storage. That is, the stored row and column indices can begin at 2696 either one (as in Fortran) or zero. See the users' manual for details. 2697 2698 Specify the preallocated storage with either nz or nnz (not both). 2699 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2700 allocation. For additional details, see the users manual chapter on 2701 matrices. 2702 2703 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2704 @*/ 2705 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 2706 { 2707 PetscErrorCode ierr; 2708 2709 PetscFunctionBegin; 2710 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2711 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2712 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2713 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 2714 PetscFunctionReturn(0); 2715 } 2716 2717 #undef __FUNCT__ 2718 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2719 /*@C 2720 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2721 per row in the matrix. For good matrix assembly performance the 2722 user should preallocate the matrix storage by setting the parameter nz 2723 (or the array nnz). By setting these parameters accurately, performance 2724 during matrix assembly can be increased by more than a factor of 50. 2725 2726 Collective on MPI_Comm 2727 2728 Input Parameters: 2729 + A - the matrix 2730 . bs - size of block 2731 . nz - number of block nonzeros per block row (same for all rows) 2732 - nnz - array containing the number of block nonzeros in the various block rows 2733 (possibly different for each block row) or PETSC_NULL 2734 2735 Options Database Keys: 2736 . -mat_no_unroll - uses code that does not unroll the loops in the 2737 block calculations (much slower) 2738 . -mat_block_size - size of the blocks to use 2739 2740 Level: intermediate 2741 2742 Notes: 2743 If the nnz parameter is given then the nz parameter is ignored 2744 2745 The block AIJ format is fully compatible with standard Fortran 77 2746 storage. That is, the stored row and column indices can begin at 2747 either one (as in Fortran) or zero. See the users' manual for details. 2748 2749 Specify the preallocated storage with either nz or nnz (not both). 2750 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2751 allocation. For additional details, see the users manual chapter on 2752 matrices. 2753 2754 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2755 @*/ 2756 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 2757 { 2758 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 2759 2760 PetscFunctionBegin; 2761 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2762 if (f) { 2763 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2764 } 2765 PetscFunctionReturn(0); 2766 } 2767 2768 #undef __FUNCT__ 2769 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 2770 /*@ 2771 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 2772 (upper triangular entries in CSR format) provided by the user. 2773 2774 Collective on MPI_Comm 2775 2776 Input Parameters: 2777 + comm - must be an MPI communicator of size 1 2778 . bs - size of block 2779 . m - number of rows 2780 . n - number of columns 2781 . i - row indices 2782 . j - column indices 2783 - a - matrix values 2784 2785 Output Parameter: 2786 . mat - the matrix 2787 2788 Level: intermediate 2789 2790 Notes: 2791 The i, j, and a arrays are not copied by this routine, the user must free these arrays 2792 once the matrix is destroyed 2793 2794 You cannot set new nonzero locations into this matrix, that will generate an error. 2795 2796 The i and j indices are 0 based 2797 2798 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 2799 2800 @*/ 2801 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 2802 { 2803 PetscErrorCode ierr; 2804 PetscInt ii; 2805 Mat_SeqBAIJ *baij; 2806 2807 PetscFunctionBegin; 2808 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 2809 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2810 2811 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2812 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 2813 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 2814 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 2815 baij = (Mat_SeqBAIJ*)(*mat)->data; 2816 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 2817 2818 baij->i = i; 2819 baij->j = j; 2820 baij->a = a; 2821 baij->singlemalloc = PETSC_FALSE; 2822 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 2823 baij->free_a = PETSC_FALSE; 2824 baij->free_ij = PETSC_FALSE; 2825 2826 for (ii=0; ii<m; ii++) { 2827 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 2828 #if defined(PETSC_USE_DEBUG) 2829 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 2830 #endif 2831 } 2832 #if defined(PETSC_USE_DEBUG) 2833 for (ii=0; ii<baij->i[m]; ii++) { 2834 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 2835 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 2836 } 2837 #endif 2838 2839 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2840 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2841 PetscFunctionReturn(0); 2842 } 2843