1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 15 /*@C 16 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 17 18 Collective on Mat 19 20 Input Parameters: 21 . mat - the matrix 22 23 Level: advanced 24 @*/ 25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 26 { 27 PetscErrorCode ierr,(*f)(Mat); 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 31 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 32 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 33 34 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 35 if (f) { 36 ierr = (*f)(mat);CHKERRQ(ierr); 37 } else { 38 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 39 } 40 PetscFunctionReturn(0); 41 } 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 47 { 48 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 49 PetscErrorCode ierr; 50 PetscInt *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs; 51 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 52 53 PetscFunctionBegin; 54 if (a->idiagvalid) PetscFunctionReturn(0); 55 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56 diag_offset = a->diag; 57 if (!a->idiag) { 58 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59 } 60 diag = a->idiag; 61 mdiag = a->idiag+bs*bs*mbs; 62 /* factor and invert each block */ 63 switch (bs){ 64 case 2: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 4*diag_offset[i]; 67 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 68 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 69 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 70 diag += 4; 71 mdiag += 4; 72 } 73 break; 74 case 3: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 9*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79 diag[8] = odiag[8]; 80 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 81 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 82 mdiag[8] = odiag[8]; 83 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 84 diag += 9; 85 mdiag += 9; 86 } 87 break; 88 case 4: 89 for (i=0; i<mbs; i++) { 90 odiag = v + 16*diag_offset[i]; 91 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 92 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 94 diag += 16; 95 mdiag += 16; 96 } 97 break; 98 case 5: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 25*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 104 diag += 25; 105 mdiag += 25; 106 } 107 break; 108 default: 109 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 110 } 111 a->idiagvalid = PETSC_TRUE; 112 PetscFunctionReturn(0); 113 } 114 EXTERN_C_END 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 119 { 120 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 121 PetscScalar *x,x1,x2,s1,s2; 122 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 123 PetscErrorCode ierr; 124 PetscInt m = a->mbs,i,i2,nz,idx; 125 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 126 127 PetscFunctionBegin; 128 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 129 its = its*lits; 130 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 131 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 132 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 133 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 134 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 135 136 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 137 138 diag = a->diag; 139 idiag = a->idiag; 140 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 141 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 142 143 if (flag & SOR_ZERO_INITIAL_GUESS) { 144 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 145 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 146 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 147 i2 = 2; 148 idiag += 4; 149 for (i=1; i<m; i++) { 150 v = aa + 4*ai[i]; 151 vi = aj + ai[i]; 152 nz = diag[i] - ai[i]; 153 s1 = b[i2]; s2 = b[i2+1]; 154 while (nz--) { 155 idx = 2*(*vi++); 156 x1 = x[idx]; x2 = x[1+idx]; 157 s1 -= v[0]*x1 + v[2]*x2; 158 s2 -= v[1]*x1 + v[3]*x2; 159 v += 4; 160 } 161 x[i2] = idiag[0]*s1 + idiag[2]*s2; 162 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 163 idiag += 4; 164 i2 += 2; 165 } 166 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 167 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 168 } 169 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 170 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 171 i2 = 0; 172 mdiag = a->idiag+4*a->mbs; 173 for (i=0; i<m; i++) { 174 x1 = x[i2]; x2 = x[i2+1]; 175 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 176 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 177 mdiag += 4; 178 i2 += 2; 179 } 180 ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 181 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 182 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 183 } 184 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 185 idiag = a->idiag+4*a->mbs - 4; 186 i2 = 2*m - 2; 187 x1 = x[i2]; x2 = x[i2+1]; 188 x[i2] = idiag[0]*x1 + idiag[2]*x2; 189 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 190 idiag -= 4; 191 i2 -= 2; 192 for (i=m-2; i>=0; i--) { 193 v = aa + 4*(diag[i]+1); 194 vi = aj + diag[i] + 1; 195 nz = ai[i+1] - diag[i] - 1; 196 s1 = x[i2]; s2 = x[i2+1]; 197 while (nz--) { 198 idx = 2*(*vi++); 199 x1 = x[idx]; x2 = x[1+idx]; 200 s1 -= v[0]*x1 + v[2]*x2; 201 s2 -= v[1]*x1 + v[3]*x2; 202 v += 4; 203 } 204 x[i2] = idiag[0]*s1 + idiag[2]*s2; 205 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 206 idiag -= 4; 207 i2 -= 2; 208 } 209 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 210 } 211 } else { 212 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 213 } 214 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 215 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 216 PetscFunctionReturn(0); 217 } 218 219 #undef __FUNCT__ 220 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 221 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 222 { 223 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 224 PetscScalar *x,x1,x2,x3,s1,s2,s3; 225 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 226 PetscErrorCode ierr; 227 PetscInt m = a->mbs,i,i2,nz,idx; 228 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 229 230 PetscFunctionBegin; 231 its = its*lits; 232 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 233 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 234 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 235 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 236 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 237 238 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 239 240 diag = a->diag; 241 idiag = a->idiag; 242 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 243 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 244 245 if (flag & SOR_ZERO_INITIAL_GUESS) { 246 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 247 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 248 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 249 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 250 i2 = 3; 251 idiag += 9; 252 for (i=1; i<m; i++) { 253 v = aa + 9*ai[i]; 254 vi = aj + ai[i]; 255 nz = diag[i] - ai[i]; 256 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 257 while (nz--) { 258 idx = 3*(*vi++); 259 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 260 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 261 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 262 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 263 v += 9; 264 } 265 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 266 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 267 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 268 idiag += 9; 269 i2 += 3; 270 } 271 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 272 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 273 } 274 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 275 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 276 i2 = 0; 277 mdiag = a->idiag+9*a->mbs; 278 for (i=0; i<m; i++) { 279 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 280 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 281 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 282 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 283 mdiag += 9; 284 i2 += 3; 285 } 286 ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 287 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 288 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 289 } 290 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 291 idiag = a->idiag+9*a->mbs - 9; 292 i2 = 3*m - 3; 293 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 294 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 295 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 296 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 297 idiag -= 9; 298 i2 -= 3; 299 for (i=m-2; i>=0; i--) { 300 v = aa + 9*(diag[i]+1); 301 vi = aj + diag[i] + 1; 302 nz = ai[i+1] - diag[i] - 1; 303 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 304 while (nz--) { 305 idx = 3*(*vi++); 306 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 307 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 308 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 309 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 310 v += 9; 311 } 312 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 313 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 314 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 315 idiag -= 9; 316 i2 -= 3; 317 } 318 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 319 } 320 } else { 321 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 322 } 323 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 324 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 325 PetscFunctionReturn(0); 326 } 327 328 #undef __FUNCT__ 329 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 330 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 331 { 332 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 333 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 334 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 335 PetscErrorCode ierr; 336 PetscInt m = a->mbs,i,i2,nz,idx; 337 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 338 339 PetscFunctionBegin; 340 its = its*lits; 341 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 342 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 343 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 344 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 345 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 346 347 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 348 349 diag = a->diag; 350 idiag = a->idiag; 351 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 352 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 353 354 if (flag & SOR_ZERO_INITIAL_GUESS) { 355 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 356 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 357 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 358 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 359 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 360 i2 = 4; 361 idiag += 16; 362 for (i=1; i<m; i++) { 363 v = aa + 16*ai[i]; 364 vi = aj + ai[i]; 365 nz = diag[i] - ai[i]; 366 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 367 while (nz--) { 368 idx = 4*(*vi++); 369 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 370 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 371 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 372 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 373 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 374 v += 16; 375 } 376 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 377 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 378 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 379 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 380 idiag += 16; 381 i2 += 4; 382 } 383 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 384 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 385 } 386 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 387 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 388 i2 = 0; 389 mdiag = a->idiag+16*a->mbs; 390 for (i=0; i<m; i++) { 391 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 392 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 393 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 394 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 395 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 396 mdiag += 16; 397 i2 += 4; 398 } 399 ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 400 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 401 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 402 } 403 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 404 idiag = a->idiag+16*a->mbs - 16; 405 i2 = 4*m - 4; 406 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 407 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 408 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 409 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 410 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 411 idiag -= 16; 412 i2 -= 4; 413 for (i=m-2; i>=0; i--) { 414 v = aa + 16*(diag[i]+1); 415 vi = aj + diag[i] + 1; 416 nz = ai[i+1] - diag[i] - 1; 417 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 418 while (nz--) { 419 idx = 4*(*vi++); 420 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 421 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 422 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 423 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 424 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 425 v += 16; 426 } 427 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 428 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 429 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 430 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 431 idiag -= 16; 432 i2 -= 4; 433 } 434 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 435 } 436 } else { 437 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 438 } 439 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 440 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 441 PetscFunctionReturn(0); 442 } 443 444 #undef __FUNCT__ 445 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 446 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 447 { 448 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 449 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 450 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 451 PetscErrorCode ierr; 452 PetscInt m = a->mbs,i,i2,nz,idx; 453 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 454 455 PetscFunctionBegin; 456 its = its*lits; 457 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 458 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 459 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 460 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 461 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 462 463 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 464 465 diag = a->diag; 466 idiag = a->idiag; 467 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 468 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 469 470 if (flag & SOR_ZERO_INITIAL_GUESS) { 471 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 472 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 473 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 474 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 475 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 476 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 477 i2 = 5; 478 idiag += 25; 479 for (i=1; i<m; i++) { 480 v = aa + 25*ai[i]; 481 vi = aj + ai[i]; 482 nz = diag[i] - ai[i]; 483 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 484 while (nz--) { 485 idx = 5*(*vi++); 486 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 487 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 488 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 489 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 490 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 491 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 492 v += 25; 493 } 494 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 495 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 496 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 497 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 498 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 499 idiag += 25; 500 i2 += 5; 501 } 502 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 503 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 504 } 505 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 506 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 507 i2 = 0; 508 mdiag = a->idiag+25*a->mbs; 509 for (i=0; i<m; i++) { 510 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 511 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 512 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 513 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 514 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 515 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 516 mdiag += 25; 517 i2 += 5; 518 } 519 ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 520 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 521 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 522 } 523 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 524 idiag = a->idiag+25*a->mbs - 25; 525 i2 = 5*m - 5; 526 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 527 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 528 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 529 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 530 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 531 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 532 idiag -= 25; 533 i2 -= 5; 534 for (i=m-2; i>=0; i--) { 535 v = aa + 25*(diag[i]+1); 536 vi = aj + diag[i] + 1; 537 nz = ai[i+1] - diag[i] - 1; 538 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 539 while (nz--) { 540 idx = 5*(*vi++); 541 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 542 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 543 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 544 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 545 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 546 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 547 v += 25; 548 } 549 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 550 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 551 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 552 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 553 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 554 idiag -= 25; 555 i2 -= 5; 556 } 557 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 558 } 559 } else { 560 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 561 } 562 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 563 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 564 PetscFunctionReturn(0); 565 } 566 567 /* 568 Special version for direct calls from Fortran (Used in PETSc-fun3d) 569 */ 570 #if defined(PETSC_HAVE_FORTRAN_CAPS) 571 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 572 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 573 #define matsetvaluesblocked4_ matsetvaluesblocked4 574 #endif 575 576 EXTERN_C_BEGIN 577 #undef __FUNCT__ 578 #define __FUNCT__ "matsetvaluesblocked4_" 579 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 580 { 581 Mat A = *AA; 582 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 583 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 584 PetscInt *ai=a->i,*ailen=a->ilen; 585 PetscInt *aj=a->j,stepval,lastcol = -1; 586 const PetscScalar *value = v; 587 MatScalar *ap,*aa = a->a,*bap; 588 589 PetscFunctionBegin; 590 if (A->rmap.bs != 4) SETERRABORT(A->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 591 stepval = (n-1)*4; 592 for (k=0; k<m; k++) { /* loop over added rows */ 593 row = im[k]; 594 rp = aj + ai[row]; 595 ap = aa + 16*ai[row]; 596 nrow = ailen[row]; 597 low = 0; 598 high = nrow; 599 for (l=0; l<n; l++) { /* loop over added columns */ 600 col = in[l]; 601 if (col <= lastcol) low = 0; else high = nrow; 602 lastcol = col; 603 value = v + k*(stepval+4 + l)*4; 604 while (high-low > 7) { 605 t = (low+high)/2; 606 if (rp[t] > col) high = t; 607 else low = t; 608 } 609 for (i=low; i<high; i++) { 610 if (rp[i] > col) break; 611 if (rp[i] == col) { 612 bap = ap + 16*i; 613 for (ii=0; ii<4; ii++,value+=stepval) { 614 for (jj=ii; jj<16; jj+=4) { 615 bap[jj] += *value++; 616 } 617 } 618 goto noinsert2; 619 } 620 } 621 N = nrow++ - 1; 622 high++; /* added new column index thus must search to one higher than before */ 623 /* shift up all the later entries in this row */ 624 for (ii=N; ii>=i; ii--) { 625 rp[ii+1] = rp[ii]; 626 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 627 } 628 if (N >= i) { 629 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 630 } 631 rp[i] = col; 632 bap = ap + 16*i; 633 for (ii=0; ii<4; ii++,value+=stepval) { 634 for (jj=ii; jj<16; jj+=4) { 635 bap[jj] = *value++; 636 } 637 } 638 noinsert2:; 639 low = i; 640 } 641 ailen[row] = nrow; 642 } 643 PetscFunctionReturnVoid(); 644 } 645 EXTERN_C_END 646 647 #if defined(PETSC_HAVE_FORTRAN_CAPS) 648 #define matsetvalues4_ MATSETVALUES4 649 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 650 #define matsetvalues4_ matsetvalues4 651 #endif 652 653 EXTERN_C_BEGIN 654 #undef __FUNCT__ 655 #define __FUNCT__ "MatSetValues4_" 656 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 657 { 658 Mat A = *AA; 659 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 660 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 661 PetscInt *ai=a->i,*ailen=a->ilen; 662 PetscInt *aj=a->j,brow,bcol; 663 PetscInt ridx,cidx,lastcol = -1; 664 MatScalar *ap,value,*aa=a->a,*bap; 665 666 PetscFunctionBegin; 667 for (k=0; k<m; k++) { /* loop over added rows */ 668 row = im[k]; brow = row/4; 669 rp = aj + ai[brow]; 670 ap = aa + 16*ai[brow]; 671 nrow = ailen[brow]; 672 low = 0; 673 high = nrow; 674 for (l=0; l<n; l++) { /* loop over added columns */ 675 col = in[l]; bcol = col/4; 676 ridx = row % 4; cidx = col % 4; 677 value = v[l + k*n]; 678 if (col <= lastcol) low = 0; else high = nrow; 679 lastcol = col; 680 while (high-low > 7) { 681 t = (low+high)/2; 682 if (rp[t] > bcol) high = t; 683 else low = t; 684 } 685 for (i=low; i<high; i++) { 686 if (rp[i] > bcol) break; 687 if (rp[i] == bcol) { 688 bap = ap + 16*i + 4*cidx + ridx; 689 *bap += value; 690 goto noinsert1; 691 } 692 } 693 N = nrow++ - 1; 694 high++; /* added new column thus must search to one higher than before */ 695 /* shift up all the later entries in this row */ 696 for (ii=N; ii>=i; ii--) { 697 rp[ii+1] = rp[ii]; 698 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 699 } 700 if (N>=i) { 701 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 702 } 703 rp[i] = bcol; 704 ap[16*i + 4*cidx + ridx] = value; 705 noinsert1:; 706 low = i; 707 } 708 ailen[brow] = nrow; 709 } 710 PetscFunctionReturnVoid(); 711 } 712 EXTERN_C_END 713 714 /* UGLY, ugly, ugly 715 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 716 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 717 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 718 converts the entries into single precision and then calls ..._MatScalar() to put them 719 into the single precision data structures. 720 */ 721 #if defined(PETSC_USE_MAT_SINGLE) 722 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 723 #else 724 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 725 #endif 726 727 #define CHUNKSIZE 10 728 729 /* 730 Checks for missing diagonals 731 */ 732 #undef __FUNCT__ 733 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 734 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A) 735 { 736 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 737 PetscErrorCode ierr; 738 PetscInt *diag,*jj = a->j,i; 739 740 PetscFunctionBegin; 741 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 742 diag = a->diag; 743 for (i=0; i<a->mbs; i++) { 744 if (jj[diag[i]] != i) { 745 SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i); 746 } 747 } 748 PetscFunctionReturn(0); 749 } 750 751 #undef __FUNCT__ 752 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 753 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 754 { 755 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 756 PetscErrorCode ierr; 757 PetscInt i,j,m = a->mbs; 758 759 PetscFunctionBegin; 760 if (!a->diag) { 761 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 762 } 763 for (i=0; i<m; i++) { 764 a->diag[i] = a->i[i+1]; 765 for (j=a->i[i]; j<a->i[i+1]; j++) { 766 if (a->j[j] == i) { 767 a->diag[i] = j; 768 break; 769 } 770 } 771 } 772 PetscFunctionReturn(0); 773 } 774 775 776 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 777 778 #undef __FUNCT__ 779 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 780 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 781 { 782 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 783 PetscErrorCode ierr; 784 PetscInt n = a->mbs,i; 785 786 PetscFunctionBegin; 787 *nn = n; 788 if (!ia) PetscFunctionReturn(0); 789 if (symmetric) { 790 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 791 } else if (oshift == 1) { 792 /* temporarily add 1 to i and j indices */ 793 PetscInt nz = a->i[n]; 794 for (i=0; i<nz; i++) a->j[i]++; 795 for (i=0; i<n+1; i++) a->i[i]++; 796 *ia = a->i; *ja = a->j; 797 } else { 798 *ia = a->i; *ja = a->j; 799 } 800 801 PetscFunctionReturn(0); 802 } 803 804 #undef __FUNCT__ 805 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 806 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 807 { 808 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 809 PetscErrorCode ierr; 810 PetscInt i,n = a->mbs; 811 812 PetscFunctionBegin; 813 if (!ia) PetscFunctionReturn(0); 814 if (symmetric) { 815 ierr = PetscFree(*ia);CHKERRQ(ierr); 816 ierr = PetscFree(*ja);CHKERRQ(ierr); 817 } else if (oshift == 1) { 818 PetscInt nz = a->i[n]-1; 819 for (i=0; i<nz; i++) a->j[i]--; 820 for (i=0; i<n+1; i++) a->i[i]--; 821 } 822 PetscFunctionReturn(0); 823 } 824 825 #undef __FUNCT__ 826 #define __FUNCT__ "MatDestroy_SeqBAIJ" 827 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 828 { 829 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 830 PetscErrorCode ierr; 831 832 PetscFunctionBegin; 833 #if defined(PETSC_USE_LOG) 834 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz); 835 #endif 836 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 837 if (a->row) { 838 ierr = ISDestroy(a->row);CHKERRQ(ierr); 839 } 840 if (a->col) { 841 ierr = ISDestroy(a->col);CHKERRQ(ierr); 842 } 843 ierr = PetscFree(a->diag);CHKERRQ(ierr); 844 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 845 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 846 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 847 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 848 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 849 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 850 #if defined(PETSC_USE_MAT_SINGLE) 851 ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr); 852 #endif 853 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 854 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 855 856 ierr = PetscFree(a);CHKERRQ(ierr); 857 858 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 859 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 860 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 861 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 862 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 863 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 864 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 865 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 866 PetscFunctionReturn(0); 867 } 868 869 #undef __FUNCT__ 870 #define __FUNCT__ "MatSetOption_SeqBAIJ" 871 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op) 872 { 873 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 874 PetscErrorCode ierr; 875 876 PetscFunctionBegin; 877 switch (op) { 878 case MAT_ROW_ORIENTED: 879 a->roworiented = PETSC_TRUE; 880 break; 881 case MAT_COLUMN_ORIENTED: 882 a->roworiented = PETSC_FALSE; 883 break; 884 case MAT_COLUMNS_SORTED: 885 a->sorted = PETSC_TRUE; 886 break; 887 case MAT_COLUMNS_UNSORTED: 888 a->sorted = PETSC_FALSE; 889 break; 890 case MAT_KEEP_ZEROED_ROWS: 891 a->keepzeroedrows = PETSC_TRUE; 892 break; 893 case MAT_NO_NEW_NONZERO_LOCATIONS: 894 a->nonew = 1; 895 break; 896 case MAT_NEW_NONZERO_LOCATION_ERR: 897 a->nonew = -1; 898 break; 899 case MAT_NEW_NONZERO_ALLOCATION_ERR: 900 a->nonew = -2; 901 break; 902 case MAT_YES_NEW_NONZERO_LOCATIONS: 903 a->nonew = 0; 904 break; 905 case MAT_ROWS_SORTED: 906 case MAT_ROWS_UNSORTED: 907 case MAT_YES_NEW_DIAGONALS: 908 case MAT_IGNORE_OFF_PROC_ENTRIES: 909 case MAT_USE_HASH_TABLE: 910 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 911 break; 912 case MAT_NO_NEW_DIAGONALS: 913 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 914 case MAT_SYMMETRIC: 915 case MAT_STRUCTURALLY_SYMMETRIC: 916 case MAT_NOT_SYMMETRIC: 917 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 918 case MAT_HERMITIAN: 919 case MAT_NOT_HERMITIAN: 920 case MAT_SYMMETRY_ETERNAL: 921 case MAT_NOT_SYMMETRY_ETERNAL: 922 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 923 break; 924 default: 925 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 926 } 927 PetscFunctionReturn(0); 928 } 929 930 #undef __FUNCT__ 931 #define __FUNCT__ "MatGetRow_SeqBAIJ" 932 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 933 { 934 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 935 PetscErrorCode ierr; 936 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 937 MatScalar *aa,*aa_i; 938 PetscScalar *v_i; 939 940 PetscFunctionBegin; 941 bs = A->rmap.bs; 942 ai = a->i; 943 aj = a->j; 944 aa = a->a; 945 bs2 = a->bs2; 946 947 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 948 949 bn = row/bs; /* Block number */ 950 bp = row % bs; /* Block Position */ 951 M = ai[bn+1] - ai[bn]; 952 *nz = bs*M; 953 954 if (v) { 955 *v = 0; 956 if (*nz) { 957 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 958 for (i=0; i<M; i++) { /* for each block in the block row */ 959 v_i = *v + i*bs; 960 aa_i = aa + bs2*(ai[bn] + i); 961 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 962 } 963 } 964 } 965 966 if (idx) { 967 *idx = 0; 968 if (*nz) { 969 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 970 for (i=0; i<M; i++) { /* for each block in the block row */ 971 idx_i = *idx + i*bs; 972 itmp = bs*aj[ai[bn] + i]; 973 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 974 } 975 } 976 } 977 PetscFunctionReturn(0); 978 } 979 980 #undef __FUNCT__ 981 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 982 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 983 { 984 PetscErrorCode ierr; 985 986 PetscFunctionBegin; 987 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 988 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 989 PetscFunctionReturn(0); 990 } 991 992 #undef __FUNCT__ 993 #define __FUNCT__ "MatTranspose_SeqBAIJ" 994 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 995 { 996 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 997 Mat C; 998 PetscErrorCode ierr; 999 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1000 PetscInt *rows,*cols,bs2=a->bs2; 1001 PetscScalar *array; 1002 1003 PetscFunctionBegin; 1004 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 1005 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1006 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1007 1008 #if defined(PETSC_USE_MAT_SINGLE) 1009 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 1010 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 1011 #else 1012 array = a->a; 1013 #endif 1014 1015 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1016 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 1017 ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr); 1018 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1019 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1020 ierr = PetscFree(col);CHKERRQ(ierr); 1021 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1022 cols = rows + bs; 1023 for (i=0; i<mbs; i++) { 1024 cols[0] = i*bs; 1025 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1026 len = ai[i+1] - ai[i]; 1027 for (j=0; j<len; j++) { 1028 rows[0] = (*aj++)*bs; 1029 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1030 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1031 array += bs2; 1032 } 1033 } 1034 ierr = PetscFree(rows);CHKERRQ(ierr); 1035 #if defined(PETSC_USE_MAT_SINGLE) 1036 ierr = PetscFree(array); 1037 #endif 1038 1039 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1040 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1041 1042 if (B) { 1043 *B = C; 1044 } else { 1045 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1046 } 1047 PetscFunctionReturn(0); 1048 } 1049 1050 #undef __FUNCT__ 1051 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1052 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1053 { 1054 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1055 PetscErrorCode ierr; 1056 PetscInt i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2; 1057 int fd; 1058 PetscScalar *aa; 1059 FILE *file; 1060 1061 PetscFunctionBegin; 1062 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1063 ierr = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1064 col_lens[0] = MAT_FILE_COOKIE; 1065 1066 col_lens[1] = A->rmap.N; 1067 col_lens[2] = A->cmap.n; 1068 col_lens[3] = a->nz*bs2; 1069 1070 /* store lengths of each row and write (including header) to file */ 1071 count = 0; 1072 for (i=0; i<a->mbs; i++) { 1073 for (j=0; j<bs; j++) { 1074 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1075 } 1076 } 1077 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1078 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1079 1080 /* store column indices (zero start index) */ 1081 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1082 count = 0; 1083 for (i=0; i<a->mbs; i++) { 1084 for (j=0; j<bs; j++) { 1085 for (k=a->i[i]; k<a->i[i+1]; k++) { 1086 for (l=0; l<bs; l++) { 1087 jj[count++] = bs*a->j[k] + l; 1088 } 1089 } 1090 } 1091 } 1092 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1093 ierr = PetscFree(jj);CHKERRQ(ierr); 1094 1095 /* store nonzero values */ 1096 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1097 count = 0; 1098 for (i=0; i<a->mbs; i++) { 1099 for (j=0; j<bs; j++) { 1100 for (k=a->i[i]; k<a->i[i+1]; k++) { 1101 for (l=0; l<bs; l++) { 1102 aa[count++] = a->a[bs2*k + l*bs + j]; 1103 } 1104 } 1105 } 1106 } 1107 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1108 ierr = PetscFree(aa);CHKERRQ(ierr); 1109 1110 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1111 if (file) { 1112 fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs); 1113 } 1114 PetscFunctionReturn(0); 1115 } 1116 1117 #undef __FUNCT__ 1118 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1119 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1120 { 1121 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1122 PetscErrorCode ierr; 1123 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 1124 PetscViewerFormat format; 1125 1126 PetscFunctionBegin; 1127 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1128 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1129 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1130 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1131 Mat aij; 1132 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1133 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1134 ierr = MatDestroy(aij);CHKERRQ(ierr); 1135 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1136 PetscFunctionReturn(0); 1137 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1138 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1139 for (i=0; i<a->mbs; i++) { 1140 for (j=0; j<bs; j++) { 1141 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1142 for (k=a->i[i]; k<a->i[i+1]; k++) { 1143 for (l=0; l<bs; l++) { 1144 #if defined(PETSC_USE_COMPLEX) 1145 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1146 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1147 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1148 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1149 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1150 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1151 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1152 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1153 } 1154 #else 1155 if (a->a[bs2*k + l*bs + j] != 0.0) { 1156 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1157 } 1158 #endif 1159 } 1160 } 1161 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1162 } 1163 } 1164 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1165 } else { 1166 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1167 for (i=0; i<a->mbs; i++) { 1168 for (j=0; j<bs; j++) { 1169 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1170 for (k=a->i[i]; k<a->i[i+1]; k++) { 1171 for (l=0; l<bs; l++) { 1172 #if defined(PETSC_USE_COMPLEX) 1173 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1174 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1175 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1176 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1177 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1178 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1179 } else { 1180 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1181 } 1182 #else 1183 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1184 #endif 1185 } 1186 } 1187 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1188 } 1189 } 1190 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1191 } 1192 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1193 PetscFunctionReturn(0); 1194 } 1195 1196 #undef __FUNCT__ 1197 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1198 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1199 { 1200 Mat A = (Mat) Aa; 1201 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1202 PetscErrorCode ierr; 1203 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 1204 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1205 MatScalar *aa; 1206 PetscViewer viewer; 1207 1208 PetscFunctionBegin; 1209 1210 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1211 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1212 1213 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1214 1215 /* loop over matrix elements drawing boxes */ 1216 color = PETSC_DRAW_BLUE; 1217 for (i=0,row=0; i<mbs; i++,row+=bs) { 1218 for (j=a->i[i]; j<a->i[i+1]; j++) { 1219 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1220 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1221 aa = a->a + j*bs2; 1222 for (k=0; k<bs; k++) { 1223 for (l=0; l<bs; l++) { 1224 if (PetscRealPart(*aa++) >= 0.) continue; 1225 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1226 } 1227 } 1228 } 1229 } 1230 color = PETSC_DRAW_CYAN; 1231 for (i=0,row=0; i<mbs; i++,row+=bs) { 1232 for (j=a->i[i]; j<a->i[i+1]; j++) { 1233 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1234 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1235 aa = a->a + j*bs2; 1236 for (k=0; k<bs; k++) { 1237 for (l=0; l<bs; l++) { 1238 if (PetscRealPart(*aa++) != 0.) continue; 1239 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1240 } 1241 } 1242 } 1243 } 1244 1245 color = PETSC_DRAW_RED; 1246 for (i=0,row=0; i<mbs; i++,row+=bs) { 1247 for (j=a->i[i]; j<a->i[i+1]; j++) { 1248 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1249 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1250 aa = a->a + j*bs2; 1251 for (k=0; k<bs; k++) { 1252 for (l=0; l<bs; l++) { 1253 if (PetscRealPart(*aa++) <= 0.) continue; 1254 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1255 } 1256 } 1257 } 1258 } 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #undef __FUNCT__ 1263 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1264 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1265 { 1266 PetscErrorCode ierr; 1267 PetscReal xl,yl,xr,yr,w,h; 1268 PetscDraw draw; 1269 PetscTruth isnull; 1270 1271 PetscFunctionBegin; 1272 1273 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1274 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1275 1276 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1277 xr = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 1278 xr += w; yr += h; xl = -w; yl = -h; 1279 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1280 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1281 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1282 PetscFunctionReturn(0); 1283 } 1284 1285 #undef __FUNCT__ 1286 #define __FUNCT__ "MatView_SeqBAIJ" 1287 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1288 { 1289 PetscErrorCode ierr; 1290 PetscTruth iascii,isbinary,isdraw; 1291 1292 PetscFunctionBegin; 1293 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1294 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1295 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1296 if (iascii){ 1297 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1298 } else if (isbinary) { 1299 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1300 } else if (isdraw) { 1301 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1302 } else { 1303 Mat B; 1304 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1305 ierr = MatView(B,viewer);CHKERRQ(ierr); 1306 ierr = MatDestroy(B);CHKERRQ(ierr); 1307 } 1308 PetscFunctionReturn(0); 1309 } 1310 1311 1312 #undef __FUNCT__ 1313 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1314 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1315 { 1316 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1317 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1318 PetscInt *ai = a->i,*ailen = a->ilen; 1319 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 1320 MatScalar *ap,*aa = a->a,zero = 0.0; 1321 1322 PetscFunctionBegin; 1323 for (k=0; k<m; k++) { /* loop over rows */ 1324 row = im[k]; brow = row/bs; 1325 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 1326 if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1327 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1328 nrow = ailen[brow]; 1329 for (l=0; l<n; l++) { /* loop over columns */ 1330 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 1331 if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1332 col = in[l] ; 1333 bcol = col/bs; 1334 cidx = col%bs; 1335 ridx = row%bs; 1336 high = nrow; 1337 low = 0; /* assume unsorted */ 1338 while (high-low > 5) { 1339 t = (low+high)/2; 1340 if (rp[t] > bcol) high = t; 1341 else low = t; 1342 } 1343 for (i=low; i<high; i++) { 1344 if (rp[i] > bcol) break; 1345 if (rp[i] == bcol) { 1346 *v++ = ap[bs2*i+bs*cidx+ridx]; 1347 goto finished; 1348 } 1349 } 1350 *v++ = zero; 1351 finished:; 1352 } 1353 } 1354 PetscFunctionReturn(0); 1355 } 1356 1357 #if defined(PETSC_USE_MAT_SINGLE) 1358 #undef __FUNCT__ 1359 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1360 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 1361 { 1362 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1363 PetscErrorCode ierr; 1364 PetscInt i,N = m*n*b->bs2; 1365 MatScalar *vsingle; 1366 1367 PetscFunctionBegin; 1368 if (N > b->setvalueslen) { 1369 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 1370 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1371 b->setvalueslen = N; 1372 } 1373 vsingle = b->setvaluescopy; 1374 for (i=0; i<N; i++) { 1375 vsingle[i] = v[i]; 1376 } 1377 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1378 PetscFunctionReturn(0); 1379 } 1380 #endif 1381 1382 1383 #undef __FUNCT__ 1384 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1385 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is) 1386 { 1387 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1388 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1389 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1390 PetscErrorCode ierr; 1391 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 1392 PetscTruth roworiented=a->roworiented; 1393 const MatScalar *value = v; 1394 MatScalar *ap,*aa = a->a,*bap; 1395 1396 PetscFunctionBegin; 1397 if (roworiented) { 1398 stepval = (n-1)*bs; 1399 } else { 1400 stepval = (m-1)*bs; 1401 } 1402 for (k=0; k<m; k++) { /* loop over added rows */ 1403 row = im[k]; 1404 if (row < 0) continue; 1405 #if defined(PETSC_USE_DEBUG) 1406 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1407 #endif 1408 rp = aj + ai[row]; 1409 ap = aa + bs2*ai[row]; 1410 rmax = imax[row]; 1411 nrow = ailen[row]; 1412 low = 0; 1413 high = nrow; 1414 for (l=0; l<n; l++) { /* loop over added columns */ 1415 if (in[l] < 0) continue; 1416 #if defined(PETSC_USE_DEBUG) 1417 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1418 #endif 1419 col = in[l]; 1420 if (roworiented) { 1421 value = v + k*(stepval+bs)*bs + l*bs; 1422 } else { 1423 value = v + l*(stepval+bs)*bs + k*bs; 1424 } 1425 if (col <= lastcol) low = 0; else high = nrow; 1426 lastcol = col; 1427 while (high-low > 7) { 1428 t = (low+high)/2; 1429 if (rp[t] > col) high = t; 1430 else low = t; 1431 } 1432 for (i=low; i<high; i++) { 1433 if (rp[i] > col) break; 1434 if (rp[i] == col) { 1435 bap = ap + bs2*i; 1436 if (roworiented) { 1437 if (is == ADD_VALUES) { 1438 for (ii=0; ii<bs; ii++,value+=stepval) { 1439 for (jj=ii; jj<bs2; jj+=bs) { 1440 bap[jj] += *value++; 1441 } 1442 } 1443 } else { 1444 for (ii=0; ii<bs; ii++,value+=stepval) { 1445 for (jj=ii; jj<bs2; jj+=bs) { 1446 bap[jj] = *value++; 1447 } 1448 } 1449 } 1450 } else { 1451 if (is == ADD_VALUES) { 1452 for (ii=0; ii<bs; ii++,value+=stepval) { 1453 for (jj=0; jj<bs; jj++) { 1454 *bap++ += *value++; 1455 } 1456 } 1457 } else { 1458 for (ii=0; ii<bs; ii++,value+=stepval) { 1459 for (jj=0; jj<bs; jj++) { 1460 *bap++ = *value++; 1461 } 1462 } 1463 } 1464 } 1465 goto noinsert2; 1466 } 1467 } 1468 if (nonew == 1) goto noinsert2; 1469 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1470 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1471 N = nrow++ - 1; high++; 1472 /* shift up all the later entries in this row */ 1473 for (ii=N; ii>=i; ii--) { 1474 rp[ii+1] = rp[ii]; 1475 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1476 } 1477 if (N >= i) { 1478 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1479 } 1480 rp[i] = col; 1481 bap = ap + bs2*i; 1482 if (roworiented) { 1483 for (ii=0; ii<bs; ii++,value+=stepval) { 1484 for (jj=ii; jj<bs2; jj+=bs) { 1485 bap[jj] = *value++; 1486 } 1487 } 1488 } else { 1489 for (ii=0; ii<bs; ii++,value+=stepval) { 1490 for (jj=0; jj<bs; jj++) { 1491 *bap++ = *value++; 1492 } 1493 } 1494 } 1495 noinsert2:; 1496 low = i; 1497 } 1498 ailen[row] = nrow; 1499 } 1500 PetscFunctionReturn(0); 1501 } 1502 1503 #undef __FUNCT__ 1504 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1505 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1506 { 1507 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1508 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1509 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 1510 PetscErrorCode ierr; 1511 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1512 MatScalar *aa = a->a,*ap; 1513 PetscReal ratio=0.6; 1514 1515 PetscFunctionBegin; 1516 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1517 1518 if (m) rmax = ailen[0]; 1519 for (i=1; i<mbs; i++) { 1520 /* move each row back by the amount of empty slots (fshift) before it*/ 1521 fshift += imax[i-1] - ailen[i-1]; 1522 rmax = PetscMax(rmax,ailen[i]); 1523 if (fshift) { 1524 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1525 N = ailen[i]; 1526 for (j=0; j<N; j++) { 1527 ip[j-fshift] = ip[j]; 1528 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1529 } 1530 } 1531 ai[i] = ai[i-1] + ailen[i-1]; 1532 } 1533 if (mbs) { 1534 fshift += imax[mbs-1] - ailen[mbs-1]; 1535 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1536 } 1537 /* reset ilen and imax for each row */ 1538 for (i=0; i<mbs; i++) { 1539 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1540 } 1541 a->nz = ai[mbs]; 1542 1543 /* diagonals may have moved, so kill the diagonal pointers */ 1544 a->idiagvalid = PETSC_FALSE; 1545 if (fshift && a->diag) { 1546 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1547 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1548 a->diag = 0; 1549 } 1550 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1551 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1552 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1553 a->reallocs = 0; 1554 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1555 1556 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1557 if (a->compressedrow.use){ 1558 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1559 } 1560 1561 A->same_nonzero = PETSC_TRUE; 1562 PetscFunctionReturn(0); 1563 } 1564 1565 /* 1566 This function returns an array of flags which indicate the locations of contiguous 1567 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1568 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1569 Assume: sizes should be long enough to hold all the values. 1570 */ 1571 #undef __FUNCT__ 1572 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1573 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1574 { 1575 PetscInt i,j,k,row; 1576 PetscTruth flg; 1577 1578 PetscFunctionBegin; 1579 for (i=0,j=0; i<n; j++) { 1580 row = idx[i]; 1581 if (row%bs!=0) { /* Not the begining of a block */ 1582 sizes[j] = 1; 1583 i++; 1584 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1585 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1586 i++; 1587 } else { /* Begining of the block, so check if the complete block exists */ 1588 flg = PETSC_TRUE; 1589 for (k=1; k<bs; k++) { 1590 if (row+k != idx[i+k]) { /* break in the block */ 1591 flg = PETSC_FALSE; 1592 break; 1593 } 1594 } 1595 if (flg) { /* No break in the bs */ 1596 sizes[j] = bs; 1597 i+= bs; 1598 } else { 1599 sizes[j] = 1; 1600 i++; 1601 } 1602 } 1603 } 1604 *bs_max = j; 1605 PetscFunctionReturn(0); 1606 } 1607 1608 #undef __FUNCT__ 1609 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1610 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 1611 { 1612 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1613 PetscErrorCode ierr; 1614 PetscInt i,j,k,count,*rows; 1615 PetscInt bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max; 1616 PetscScalar zero = 0.0; 1617 MatScalar *aa; 1618 1619 PetscFunctionBegin; 1620 /* Make a copy of the IS and sort it */ 1621 /* allocate memory for rows,sizes */ 1622 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1623 sizes = rows + is_n; 1624 1625 /* copy IS values to rows, and sort them */ 1626 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1627 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1628 if (baij->keepzeroedrows) { 1629 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1630 bs_max = is_n; 1631 A->same_nonzero = PETSC_TRUE; 1632 } else { 1633 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1634 A->same_nonzero = PETSC_FALSE; 1635 } 1636 1637 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1638 row = rows[j]; 1639 if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 1640 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1641 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1642 if (sizes[i] == bs && !baij->keepzeroedrows) { 1643 if (diag != 0.0) { 1644 if (baij->ilen[row/bs] > 0) { 1645 baij->ilen[row/bs] = 1; 1646 baij->j[baij->i[row/bs]] = row/bs; 1647 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1648 } 1649 /* Now insert all the diagonal values for this bs */ 1650 for (k=0; k<bs; k++) { 1651 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 1652 } 1653 } else { /* (diag == 0.0) */ 1654 baij->ilen[row/bs] = 0; 1655 } /* end (diag == 0.0) */ 1656 } else { /* (sizes[i] != bs) */ 1657 #if defined (PETSC_USE_DEBUG) 1658 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 1659 #endif 1660 for (k=0; k<count; k++) { 1661 aa[0] = zero; 1662 aa += bs; 1663 } 1664 if (diag != 0.0) { 1665 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 1666 } 1667 } 1668 } 1669 1670 ierr = PetscFree(rows);CHKERRQ(ierr); 1671 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1672 PetscFunctionReturn(0); 1673 } 1674 1675 #undef __FUNCT__ 1676 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1677 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1678 { 1679 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1680 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 1681 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1682 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 1683 PetscErrorCode ierr; 1684 PetscInt ridx,cidx,bs2=a->bs2; 1685 PetscTruth roworiented=a->roworiented; 1686 MatScalar *ap,value,*aa=a->a,*bap; 1687 1688 PetscFunctionBegin; 1689 for (k=0; k<m; k++) { /* loop over added rows */ 1690 row = im[k]; 1691 brow = row/bs; 1692 if (row < 0) continue; 1693 #if defined(PETSC_USE_DEBUG) 1694 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 1695 #endif 1696 rp = aj + ai[brow]; 1697 ap = aa + bs2*ai[brow]; 1698 rmax = imax[brow]; 1699 nrow = ailen[brow]; 1700 low = 0; 1701 high = nrow; 1702 for (l=0; l<n; l++) { /* loop over added columns */ 1703 if (in[l] < 0) continue; 1704 #if defined(PETSC_USE_DEBUG) 1705 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 1706 #endif 1707 col = in[l]; bcol = col/bs; 1708 ridx = row % bs; cidx = col % bs; 1709 if (roworiented) { 1710 value = v[l + k*n]; 1711 } else { 1712 value = v[k + l*m]; 1713 } 1714 if (col <= lastcol) low = 0; else high = nrow; 1715 lastcol = col; 1716 while (high-low > 7) { 1717 t = (low+high)/2; 1718 if (rp[t] > bcol) high = t; 1719 else low = t; 1720 } 1721 for (i=low; i<high; i++) { 1722 if (rp[i] > bcol) break; 1723 if (rp[i] == bcol) { 1724 bap = ap + bs2*i + bs*cidx + ridx; 1725 if (is == ADD_VALUES) *bap += value; 1726 else *bap = value; 1727 goto noinsert1; 1728 } 1729 } 1730 if (nonew == 1) goto noinsert1; 1731 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1732 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1733 N = nrow++ - 1; high++; 1734 /* shift up all the later entries in this row */ 1735 for (ii=N; ii>=i; ii--) { 1736 rp[ii+1] = rp[ii]; 1737 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1738 } 1739 if (N>=i) { 1740 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1741 } 1742 rp[i] = bcol; 1743 ap[bs2*i + bs*cidx + ridx] = value; 1744 a->nz++; 1745 noinsert1:; 1746 low = i; 1747 } 1748 ailen[brow] = nrow; 1749 } 1750 A->same_nonzero = PETSC_FALSE; 1751 PetscFunctionReturn(0); 1752 } 1753 1754 1755 #undef __FUNCT__ 1756 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1757 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1758 { 1759 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1760 Mat outA; 1761 PetscErrorCode ierr; 1762 PetscTruth row_identity,col_identity; 1763 1764 PetscFunctionBegin; 1765 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1766 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1767 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1768 if (!row_identity || !col_identity) { 1769 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 1770 } 1771 1772 outA = inA; 1773 inA->factor = FACTOR_LU; 1774 1775 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1776 1777 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1778 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 1779 a->row = row; 1780 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1781 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 1782 a->col = col; 1783 1784 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1785 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1786 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1787 1788 /* 1789 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1790 for ILU(0) factorization with natural ordering 1791 */ 1792 if (inA->rmap.bs < 8) { 1793 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1794 } else { 1795 if (!a->solve_work) { 1796 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1797 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1798 } 1799 } 1800 1801 ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1802 1803 PetscFunctionReturn(0); 1804 } 1805 1806 EXTERN_C_BEGIN 1807 #undef __FUNCT__ 1808 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1809 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 1810 { 1811 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1812 PetscInt i,nz,nbs; 1813 1814 PetscFunctionBegin; 1815 nz = baij->maxnz/baij->bs2; 1816 nbs = baij->nbs; 1817 for (i=0; i<nz; i++) { 1818 baij->j[i] = indices[i]; 1819 } 1820 baij->nz = nz; 1821 for (i=0; i<nbs; i++) { 1822 baij->ilen[i] = baij->imax[i]; 1823 } 1824 1825 PetscFunctionReturn(0); 1826 } 1827 EXTERN_C_END 1828 1829 #undef __FUNCT__ 1830 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1831 /*@ 1832 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1833 in the matrix. 1834 1835 Input Parameters: 1836 + mat - the SeqBAIJ matrix 1837 - indices - the column indices 1838 1839 Level: advanced 1840 1841 Notes: 1842 This can be called if you have precomputed the nonzero structure of the 1843 matrix and want to provide it to the matrix object to improve the performance 1844 of the MatSetValues() operation. 1845 1846 You MUST have set the correct numbers of nonzeros per row in the call to 1847 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 1848 1849 MUST be called before any calls to MatSetValues(); 1850 1851 @*/ 1852 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1853 { 1854 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1855 1856 PetscFunctionBegin; 1857 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1858 PetscValidPointer(indices,2); 1859 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1860 if (f) { 1861 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1862 } else { 1863 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 1864 } 1865 PetscFunctionReturn(0); 1866 } 1867 1868 #undef __FUNCT__ 1869 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1870 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1871 { 1872 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1873 PetscErrorCode ierr; 1874 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 1875 PetscReal atmp; 1876 PetscScalar *x,zero = 0.0; 1877 MatScalar *aa; 1878 PetscInt ncols,brow,krow,kcol; 1879 1880 PetscFunctionBegin; 1881 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1882 bs = A->rmap.bs; 1883 aa = a->a; 1884 ai = a->i; 1885 aj = a->j; 1886 mbs = a->mbs; 1887 1888 ierr = VecSet(v,zero);CHKERRQ(ierr); 1889 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1890 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1891 if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1892 for (i=0; i<mbs; i++) { 1893 ncols = ai[1] - ai[0]; ai++; 1894 brow = bs*i; 1895 for (j=0; j<ncols; j++){ 1896 /* bcol = bs*(*aj); */ 1897 for (kcol=0; kcol<bs; kcol++){ 1898 for (krow=0; krow<bs; krow++){ 1899 atmp = PetscAbsScalar(*aa); aa++; 1900 row = brow + krow; /* row index */ 1901 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 1902 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1903 } 1904 } 1905 aj++; 1906 } 1907 } 1908 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1909 PetscFunctionReturn(0); 1910 } 1911 1912 #undef __FUNCT__ 1913 #define __FUNCT__ "MatCopy_SeqBAIJ" 1914 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 1915 { 1916 PetscErrorCode ierr; 1917 1918 PetscFunctionBegin; 1919 /* If the two matrices have the same copy implementation, use fast copy. */ 1920 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1921 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1922 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 1923 1924 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 1925 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1926 } 1927 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 1928 } else { 1929 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1930 } 1931 PetscFunctionReturn(0); 1932 } 1933 1934 #undef __FUNCT__ 1935 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1936 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 1937 { 1938 PetscErrorCode ierr; 1939 1940 PetscFunctionBegin; 1941 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 1942 PetscFunctionReturn(0); 1943 } 1944 1945 #undef __FUNCT__ 1946 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1947 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1948 { 1949 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1950 PetscFunctionBegin; 1951 *array = a->a; 1952 PetscFunctionReturn(0); 1953 } 1954 1955 #undef __FUNCT__ 1956 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1957 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1958 { 1959 PetscFunctionBegin; 1960 PetscFunctionReturn(0); 1961 } 1962 1963 #include "petscblaslapack.h" 1964 #undef __FUNCT__ 1965 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1966 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1967 { 1968 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1969 PetscErrorCode ierr; 1970 PetscInt i,bs=Y->rmap.bs,j,bs2; 1971 PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz; 1972 1973 PetscFunctionBegin; 1974 if (str == SAME_NONZERO_PATTERN) { 1975 PetscScalar alpha = a; 1976 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1977 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1978 if (y->xtoy && y->XtoY != X) { 1979 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1980 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1981 } 1982 if (!y->xtoy) { /* get xtoy */ 1983 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1984 y->XtoY = X; 1985 } 1986 bs2 = bs*bs; 1987 for (i=0; i<x->nz; i++) { 1988 j = 0; 1989 while (j < bs2){ 1990 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 1991 j++; 1992 } 1993 } 1994 ierr = PetscInfo3(0,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 1995 } else { 1996 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1997 } 1998 PetscFunctionReturn(0); 1999 } 2000 2001 #undef __FUNCT__ 2002 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2003 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2004 { 2005 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2006 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2007 PetscScalar *aa = a->a; 2008 2009 PetscFunctionBegin; 2010 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2011 PetscFunctionReturn(0); 2012 } 2013 2014 #undef __FUNCT__ 2015 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2016 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2017 { 2018 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2019 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2020 PetscScalar *aa = a->a; 2021 2022 PetscFunctionBegin; 2023 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2024 PetscFunctionReturn(0); 2025 } 2026 2027 2028 /* -------------------------------------------------------------------*/ 2029 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2030 MatGetRow_SeqBAIJ, 2031 MatRestoreRow_SeqBAIJ, 2032 MatMult_SeqBAIJ_N, 2033 /* 4*/ MatMultAdd_SeqBAIJ_N, 2034 MatMultTranspose_SeqBAIJ, 2035 MatMultTransposeAdd_SeqBAIJ, 2036 MatSolve_SeqBAIJ_N, 2037 0, 2038 0, 2039 /*10*/ 0, 2040 MatLUFactor_SeqBAIJ, 2041 0, 2042 0, 2043 MatTranspose_SeqBAIJ, 2044 /*15*/ MatGetInfo_SeqBAIJ, 2045 MatEqual_SeqBAIJ, 2046 MatGetDiagonal_SeqBAIJ, 2047 MatDiagonalScale_SeqBAIJ, 2048 MatNorm_SeqBAIJ, 2049 /*20*/ 0, 2050 MatAssemblyEnd_SeqBAIJ, 2051 0, 2052 MatSetOption_SeqBAIJ, 2053 MatZeroEntries_SeqBAIJ, 2054 /*25*/ MatZeroRows_SeqBAIJ, 2055 MatLUFactorSymbolic_SeqBAIJ, 2056 MatLUFactorNumeric_SeqBAIJ_N, 2057 MatCholeskyFactorSymbolic_SeqBAIJ, 2058 MatCholeskyFactorNumeric_SeqBAIJ_N, 2059 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2060 MatILUFactorSymbolic_SeqBAIJ, 2061 MatICCFactorSymbolic_SeqBAIJ, 2062 MatGetArray_SeqBAIJ, 2063 MatRestoreArray_SeqBAIJ, 2064 /*35*/ MatDuplicate_SeqBAIJ, 2065 0, 2066 0, 2067 MatILUFactor_SeqBAIJ, 2068 0, 2069 /*40*/ MatAXPY_SeqBAIJ, 2070 MatGetSubMatrices_SeqBAIJ, 2071 MatIncreaseOverlap_SeqBAIJ, 2072 MatGetValues_SeqBAIJ, 2073 MatCopy_SeqBAIJ, 2074 /*45*/ 0, 2075 MatScale_SeqBAIJ, 2076 0, 2077 0, 2078 0, 2079 /*50*/ 0, 2080 MatGetRowIJ_SeqBAIJ, 2081 MatRestoreRowIJ_SeqBAIJ, 2082 0, 2083 0, 2084 /*55*/ 0, 2085 0, 2086 0, 2087 0, 2088 MatSetValuesBlocked_SeqBAIJ, 2089 /*60*/ MatGetSubMatrix_SeqBAIJ, 2090 MatDestroy_SeqBAIJ, 2091 MatView_SeqBAIJ, 2092 0, 2093 0, 2094 /*65*/ 0, 2095 0, 2096 0, 2097 0, 2098 0, 2099 /*70*/ MatGetRowMax_SeqBAIJ, 2100 MatConvert_Basic, 2101 0, 2102 0, 2103 0, 2104 /*75*/ 0, 2105 0, 2106 0, 2107 0, 2108 0, 2109 /*80*/ 0, 2110 0, 2111 0, 2112 0, 2113 MatLoad_SeqBAIJ, 2114 /*85*/ 0, 2115 0, 2116 0, 2117 0, 2118 0, 2119 /*90*/ 0, 2120 0, 2121 0, 2122 0, 2123 0, 2124 /*95*/ 0, 2125 0, 2126 0, 2127 0, 2128 0, 2129 /*100*/0, 2130 0, 2131 0, 2132 0, 2133 0, 2134 /*105*/0, 2135 MatRealPart_SeqBAIJ, 2136 MatImaginaryPart_SeqBAIJ 2137 }; 2138 2139 EXTERN_C_BEGIN 2140 #undef __FUNCT__ 2141 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2142 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2143 { 2144 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2145 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2146 PetscErrorCode ierr; 2147 2148 PetscFunctionBegin; 2149 if (aij->nonew != 1) { 2150 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2151 } 2152 2153 /* allocate space for values if not already there */ 2154 if (!aij->saved_values) { 2155 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2156 } 2157 2158 /* copy values over */ 2159 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2160 PetscFunctionReturn(0); 2161 } 2162 EXTERN_C_END 2163 2164 EXTERN_C_BEGIN 2165 #undef __FUNCT__ 2166 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2167 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2168 { 2169 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2170 PetscErrorCode ierr; 2171 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2172 2173 PetscFunctionBegin; 2174 if (aij->nonew != 1) { 2175 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2176 } 2177 if (!aij->saved_values) { 2178 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2179 } 2180 2181 /* copy values over */ 2182 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2183 PetscFunctionReturn(0); 2184 } 2185 EXTERN_C_END 2186 2187 EXTERN_C_BEGIN 2188 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2189 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2190 EXTERN_C_END 2191 2192 EXTERN_C_BEGIN 2193 #undef __FUNCT__ 2194 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2195 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2196 { 2197 Mat_SeqBAIJ *b; 2198 PetscErrorCode ierr; 2199 PetscInt i,mbs,nbs,bs2,newbs = bs; 2200 PetscTruth flg,skipallocation = PETSC_FALSE; 2201 2202 PetscFunctionBegin; 2203 2204 if (nz == MAT_SKIP_ALLOCATION) { 2205 skipallocation = PETSC_TRUE; 2206 nz = 0; 2207 } 2208 2209 ierr = PetscOptionsBegin(B->comm,B->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2210 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2211 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2212 2213 if (nnz && newbs != bs) { 2214 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2215 } 2216 bs = newbs; 2217 2218 B->rmap.bs = B->cmap.bs = bs; 2219 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 2220 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 2221 2222 B->preallocated = PETSC_TRUE; 2223 2224 mbs = B->rmap.n/bs; 2225 nbs = B->cmap.n/bs; 2226 bs2 = bs*bs; 2227 2228 if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) { 2229 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs); 2230 } 2231 2232 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2233 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2234 if (nnz) { 2235 for (i=0; i<mbs; i++) { 2236 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2237 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2238 } 2239 } 2240 2241 b = (Mat_SeqBAIJ*)B->data; 2242 ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2243 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2244 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2245 2246 B->ops->solve = MatSolve_SeqBAIJ_Update; 2247 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2248 if (!flg) { 2249 switch (bs) { 2250 case 1: 2251 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2252 B->ops->mult = MatMult_SeqBAIJ_1; 2253 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2254 break; 2255 case 2: 2256 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2257 B->ops->mult = MatMult_SeqBAIJ_2; 2258 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2259 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2260 break; 2261 case 3: 2262 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2263 B->ops->mult = MatMult_SeqBAIJ_3; 2264 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2265 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2266 break; 2267 case 4: 2268 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2269 B->ops->mult = MatMult_SeqBAIJ_4; 2270 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2271 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2272 break; 2273 case 5: 2274 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2275 B->ops->mult = MatMult_SeqBAIJ_5; 2276 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2277 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2278 break; 2279 case 6: 2280 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2281 B->ops->mult = MatMult_SeqBAIJ_6; 2282 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2283 break; 2284 case 7: 2285 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2286 B->ops->mult = MatMult_SeqBAIJ_7; 2287 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2288 break; 2289 default: 2290 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2291 B->ops->mult = MatMult_SeqBAIJ_N; 2292 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2293 break; 2294 } 2295 } 2296 B->rmap.bs = bs; 2297 b->mbs = mbs; 2298 b->nbs = nbs; 2299 if (!skipallocation) { 2300 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2301 /* b->ilen will count nonzeros in each block row so far. */ 2302 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2303 if (!nnz) { 2304 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2305 else if (nz <= 0) nz = 1; 2306 for (i=0; i<mbs; i++) b->imax[i] = nz; 2307 nz = nz*mbs; 2308 } else { 2309 nz = 0; 2310 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2311 } 2312 2313 /* allocate the matrix space */ 2314 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 2315 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2316 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2317 b->singlemalloc = PETSC_TRUE; 2318 2319 b->i[0] = 0; 2320 for (i=1; i<mbs+1; i++) { 2321 b->i[i] = b->i[i-1] + b->imax[i-1]; 2322 } 2323 b->free_a = PETSC_TRUE; 2324 b->free_ij = PETSC_TRUE; 2325 } else { 2326 b->free_a = PETSC_FALSE; 2327 b->free_ij = PETSC_FALSE; 2328 } 2329 2330 B->rmap.bs = bs; 2331 b->bs2 = bs2; 2332 b->mbs = mbs; 2333 b->nz = 0; 2334 b->maxnz = nz*bs2; 2335 B->info.nz_unneeded = (PetscReal)b->maxnz; 2336 PetscFunctionReturn(0); 2337 } 2338 EXTERN_C_END 2339 2340 /*MC 2341 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2342 block sparse compressed row format. 2343 2344 Options Database Keys: 2345 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2346 2347 Level: beginner 2348 2349 .seealso: MatCreateSeqBAIJ() 2350 M*/ 2351 2352 EXTERN_C_BEGIN 2353 #undef __FUNCT__ 2354 #define __FUNCT__ "MatCreate_SeqBAIJ" 2355 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2356 { 2357 PetscErrorCode ierr; 2358 PetscMPIInt size; 2359 Mat_SeqBAIJ *b; 2360 2361 PetscFunctionBegin; 2362 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 2363 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2364 2365 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2366 B->data = (void*)b; 2367 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2368 B->factor = 0; 2369 B->mapping = 0; 2370 b->row = 0; 2371 b->col = 0; 2372 b->icol = 0; 2373 b->reallocs = 0; 2374 b->saved_values = 0; 2375 #if defined(PETSC_USE_MAT_SINGLE) 2376 b->setvalueslen = 0; 2377 b->setvaluescopy = PETSC_NULL; 2378 #endif 2379 2380 b->sorted = PETSC_FALSE; 2381 b->roworiented = PETSC_TRUE; 2382 b->nonew = 0; 2383 b->diag = 0; 2384 b->solve_work = 0; 2385 b->mult_work = 0; 2386 B->spptr = 0; 2387 B->info.nz_unneeded = (PetscReal)b->maxnz; 2388 b->keepzeroedrows = PETSC_FALSE; 2389 b->xtoy = 0; 2390 b->XtoY = 0; 2391 b->compressedrow.use = PETSC_FALSE; 2392 b->compressedrow.nrows = 0; 2393 b->compressedrow.i = PETSC_NULL; 2394 b->compressedrow.rindex = PETSC_NULL; 2395 b->compressedrow.checked = PETSC_FALSE; 2396 B->same_nonzero = PETSC_FALSE; 2397 2398 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2399 "MatInvertBlockDiagonal_SeqBAIJ", 2400 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2401 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2402 "MatStoreValues_SeqBAIJ", 2403 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2404 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2405 "MatRetrieveValues_SeqBAIJ", 2406 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2407 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2408 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2409 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2410 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2411 "MatConvert_SeqBAIJ_SeqAIJ", 2412 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2413 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2414 "MatConvert_SeqBAIJ_SeqSBAIJ", 2415 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2416 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2417 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2418 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2419 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 2420 PetscFunctionReturn(0); 2421 } 2422 EXTERN_C_END 2423 2424 #undef __FUNCT__ 2425 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2426 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2427 { 2428 Mat C; 2429 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2430 PetscErrorCode ierr; 2431 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2432 2433 PetscFunctionBegin; 2434 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2435 2436 *B = 0; 2437 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 2438 ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 2439 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 2440 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2441 c = (Mat_SeqBAIJ*)C->data; 2442 2443 C->rmap.N = A->rmap.N; 2444 C->cmap.N = A->cmap.N; 2445 C->rmap.bs = A->rmap.bs; 2446 c->bs2 = a->bs2; 2447 c->mbs = a->mbs; 2448 c->nbs = a->nbs; 2449 2450 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 2451 for (i=0; i<mbs; i++) { 2452 c->imax[i] = a->imax[i]; 2453 c->ilen[i] = a->ilen[i]; 2454 } 2455 2456 /* allocate the matrix space */ 2457 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2458 c->singlemalloc = PETSC_TRUE; 2459 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2460 if (mbs > 0) { 2461 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2462 if (cpvalues == MAT_COPY_VALUES) { 2463 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2464 } else { 2465 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2466 } 2467 } 2468 c->sorted = a->sorted; 2469 c->roworiented = a->roworiented; 2470 c->nonew = a->nonew; 2471 2472 if (a->diag) { 2473 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2474 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2475 for (i=0; i<mbs; i++) { 2476 c->diag[i] = a->diag[i]; 2477 } 2478 } else c->diag = 0; 2479 c->nz = a->nz; 2480 c->maxnz = a->maxnz; 2481 c->solve_work = 0; 2482 c->mult_work = 0; 2483 c->free_a = PETSC_TRUE; 2484 c->free_ij = PETSC_TRUE; 2485 C->preallocated = PETSC_TRUE; 2486 C->assembled = PETSC_TRUE; 2487 2488 c->compressedrow.use = a->compressedrow.use; 2489 c->compressedrow.nrows = a->compressedrow.nrows; 2490 c->compressedrow.checked = a->compressedrow.checked; 2491 if ( a->compressedrow.checked && a->compressedrow.use){ 2492 i = a->compressedrow.nrows; 2493 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2494 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2495 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2496 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2497 } else { 2498 c->compressedrow.use = PETSC_FALSE; 2499 c->compressedrow.i = PETSC_NULL; 2500 c->compressedrow.rindex = PETSC_NULL; 2501 } 2502 C->same_nonzero = A->same_nonzero; 2503 *B = C; 2504 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 2505 PetscFunctionReturn(0); 2506 } 2507 2508 #undef __FUNCT__ 2509 #define __FUNCT__ "MatLoad_SeqBAIJ" 2510 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A) 2511 { 2512 Mat_SeqBAIJ *a; 2513 Mat B; 2514 PetscErrorCode ierr; 2515 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2516 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2517 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2518 PetscInt *masked,nmask,tmp,bs2,ishift; 2519 PetscMPIInt size; 2520 int fd; 2521 PetscScalar *aa; 2522 MPI_Comm comm = ((PetscObject)viewer)->comm; 2523 2524 PetscFunctionBegin; 2525 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 2526 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2527 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2528 bs2 = bs*bs; 2529 2530 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2531 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2532 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2533 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2534 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2535 M = header[1]; N = header[2]; nz = header[3]; 2536 2537 if (header[3] < 0) { 2538 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2539 } 2540 2541 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2542 2543 /* 2544 This code adds extra rows to make sure the number of rows is 2545 divisible by the blocksize 2546 */ 2547 mbs = M/bs; 2548 extra_rows = bs - M + bs*(mbs); 2549 if (extra_rows == bs) extra_rows = 0; 2550 else mbs++; 2551 if (extra_rows) { 2552 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2553 } 2554 2555 /* read in row lengths */ 2556 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2557 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2558 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2559 2560 /* read in column indices */ 2561 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2562 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2563 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2564 2565 /* loop over row lengths determining block row lengths */ 2566 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2567 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2568 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2569 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2570 masked = mask + mbs; 2571 rowcount = 0; nzcount = 0; 2572 for (i=0; i<mbs; i++) { 2573 nmask = 0; 2574 for (j=0; j<bs; j++) { 2575 kmax = rowlengths[rowcount]; 2576 for (k=0; k<kmax; k++) { 2577 tmp = jj[nzcount++]/bs; 2578 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2579 } 2580 rowcount++; 2581 } 2582 browlengths[i] += nmask; 2583 /* zero out the mask elements we set */ 2584 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2585 } 2586 2587 /* create our matrix */ 2588 ierr = MatCreate(comm,&B); 2589 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 2590 ierr = MatSetType(B,type);CHKERRQ(ierr); 2591 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 2592 a = (Mat_SeqBAIJ*)B->data; 2593 2594 /* set matrix "i" values */ 2595 a->i[0] = 0; 2596 for (i=1; i<= mbs; i++) { 2597 a->i[i] = a->i[i-1] + browlengths[i-1]; 2598 a->ilen[i-1] = browlengths[i-1]; 2599 } 2600 a->nz = 0; 2601 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2602 2603 /* read in nonzero values */ 2604 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2605 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2606 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2607 2608 /* set "a" and "j" values into matrix */ 2609 nzcount = 0; jcount = 0; 2610 for (i=0; i<mbs; i++) { 2611 nzcountb = nzcount; 2612 nmask = 0; 2613 for (j=0; j<bs; j++) { 2614 kmax = rowlengths[i*bs+j]; 2615 for (k=0; k<kmax; k++) { 2616 tmp = jj[nzcount++]/bs; 2617 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2618 } 2619 } 2620 /* sort the masked values */ 2621 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2622 2623 /* set "j" values into matrix */ 2624 maskcount = 1; 2625 for (j=0; j<nmask; j++) { 2626 a->j[jcount++] = masked[j]; 2627 mask[masked[j]] = maskcount++; 2628 } 2629 /* set "a" values into matrix */ 2630 ishift = bs2*a->i[i]; 2631 for (j=0; j<bs; j++) { 2632 kmax = rowlengths[i*bs+j]; 2633 for (k=0; k<kmax; k++) { 2634 tmp = jj[nzcountb]/bs ; 2635 block = mask[tmp] - 1; 2636 point = jj[nzcountb] - bs*tmp; 2637 idx = ishift + bs2*block + j + bs*point; 2638 a->a[idx] = (MatScalar)aa[nzcountb++]; 2639 } 2640 } 2641 /* zero out the mask elements we set */ 2642 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2643 } 2644 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2645 2646 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2647 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2648 ierr = PetscFree(aa);CHKERRQ(ierr); 2649 ierr = PetscFree(jj);CHKERRQ(ierr); 2650 ierr = PetscFree(mask);CHKERRQ(ierr); 2651 2652 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2653 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2654 ierr = MatView_Private(B);CHKERRQ(ierr); 2655 2656 *A = B; 2657 PetscFunctionReturn(0); 2658 } 2659 2660 #undef __FUNCT__ 2661 #define __FUNCT__ "MatCreateSeqBAIJ" 2662 /*@C 2663 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2664 compressed row) format. For good matrix assembly performance the 2665 user should preallocate the matrix storage by setting the parameter nz 2666 (or the array nnz). By setting these parameters accurately, performance 2667 during matrix assembly can be increased by more than a factor of 50. 2668 2669 Collective on MPI_Comm 2670 2671 Input Parameters: 2672 + comm - MPI communicator, set to PETSC_COMM_SELF 2673 . bs - size of block 2674 . m - number of rows 2675 . n - number of columns 2676 . nz - number of nonzero blocks per block row (same for all rows) 2677 - nnz - array containing the number of nonzero blocks in the various block rows 2678 (possibly different for each block row) or PETSC_NULL 2679 2680 Output Parameter: 2681 . A - the matrix 2682 2683 Options Database Keys: 2684 . -mat_no_unroll - uses code that does not unroll the loops in the 2685 block calculations (much slower) 2686 . -mat_block_size - size of the blocks to use 2687 2688 Level: intermediate 2689 2690 Notes: 2691 The number of rows and columns must be divisible by blocksize. 2692 2693 If the nnz parameter is given then the nz parameter is ignored 2694 2695 A nonzero block is any block that as 1 or more nonzeros in it 2696 2697 The block AIJ format is fully compatible with standard Fortran 77 2698 storage. That is, the stored row and column indices can begin at 2699 either one (as in Fortran) or zero. See the users' manual for details. 2700 2701 Specify the preallocated storage with either nz or nnz (not both). 2702 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2703 allocation. For additional details, see the users manual chapter on 2704 matrices. 2705 2706 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2707 @*/ 2708 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 2709 { 2710 PetscErrorCode ierr; 2711 2712 PetscFunctionBegin; 2713 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2714 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2715 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2716 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 2717 PetscFunctionReturn(0); 2718 } 2719 2720 #undef __FUNCT__ 2721 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2722 /*@C 2723 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2724 per row in the matrix. For good matrix assembly performance the 2725 user should preallocate the matrix storage by setting the parameter nz 2726 (or the array nnz). By setting these parameters accurately, performance 2727 during matrix assembly can be increased by more than a factor of 50. 2728 2729 Collective on MPI_Comm 2730 2731 Input Parameters: 2732 + A - the matrix 2733 . bs - size of block 2734 . nz - number of block nonzeros per block row (same for all rows) 2735 - nnz - array containing the number of block nonzeros in the various block rows 2736 (possibly different for each block row) or PETSC_NULL 2737 2738 Options Database Keys: 2739 . -mat_no_unroll - uses code that does not unroll the loops in the 2740 block calculations (much slower) 2741 . -mat_block_size - size of the blocks to use 2742 2743 Level: intermediate 2744 2745 Notes: 2746 If the nnz parameter is given then the nz parameter is ignored 2747 2748 The block AIJ format is fully compatible with standard Fortran 77 2749 storage. That is, the stored row and column indices can begin at 2750 either one (as in Fortran) or zero. See the users' manual for details. 2751 2752 Specify the preallocated storage with either nz or nnz (not both). 2753 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2754 allocation. For additional details, see the users manual chapter on 2755 matrices. 2756 2757 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2758 @*/ 2759 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 2760 { 2761 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 2762 2763 PetscFunctionBegin; 2764 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2765 if (f) { 2766 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2767 } 2768 PetscFunctionReturn(0); 2769 } 2770 2771 #undef __FUNCT__ 2772 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 2773 /*@ 2774 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 2775 (upper triangular entries in CSR format) provided by the user. 2776 2777 Collective on MPI_Comm 2778 2779 Input Parameters: 2780 + comm - must be an MPI communicator of size 1 2781 . bs - size of block 2782 . m - number of rows 2783 . n - number of columns 2784 . i - row indices 2785 . j - column indices 2786 - a - matrix values 2787 2788 Output Parameter: 2789 . mat - the matrix 2790 2791 Level: intermediate 2792 2793 Notes: 2794 The i, j, and a arrays are not copied by this routine, the user must free these arrays 2795 once the matrix is destroyed 2796 2797 You cannot set new nonzero locations into this matrix, that will generate an error. 2798 2799 The i and j indices are 0 based 2800 2801 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 2802 2803 @*/ 2804 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 2805 { 2806 PetscErrorCode ierr; 2807 PetscInt ii; 2808 Mat_SeqBAIJ *baij; 2809 2810 PetscFunctionBegin; 2811 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 2812 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2813 2814 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2815 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 2816 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 2817 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 2818 baij = (Mat_SeqBAIJ*)(*mat)->data; 2819 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 2820 2821 baij->i = i; 2822 baij->j = j; 2823 baij->a = a; 2824 baij->singlemalloc = PETSC_FALSE; 2825 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 2826 baij->free_a = PETSC_FALSE; 2827 baij->free_ij = PETSC_FALSE; 2828 2829 for (ii=0; ii<m; ii++) { 2830 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 2831 #if defined(PETSC_USE_DEBUG) 2832 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 2833 #endif 2834 } 2835 #if defined(PETSC_USE_DEBUG) 2836 for (ii=0; ii<baij->i[m]; ii++) { 2837 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 2838 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 2839 } 2840 #endif 2841 2842 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2843 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2844 PetscFunctionReturn(0); 2845 } 2846