1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 15 /*@C 16 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 17 18 Collective on Mat 19 20 Input Parameters: 21 . mat - the matrix 22 23 Level: advanced 24 @*/ 25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 26 { 27 PetscErrorCode ierr,(*f)(Mat); 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 31 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 32 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 33 34 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 35 if (f) { 36 ierr = (*f)(mat);CHKERRQ(ierr); 37 } else { 38 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 39 } 40 PetscFunctionReturn(0); 41 } 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 47 { 48 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 49 PetscErrorCode ierr; 50 PetscInt *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs; 51 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 52 53 PetscFunctionBegin; 54 if (a->idiagvalid) PetscFunctionReturn(0); 55 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56 diag_offset = a->diag; 57 if (!a->idiag) { 58 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59 } 60 diag = a->idiag; 61 mdiag = a->idiag+bs*bs*mbs; 62 /* factor and invert each block */ 63 switch (bs){ 64 case 2: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 4*diag_offset[i]; 67 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 68 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 69 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 70 diag += 4; 71 mdiag += 4; 72 } 73 break; 74 case 3: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 9*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79 diag[8] = odiag[8]; 80 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 81 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 82 mdiag[8] = odiag[8]; 83 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 84 diag += 9; 85 mdiag += 9; 86 } 87 break; 88 case 4: 89 for (i=0; i<mbs; i++) { 90 odiag = v + 16*diag_offset[i]; 91 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 92 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 94 diag += 16; 95 mdiag += 16; 96 } 97 break; 98 case 5: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 25*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 104 diag += 25; 105 mdiag += 25; 106 } 107 break; 108 default: 109 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 110 } 111 a->idiagvalid = PETSC_TRUE; 112 PetscFunctionReturn(0); 113 } 114 EXTERN_C_END 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 119 { 120 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 121 PetscScalar *x,x1,x2,s1,s2; 122 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 123 PetscErrorCode ierr; 124 PetscInt m = a->mbs,i,i2,nz,idx; 125 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 126 127 PetscFunctionBegin; 128 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 129 its = its*lits; 130 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 131 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 132 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 133 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 134 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 135 136 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 137 138 diag = a->diag; 139 idiag = a->idiag; 140 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 141 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 142 143 if (flag & SOR_ZERO_INITIAL_GUESS) { 144 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 145 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 146 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 147 i2 = 2; 148 idiag += 4; 149 for (i=1; i<m; i++) { 150 v = aa + 4*ai[i]; 151 vi = aj + ai[i]; 152 nz = diag[i] - ai[i]; 153 s1 = b[i2]; s2 = b[i2+1]; 154 while (nz--) { 155 idx = 2*(*vi++); 156 x1 = x[idx]; x2 = x[1+idx]; 157 s1 -= v[0]*x1 + v[2]*x2; 158 s2 -= v[1]*x1 + v[3]*x2; 159 v += 4; 160 } 161 x[i2] = idiag[0]*s1 + idiag[2]*s2; 162 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 163 idiag += 4; 164 i2 += 2; 165 } 166 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 167 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 168 } 169 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 170 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 171 i2 = 0; 172 mdiag = a->idiag+4*a->mbs; 173 for (i=0; i<m; i++) { 174 x1 = x[i2]; x2 = x[i2+1]; 175 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 176 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 177 mdiag += 4; 178 i2 += 2; 179 } 180 ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 181 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 182 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 183 } 184 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 185 idiag = a->idiag+4*a->mbs - 4; 186 i2 = 2*m - 2; 187 x1 = x[i2]; x2 = x[i2+1]; 188 x[i2] = idiag[0]*x1 + idiag[2]*x2; 189 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 190 idiag -= 4; 191 i2 -= 2; 192 for (i=m-2; i>=0; i--) { 193 v = aa + 4*(diag[i]+1); 194 vi = aj + diag[i] + 1; 195 nz = ai[i+1] - diag[i] - 1; 196 s1 = x[i2]; s2 = x[i2+1]; 197 while (nz--) { 198 idx = 2*(*vi++); 199 x1 = x[idx]; x2 = x[1+idx]; 200 s1 -= v[0]*x1 + v[2]*x2; 201 s2 -= v[1]*x1 + v[3]*x2; 202 v += 4; 203 } 204 x[i2] = idiag[0]*s1 + idiag[2]*s2; 205 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 206 idiag -= 4; 207 i2 -= 2; 208 } 209 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 210 } 211 } else { 212 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 213 } 214 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 215 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 216 PetscFunctionReturn(0); 217 } 218 219 #undef __FUNCT__ 220 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 221 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 222 { 223 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 224 PetscScalar *x,x1,x2,x3,s1,s2,s3; 225 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 226 PetscErrorCode ierr; 227 PetscInt m = a->mbs,i,i2,nz,idx; 228 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 229 230 PetscFunctionBegin; 231 its = its*lits; 232 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 233 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 234 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 235 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 236 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 237 238 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 239 240 diag = a->diag; 241 idiag = a->idiag; 242 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 243 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 244 245 if (flag & SOR_ZERO_INITIAL_GUESS) { 246 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 247 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 248 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 249 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 250 i2 = 3; 251 idiag += 9; 252 for (i=1; i<m; i++) { 253 v = aa + 9*ai[i]; 254 vi = aj + ai[i]; 255 nz = diag[i] - ai[i]; 256 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 257 while (nz--) { 258 idx = 3*(*vi++); 259 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 260 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 261 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 262 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 263 v += 9; 264 } 265 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 266 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 267 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 268 idiag += 9; 269 i2 += 3; 270 } 271 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 272 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 273 } 274 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 275 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 276 i2 = 0; 277 mdiag = a->idiag+9*a->mbs; 278 for (i=0; i<m; i++) { 279 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 280 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 281 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 282 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 283 mdiag += 9; 284 i2 += 3; 285 } 286 ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 287 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 288 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 289 } 290 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 291 idiag = a->idiag+9*a->mbs - 9; 292 i2 = 3*m - 3; 293 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 294 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 295 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 296 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 297 idiag -= 9; 298 i2 -= 3; 299 for (i=m-2; i>=0; i--) { 300 v = aa + 9*(diag[i]+1); 301 vi = aj + diag[i] + 1; 302 nz = ai[i+1] - diag[i] - 1; 303 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 304 while (nz--) { 305 idx = 3*(*vi++); 306 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 307 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 308 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 309 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 310 v += 9; 311 } 312 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 313 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 314 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 315 idiag -= 9; 316 i2 -= 3; 317 } 318 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 319 } 320 } else { 321 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 322 } 323 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 324 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 325 PetscFunctionReturn(0); 326 } 327 328 #undef __FUNCT__ 329 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 330 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 331 { 332 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 333 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 334 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 335 PetscErrorCode ierr; 336 PetscInt m = a->mbs,i,i2,nz,idx; 337 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 338 339 PetscFunctionBegin; 340 its = its*lits; 341 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 342 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 343 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 344 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 345 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 346 347 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 348 349 diag = a->diag; 350 idiag = a->idiag; 351 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 352 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 353 354 if (flag & SOR_ZERO_INITIAL_GUESS) { 355 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 356 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 357 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 358 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 359 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 360 i2 = 4; 361 idiag += 16; 362 for (i=1; i<m; i++) { 363 v = aa + 16*ai[i]; 364 vi = aj + ai[i]; 365 nz = diag[i] - ai[i]; 366 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 367 while (nz--) { 368 idx = 4*(*vi++); 369 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 370 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 371 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 372 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 373 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 374 v += 16; 375 } 376 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 377 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 378 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 379 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 380 idiag += 16; 381 i2 += 4; 382 } 383 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 384 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 385 } 386 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 387 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 388 i2 = 0; 389 mdiag = a->idiag+16*a->mbs; 390 for (i=0; i<m; i++) { 391 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 392 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 393 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 394 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 395 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 396 mdiag += 16; 397 i2 += 4; 398 } 399 ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 400 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 401 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 402 } 403 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 404 idiag = a->idiag+16*a->mbs - 16; 405 i2 = 4*m - 4; 406 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 407 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 408 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 409 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 410 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 411 idiag -= 16; 412 i2 -= 4; 413 for (i=m-2; i>=0; i--) { 414 v = aa + 16*(diag[i]+1); 415 vi = aj + diag[i] + 1; 416 nz = ai[i+1] - diag[i] - 1; 417 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 418 while (nz--) { 419 idx = 4*(*vi++); 420 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 421 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 422 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 423 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 424 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 425 v += 16; 426 } 427 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 428 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 429 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 430 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 431 idiag -= 16; 432 i2 -= 4; 433 } 434 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 435 } 436 } else { 437 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 438 } 439 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 440 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 441 PetscFunctionReturn(0); 442 } 443 444 #undef __FUNCT__ 445 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 446 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 447 { 448 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 449 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 450 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 451 PetscErrorCode ierr; 452 PetscInt m = a->mbs,i,i2,nz,idx; 453 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 454 455 PetscFunctionBegin; 456 its = its*lits; 457 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 458 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 459 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 460 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 461 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 462 463 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 464 465 diag = a->diag; 466 idiag = a->idiag; 467 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 468 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 469 470 if (flag & SOR_ZERO_INITIAL_GUESS) { 471 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 472 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 473 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 474 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 475 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 476 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 477 i2 = 5; 478 idiag += 25; 479 for (i=1; i<m; i++) { 480 v = aa + 25*ai[i]; 481 vi = aj + ai[i]; 482 nz = diag[i] - ai[i]; 483 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 484 while (nz--) { 485 idx = 5*(*vi++); 486 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 487 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 488 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 489 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 490 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 491 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 492 v += 25; 493 } 494 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 495 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 496 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 497 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 498 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 499 idiag += 25; 500 i2 += 5; 501 } 502 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 503 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 504 } 505 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 506 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 507 i2 = 0; 508 mdiag = a->idiag+25*a->mbs; 509 for (i=0; i<m; i++) { 510 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 511 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 512 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 513 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 514 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 515 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 516 mdiag += 25; 517 i2 += 5; 518 } 519 ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 520 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 521 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 522 } 523 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 524 idiag = a->idiag+25*a->mbs - 25; 525 i2 = 5*m - 5; 526 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 527 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 528 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 529 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 530 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 531 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 532 idiag -= 25; 533 i2 -= 5; 534 for (i=m-2; i>=0; i--) { 535 v = aa + 25*(diag[i]+1); 536 vi = aj + diag[i] + 1; 537 nz = ai[i+1] - diag[i] - 1; 538 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 539 while (nz--) { 540 idx = 5*(*vi++); 541 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 542 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 543 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 544 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 545 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 546 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 547 v += 25; 548 } 549 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 550 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 551 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 552 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 553 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 554 idiag -= 25; 555 i2 -= 5; 556 } 557 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 558 } 559 } else { 560 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 561 } 562 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 563 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 564 PetscFunctionReturn(0); 565 } 566 567 /* 568 Special version for direct calls from Fortran (Used in PETSc-fun3d) 569 */ 570 #if defined(PETSC_HAVE_FORTRAN_CAPS) 571 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 572 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 573 #define matsetvaluesblocked4_ matsetvaluesblocked4 574 #endif 575 576 EXTERN_C_BEGIN 577 #undef __FUNCT__ 578 #define __FUNCT__ "matsetvaluesblocked4_" 579 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 580 { 581 Mat A = *AA; 582 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 583 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 584 PetscInt *ai=a->i,*ailen=a->ilen; 585 PetscInt *aj=a->j,stepval,lastcol = -1; 586 const PetscScalar *value = v; 587 MatScalar *ap,*aa = a->a,*bap; 588 589 PetscFunctionBegin; 590 if (A->rmap.bs != 4) SETERRABORT(A->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 591 stepval = (n-1)*4; 592 for (k=0; k<m; k++) { /* loop over added rows */ 593 row = im[k]; 594 rp = aj + ai[row]; 595 ap = aa + 16*ai[row]; 596 nrow = ailen[row]; 597 low = 0; 598 high = nrow; 599 for (l=0; l<n; l++) { /* loop over added columns */ 600 col = in[l]; 601 if (col <= lastcol) low = 0; else high = nrow; 602 lastcol = col; 603 value = v + k*(stepval+4 + l)*4; 604 while (high-low > 7) { 605 t = (low+high)/2; 606 if (rp[t] > col) high = t; 607 else low = t; 608 } 609 for (i=low; i<high; i++) { 610 if (rp[i] > col) break; 611 if (rp[i] == col) { 612 bap = ap + 16*i; 613 for (ii=0; ii<4; ii++,value+=stepval) { 614 for (jj=ii; jj<16; jj+=4) { 615 bap[jj] += *value++; 616 } 617 } 618 goto noinsert2; 619 } 620 } 621 N = nrow++ - 1; 622 high++; /* added new column index thus must search to one higher than before */ 623 /* shift up all the later entries in this row */ 624 for (ii=N; ii>=i; ii--) { 625 rp[ii+1] = rp[ii]; 626 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 627 } 628 if (N >= i) { 629 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 630 } 631 rp[i] = col; 632 bap = ap + 16*i; 633 for (ii=0; ii<4; ii++,value+=stepval) { 634 for (jj=ii; jj<16; jj+=4) { 635 bap[jj] = *value++; 636 } 637 } 638 noinsert2:; 639 low = i; 640 } 641 ailen[row] = nrow; 642 } 643 PetscFunctionReturnVoid(); 644 } 645 EXTERN_C_END 646 647 #if defined(PETSC_HAVE_FORTRAN_CAPS) 648 #define matsetvalues4_ MATSETVALUES4 649 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 650 #define matsetvalues4_ matsetvalues4 651 #endif 652 653 EXTERN_C_BEGIN 654 #undef __FUNCT__ 655 #define __FUNCT__ "MatSetValues4_" 656 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 657 { 658 Mat A = *AA; 659 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 660 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 661 PetscInt *ai=a->i,*ailen=a->ilen; 662 PetscInt *aj=a->j,brow,bcol; 663 PetscInt ridx,cidx,lastcol = -1; 664 MatScalar *ap,value,*aa=a->a,*bap; 665 666 PetscFunctionBegin; 667 for (k=0; k<m; k++) { /* loop over added rows */ 668 row = im[k]; brow = row/4; 669 rp = aj + ai[brow]; 670 ap = aa + 16*ai[brow]; 671 nrow = ailen[brow]; 672 low = 0; 673 high = nrow; 674 for (l=0; l<n; l++) { /* loop over added columns */ 675 col = in[l]; bcol = col/4; 676 ridx = row % 4; cidx = col % 4; 677 value = v[l + k*n]; 678 if (col <= lastcol) low = 0; else high = nrow; 679 lastcol = col; 680 while (high-low > 7) { 681 t = (low+high)/2; 682 if (rp[t] > bcol) high = t; 683 else low = t; 684 } 685 for (i=low; i<high; i++) { 686 if (rp[i] > bcol) break; 687 if (rp[i] == bcol) { 688 bap = ap + 16*i + 4*cidx + ridx; 689 *bap += value; 690 goto noinsert1; 691 } 692 } 693 N = nrow++ - 1; 694 high++; /* added new column thus must search to one higher than before */ 695 /* shift up all the later entries in this row */ 696 for (ii=N; ii>=i; ii--) { 697 rp[ii+1] = rp[ii]; 698 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 699 } 700 if (N>=i) { 701 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 702 } 703 rp[i] = bcol; 704 ap[16*i + 4*cidx + ridx] = value; 705 noinsert1:; 706 low = i; 707 } 708 ailen[brow] = nrow; 709 } 710 PetscFunctionReturnVoid(); 711 } 712 EXTERN_C_END 713 714 /* UGLY, ugly, ugly 715 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 716 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 717 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 718 converts the entries into single precision and then calls ..._MatScalar() to put them 719 into the single precision data structures. 720 */ 721 #if defined(PETSC_USE_MAT_SINGLE) 722 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 723 #else 724 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 725 #endif 726 727 #define CHUNKSIZE 10 728 729 /* 730 Checks for missing diagonals 731 */ 732 #undef __FUNCT__ 733 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 734 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A) 735 { 736 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 737 PetscErrorCode ierr; 738 PetscInt *diag,*jj = a->j,i; 739 740 PetscFunctionBegin; 741 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 742 diag = a->diag; 743 for (i=0; i<a->mbs; i++) { 744 if (jj[diag[i]] != i) { 745 SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i); 746 } 747 } 748 PetscFunctionReturn(0); 749 } 750 751 #undef __FUNCT__ 752 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 753 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 754 { 755 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 756 PetscErrorCode ierr; 757 PetscInt i,j,m = a->mbs; 758 759 PetscFunctionBegin; 760 if (!a->diag) { 761 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 762 } 763 for (i=0; i<m; i++) { 764 a->diag[i] = a->i[i+1]; 765 for (j=a->i[i]; j<a->i[i+1]; j++) { 766 if (a->j[j] == i) { 767 a->diag[i] = j; 768 break; 769 } 770 } 771 } 772 PetscFunctionReturn(0); 773 } 774 775 776 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 777 778 #undef __FUNCT__ 779 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 780 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 781 { 782 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 783 PetscErrorCode ierr; 784 PetscInt n = a->mbs,i; 785 786 PetscFunctionBegin; 787 *nn = n; 788 if (!ia) PetscFunctionReturn(0); 789 if (symmetric) { 790 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 791 } else if (oshift == 1) { 792 /* temporarily add 1 to i and j indices */ 793 PetscInt nz = a->i[n]; 794 for (i=0; i<nz; i++) a->j[i]++; 795 for (i=0; i<n+1; i++) a->i[i]++; 796 *ia = a->i; *ja = a->j; 797 } else { 798 *ia = a->i; *ja = a->j; 799 } 800 801 PetscFunctionReturn(0); 802 } 803 804 #undef __FUNCT__ 805 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 806 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 807 { 808 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 809 PetscErrorCode ierr; 810 PetscInt i,n = a->mbs; 811 812 PetscFunctionBegin; 813 if (!ia) PetscFunctionReturn(0); 814 if (symmetric) { 815 ierr = PetscFree(*ia);CHKERRQ(ierr); 816 ierr = PetscFree(*ja);CHKERRQ(ierr); 817 } else if (oshift == 1) { 818 PetscInt nz = a->i[n]-1; 819 for (i=0; i<nz; i++) a->j[i]--; 820 for (i=0; i<n+1; i++) a->i[i]--; 821 } 822 PetscFunctionReturn(0); 823 } 824 825 #undef __FUNCT__ 826 #define __FUNCT__ "MatDestroy_SeqBAIJ" 827 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 828 { 829 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 830 PetscErrorCode ierr; 831 832 PetscFunctionBegin; 833 #if defined(PETSC_USE_LOG) 834 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz); 835 #endif 836 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 837 if (a->row) { 838 ierr = ISDestroy(a->row);CHKERRQ(ierr); 839 } 840 if (a->col) { 841 ierr = ISDestroy(a->col);CHKERRQ(ierr); 842 } 843 ierr = PetscFree(a->diag);CHKERRQ(ierr); 844 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 845 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 846 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 847 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 848 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 849 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 850 #if defined(PETSC_USE_MAT_SINGLE) 851 ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr); 852 #endif 853 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 854 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 855 856 ierr = PetscFree(a);CHKERRQ(ierr); 857 858 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 859 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 860 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 861 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 862 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 863 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 864 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 865 PetscFunctionReturn(0); 866 } 867 868 #undef __FUNCT__ 869 #define __FUNCT__ "MatSetOption_SeqBAIJ" 870 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op) 871 { 872 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 873 PetscErrorCode ierr; 874 875 PetscFunctionBegin; 876 switch (op) { 877 case MAT_ROW_ORIENTED: 878 a->roworiented = PETSC_TRUE; 879 break; 880 case MAT_COLUMN_ORIENTED: 881 a->roworiented = PETSC_FALSE; 882 break; 883 case MAT_COLUMNS_SORTED: 884 a->sorted = PETSC_TRUE; 885 break; 886 case MAT_COLUMNS_UNSORTED: 887 a->sorted = PETSC_FALSE; 888 break; 889 case MAT_KEEP_ZEROED_ROWS: 890 a->keepzeroedrows = PETSC_TRUE; 891 break; 892 case MAT_NO_NEW_NONZERO_LOCATIONS: 893 a->nonew = 1; 894 break; 895 case MAT_NEW_NONZERO_LOCATION_ERR: 896 a->nonew = -1; 897 break; 898 case MAT_NEW_NONZERO_ALLOCATION_ERR: 899 a->nonew = -2; 900 break; 901 case MAT_YES_NEW_NONZERO_LOCATIONS: 902 a->nonew = 0; 903 break; 904 case MAT_ROWS_SORTED: 905 case MAT_ROWS_UNSORTED: 906 case MAT_YES_NEW_DIAGONALS: 907 case MAT_IGNORE_OFF_PROC_ENTRIES: 908 case MAT_USE_HASH_TABLE: 909 ierr = PetscInfo1(A,"Option %d ignored\n",op);CHKERRQ(ierr); 910 break; 911 case MAT_NO_NEW_DIAGONALS: 912 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 913 case MAT_SYMMETRIC: 914 case MAT_STRUCTURALLY_SYMMETRIC: 915 case MAT_NOT_SYMMETRIC: 916 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 917 case MAT_HERMITIAN: 918 case MAT_NOT_HERMITIAN: 919 case MAT_SYMMETRY_ETERNAL: 920 case MAT_NOT_SYMMETRY_ETERNAL: 921 break; 922 default: 923 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 924 } 925 PetscFunctionReturn(0); 926 } 927 928 #undef __FUNCT__ 929 #define __FUNCT__ "MatGetRow_SeqBAIJ" 930 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 931 { 932 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 933 PetscErrorCode ierr; 934 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 935 MatScalar *aa,*aa_i; 936 PetscScalar *v_i; 937 938 PetscFunctionBegin; 939 bs = A->rmap.bs; 940 ai = a->i; 941 aj = a->j; 942 aa = a->a; 943 bs2 = a->bs2; 944 945 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 946 947 bn = row/bs; /* Block number */ 948 bp = row % bs; /* Block Position */ 949 M = ai[bn+1] - ai[bn]; 950 *nz = bs*M; 951 952 if (v) { 953 *v = 0; 954 if (*nz) { 955 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 956 for (i=0; i<M; i++) { /* for each block in the block row */ 957 v_i = *v + i*bs; 958 aa_i = aa + bs2*(ai[bn] + i); 959 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 960 } 961 } 962 } 963 964 if (idx) { 965 *idx = 0; 966 if (*nz) { 967 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 968 for (i=0; i<M; i++) { /* for each block in the block row */ 969 idx_i = *idx + i*bs; 970 itmp = bs*aj[ai[bn] + i]; 971 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 972 } 973 } 974 } 975 PetscFunctionReturn(0); 976 } 977 978 #undef __FUNCT__ 979 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 980 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 981 { 982 PetscErrorCode ierr; 983 984 PetscFunctionBegin; 985 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 986 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 987 PetscFunctionReturn(0); 988 } 989 990 #undef __FUNCT__ 991 #define __FUNCT__ "MatTranspose_SeqBAIJ" 992 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 993 { 994 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 995 Mat C; 996 PetscErrorCode ierr; 997 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col; 998 PetscInt *rows,*cols,bs2=a->bs2; 999 PetscScalar *array; 1000 1001 PetscFunctionBegin; 1002 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 1003 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1004 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1005 1006 #if defined(PETSC_USE_MAT_SINGLE) 1007 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 1008 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 1009 #else 1010 array = a->a; 1011 #endif 1012 1013 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1014 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 1015 ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr); 1016 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1017 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1018 ierr = PetscFree(col);CHKERRQ(ierr); 1019 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1020 cols = rows + bs; 1021 for (i=0; i<mbs; i++) { 1022 cols[0] = i*bs; 1023 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1024 len = ai[i+1] - ai[i]; 1025 for (j=0; j<len; j++) { 1026 rows[0] = (*aj++)*bs; 1027 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1028 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1029 array += bs2; 1030 } 1031 } 1032 ierr = PetscFree(rows);CHKERRQ(ierr); 1033 #if defined(PETSC_USE_MAT_SINGLE) 1034 ierr = PetscFree(array); 1035 #endif 1036 1037 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1038 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1039 1040 if (B) { 1041 *B = C; 1042 } else { 1043 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1044 } 1045 PetscFunctionReturn(0); 1046 } 1047 1048 #undef __FUNCT__ 1049 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1050 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1051 { 1052 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1053 PetscErrorCode ierr; 1054 PetscInt i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2; 1055 int fd; 1056 PetscScalar *aa; 1057 FILE *file; 1058 1059 PetscFunctionBegin; 1060 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1061 ierr = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1062 col_lens[0] = MAT_FILE_COOKIE; 1063 1064 col_lens[1] = A->rmap.N; 1065 col_lens[2] = A->cmap.n; 1066 col_lens[3] = a->nz*bs2; 1067 1068 /* store lengths of each row and write (including header) to file */ 1069 count = 0; 1070 for (i=0; i<a->mbs; i++) { 1071 for (j=0; j<bs; j++) { 1072 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1073 } 1074 } 1075 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1076 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1077 1078 /* store column indices (zero start index) */ 1079 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1080 count = 0; 1081 for (i=0; i<a->mbs; i++) { 1082 for (j=0; j<bs; j++) { 1083 for (k=a->i[i]; k<a->i[i+1]; k++) { 1084 for (l=0; l<bs; l++) { 1085 jj[count++] = bs*a->j[k] + l; 1086 } 1087 } 1088 } 1089 } 1090 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1091 ierr = PetscFree(jj);CHKERRQ(ierr); 1092 1093 /* store nonzero values */ 1094 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1095 count = 0; 1096 for (i=0; i<a->mbs; i++) { 1097 for (j=0; j<bs; j++) { 1098 for (k=a->i[i]; k<a->i[i+1]; k++) { 1099 for (l=0; l<bs; l++) { 1100 aa[count++] = a->a[bs2*k + l*bs + j]; 1101 } 1102 } 1103 } 1104 } 1105 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1106 ierr = PetscFree(aa);CHKERRQ(ierr); 1107 1108 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1109 if (file) { 1110 fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs); 1111 } 1112 PetscFunctionReturn(0); 1113 } 1114 1115 #undef __FUNCT__ 1116 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1117 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1118 { 1119 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1120 PetscErrorCode ierr; 1121 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 1122 PetscViewerFormat format; 1123 1124 PetscFunctionBegin; 1125 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1126 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1127 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1128 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1129 Mat aij; 1130 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1131 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1132 ierr = MatDestroy(aij);CHKERRQ(ierr); 1133 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1134 PetscFunctionReturn(0); 1135 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1136 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1137 for (i=0; i<a->mbs; i++) { 1138 for (j=0; j<bs; j++) { 1139 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1140 for (k=a->i[i]; k<a->i[i+1]; k++) { 1141 for (l=0; l<bs; l++) { 1142 #if defined(PETSC_USE_COMPLEX) 1143 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1144 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1145 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1146 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1147 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1148 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1149 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1150 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1151 } 1152 #else 1153 if (a->a[bs2*k + l*bs + j] != 0.0) { 1154 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1155 } 1156 #endif 1157 } 1158 } 1159 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1160 } 1161 } 1162 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1163 } else { 1164 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1165 for (i=0; i<a->mbs; i++) { 1166 for (j=0; j<bs; j++) { 1167 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1168 for (k=a->i[i]; k<a->i[i+1]; k++) { 1169 for (l=0; l<bs; l++) { 1170 #if defined(PETSC_USE_COMPLEX) 1171 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1172 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1173 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1174 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1175 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1176 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1177 } else { 1178 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1179 } 1180 #else 1181 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1182 #endif 1183 } 1184 } 1185 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1186 } 1187 } 1188 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1189 } 1190 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1191 PetscFunctionReturn(0); 1192 } 1193 1194 #undef __FUNCT__ 1195 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1196 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1197 { 1198 Mat A = (Mat) Aa; 1199 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1200 PetscErrorCode ierr; 1201 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 1202 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1203 MatScalar *aa; 1204 PetscViewer viewer; 1205 1206 PetscFunctionBegin; 1207 1208 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1209 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1210 1211 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1212 1213 /* loop over matrix elements drawing boxes */ 1214 color = PETSC_DRAW_BLUE; 1215 for (i=0,row=0; i<mbs; i++,row+=bs) { 1216 for (j=a->i[i]; j<a->i[i+1]; j++) { 1217 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1218 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1219 aa = a->a + j*bs2; 1220 for (k=0; k<bs; k++) { 1221 for (l=0; l<bs; l++) { 1222 if (PetscRealPart(*aa++) >= 0.) continue; 1223 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1224 } 1225 } 1226 } 1227 } 1228 color = PETSC_DRAW_CYAN; 1229 for (i=0,row=0; i<mbs; i++,row+=bs) { 1230 for (j=a->i[i]; j<a->i[i+1]; j++) { 1231 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1232 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1233 aa = a->a + j*bs2; 1234 for (k=0; k<bs; k++) { 1235 for (l=0; l<bs; l++) { 1236 if (PetscRealPart(*aa++) != 0.) continue; 1237 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1238 } 1239 } 1240 } 1241 } 1242 1243 color = PETSC_DRAW_RED; 1244 for (i=0,row=0; i<mbs; i++,row+=bs) { 1245 for (j=a->i[i]; j<a->i[i+1]; j++) { 1246 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1247 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1248 aa = a->a + j*bs2; 1249 for (k=0; k<bs; k++) { 1250 for (l=0; l<bs; l++) { 1251 if (PetscRealPart(*aa++) <= 0.) continue; 1252 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1253 } 1254 } 1255 } 1256 } 1257 PetscFunctionReturn(0); 1258 } 1259 1260 #undef __FUNCT__ 1261 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1262 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1263 { 1264 PetscErrorCode ierr; 1265 PetscReal xl,yl,xr,yr,w,h; 1266 PetscDraw draw; 1267 PetscTruth isnull; 1268 1269 PetscFunctionBegin; 1270 1271 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1272 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1273 1274 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1275 xr = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 1276 xr += w; yr += h; xl = -w; yl = -h; 1277 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1278 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1279 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1280 PetscFunctionReturn(0); 1281 } 1282 1283 #undef __FUNCT__ 1284 #define __FUNCT__ "MatView_SeqBAIJ" 1285 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1286 { 1287 PetscErrorCode ierr; 1288 PetscTruth iascii,isbinary,isdraw; 1289 1290 PetscFunctionBegin; 1291 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1292 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1293 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1294 if (iascii){ 1295 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1296 } else if (isbinary) { 1297 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1298 } else if (isdraw) { 1299 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1300 } else { 1301 Mat B; 1302 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1303 ierr = MatView(B,viewer);CHKERRQ(ierr); 1304 ierr = MatDestroy(B);CHKERRQ(ierr); 1305 } 1306 PetscFunctionReturn(0); 1307 } 1308 1309 1310 #undef __FUNCT__ 1311 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1312 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1313 { 1314 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1315 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1316 PetscInt *ai = a->i,*ailen = a->ilen; 1317 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 1318 MatScalar *ap,*aa = a->a,zero = 0.0; 1319 1320 PetscFunctionBegin; 1321 for (k=0; k<m; k++) { /* loop over rows */ 1322 row = im[k]; brow = row/bs; 1323 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 1324 if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1325 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1326 nrow = ailen[brow]; 1327 for (l=0; l<n; l++) { /* loop over columns */ 1328 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 1329 if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1330 col = in[l] ; 1331 bcol = col/bs; 1332 cidx = col%bs; 1333 ridx = row%bs; 1334 high = nrow; 1335 low = 0; /* assume unsorted */ 1336 while (high-low > 5) { 1337 t = (low+high)/2; 1338 if (rp[t] > bcol) high = t; 1339 else low = t; 1340 } 1341 for (i=low; i<high; i++) { 1342 if (rp[i] > bcol) break; 1343 if (rp[i] == bcol) { 1344 *v++ = ap[bs2*i+bs*cidx+ridx]; 1345 goto finished; 1346 } 1347 } 1348 *v++ = zero; 1349 finished:; 1350 } 1351 } 1352 PetscFunctionReturn(0); 1353 } 1354 1355 #if defined(PETSC_USE_MAT_SINGLE) 1356 #undef __FUNCT__ 1357 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1358 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 1359 { 1360 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1361 PetscErrorCode ierr; 1362 PetscInt i,N = m*n*b->bs2; 1363 MatScalar *vsingle; 1364 1365 PetscFunctionBegin; 1366 if (N > b->setvalueslen) { 1367 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 1368 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1369 b->setvalueslen = N; 1370 } 1371 vsingle = b->setvaluescopy; 1372 for (i=0; i<N; i++) { 1373 vsingle[i] = v[i]; 1374 } 1375 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1376 PetscFunctionReturn(0); 1377 } 1378 #endif 1379 1380 1381 #undef __FUNCT__ 1382 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1383 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is) 1384 { 1385 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1386 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1387 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1388 PetscErrorCode ierr; 1389 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 1390 PetscTruth roworiented=a->roworiented; 1391 const MatScalar *value = v; 1392 MatScalar *ap,*aa = a->a,*bap; 1393 1394 PetscFunctionBegin; 1395 if (roworiented) { 1396 stepval = (n-1)*bs; 1397 } else { 1398 stepval = (m-1)*bs; 1399 } 1400 for (k=0; k<m; k++) { /* loop over added rows */ 1401 row = im[k]; 1402 if (row < 0) continue; 1403 #if defined(PETSC_USE_DEBUG) 1404 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1405 #endif 1406 rp = aj + ai[row]; 1407 ap = aa + bs2*ai[row]; 1408 rmax = imax[row]; 1409 nrow = ailen[row]; 1410 low = 0; 1411 high = nrow; 1412 for (l=0; l<n; l++) { /* loop over added columns */ 1413 if (in[l] < 0) continue; 1414 #if defined(PETSC_USE_DEBUG) 1415 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1416 #endif 1417 col = in[l]; 1418 if (roworiented) { 1419 value = v + k*(stepval+bs)*bs + l*bs; 1420 } else { 1421 value = v + l*(stepval+bs)*bs + k*bs; 1422 } 1423 if (col <= lastcol) low = 0; else high = nrow; 1424 lastcol = col; 1425 while (high-low > 7) { 1426 t = (low+high)/2; 1427 if (rp[t] > col) high = t; 1428 else low = t; 1429 } 1430 for (i=low; i<high; i++) { 1431 if (rp[i] > col) break; 1432 if (rp[i] == col) { 1433 bap = ap + bs2*i; 1434 if (roworiented) { 1435 if (is == ADD_VALUES) { 1436 for (ii=0; ii<bs; ii++,value+=stepval) { 1437 for (jj=ii; jj<bs2; jj+=bs) { 1438 bap[jj] += *value++; 1439 } 1440 } 1441 } else { 1442 for (ii=0; ii<bs; ii++,value+=stepval) { 1443 for (jj=ii; jj<bs2; jj+=bs) { 1444 bap[jj] = *value++; 1445 } 1446 } 1447 } 1448 } else { 1449 if (is == ADD_VALUES) { 1450 for (ii=0; ii<bs; ii++,value+=stepval) { 1451 for (jj=0; jj<bs; jj++) { 1452 *bap++ += *value++; 1453 } 1454 } 1455 } else { 1456 for (ii=0; ii<bs; ii++,value+=stepval) { 1457 for (jj=0; jj<bs; jj++) { 1458 *bap++ = *value++; 1459 } 1460 } 1461 } 1462 } 1463 goto noinsert2; 1464 } 1465 } 1466 if (nonew == 1) goto noinsert2; 1467 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1468 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew); 1469 N = nrow++ - 1; high++; 1470 /* shift up all the later entries in this row */ 1471 for (ii=N; ii>=i; ii--) { 1472 rp[ii+1] = rp[ii]; 1473 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1474 } 1475 if (N >= i) { 1476 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1477 } 1478 rp[i] = col; 1479 bap = ap + bs2*i; 1480 if (roworiented) { 1481 for (ii=0; ii<bs; ii++,value+=stepval) { 1482 for (jj=ii; jj<bs2; jj+=bs) { 1483 bap[jj] = *value++; 1484 } 1485 } 1486 } else { 1487 for (ii=0; ii<bs; ii++,value+=stepval) { 1488 for (jj=0; jj<bs; jj++) { 1489 *bap++ = *value++; 1490 } 1491 } 1492 } 1493 noinsert2:; 1494 low = i; 1495 } 1496 ailen[row] = nrow; 1497 } 1498 PetscFunctionReturn(0); 1499 } 1500 1501 #undef __FUNCT__ 1502 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1503 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1504 { 1505 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1506 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1507 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 1508 PetscErrorCode ierr; 1509 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1510 MatScalar *aa = a->a,*ap; 1511 PetscReal ratio=0.6; 1512 1513 PetscFunctionBegin; 1514 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1515 1516 if (m) rmax = ailen[0]; 1517 for (i=1; i<mbs; i++) { 1518 /* move each row back by the amount of empty slots (fshift) before it*/ 1519 fshift += imax[i-1] - ailen[i-1]; 1520 rmax = PetscMax(rmax,ailen[i]); 1521 if (fshift) { 1522 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1523 N = ailen[i]; 1524 for (j=0; j<N; j++) { 1525 ip[j-fshift] = ip[j]; 1526 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1527 } 1528 } 1529 ai[i] = ai[i-1] + ailen[i-1]; 1530 } 1531 if (mbs) { 1532 fshift += imax[mbs-1] - ailen[mbs-1]; 1533 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1534 } 1535 /* reset ilen and imax for each row */ 1536 for (i=0; i<mbs; i++) { 1537 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1538 } 1539 a->nz = ai[mbs]; 1540 1541 /* diagonals may have moved, so kill the diagonal pointers */ 1542 a->idiagvalid = PETSC_FALSE; 1543 if (fshift && a->diag) { 1544 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1545 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1546 a->diag = 0; 1547 } 1548 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1549 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1550 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1551 a->reallocs = 0; 1552 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1553 1554 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1555 if (a->compressedrow.use){ 1556 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1557 } 1558 1559 A->same_nonzero = PETSC_TRUE; 1560 PetscFunctionReturn(0); 1561 } 1562 1563 /* 1564 This function returns an array of flags which indicate the locations of contiguous 1565 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1566 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1567 Assume: sizes should be long enough to hold all the values. 1568 */ 1569 #undef __FUNCT__ 1570 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1571 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1572 { 1573 PetscInt i,j,k,row; 1574 PetscTruth flg; 1575 1576 PetscFunctionBegin; 1577 for (i=0,j=0; i<n; j++) { 1578 row = idx[i]; 1579 if (row%bs!=0) { /* Not the begining of a block */ 1580 sizes[j] = 1; 1581 i++; 1582 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1583 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1584 i++; 1585 } else { /* Begining of the block, so check if the complete block exists */ 1586 flg = PETSC_TRUE; 1587 for (k=1; k<bs; k++) { 1588 if (row+k != idx[i+k]) { /* break in the block */ 1589 flg = PETSC_FALSE; 1590 break; 1591 } 1592 } 1593 if (flg) { /* No break in the bs */ 1594 sizes[j] = bs; 1595 i+= bs; 1596 } else { 1597 sizes[j] = 1; 1598 i++; 1599 } 1600 } 1601 } 1602 *bs_max = j; 1603 PetscFunctionReturn(0); 1604 } 1605 1606 #undef __FUNCT__ 1607 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1608 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 1609 { 1610 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1611 PetscErrorCode ierr; 1612 PetscInt i,j,k,count,*rows; 1613 PetscInt bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max; 1614 PetscScalar zero = 0.0; 1615 MatScalar *aa; 1616 1617 PetscFunctionBegin; 1618 /* Make a copy of the IS and sort it */ 1619 /* allocate memory for rows,sizes */ 1620 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1621 sizes = rows + is_n; 1622 1623 /* copy IS values to rows, and sort them */ 1624 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1625 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1626 if (baij->keepzeroedrows) { 1627 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1628 bs_max = is_n; 1629 A->same_nonzero = PETSC_TRUE; 1630 } else { 1631 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1632 A->same_nonzero = PETSC_FALSE; 1633 } 1634 1635 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1636 row = rows[j]; 1637 if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 1638 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1639 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1640 if (sizes[i] == bs && !baij->keepzeroedrows) { 1641 if (diag != 0.0) { 1642 if (baij->ilen[row/bs] > 0) { 1643 baij->ilen[row/bs] = 1; 1644 baij->j[baij->i[row/bs]] = row/bs; 1645 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1646 } 1647 /* Now insert all the diagonal values for this bs */ 1648 for (k=0; k<bs; k++) { 1649 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 1650 } 1651 } else { /* (diag == 0.0) */ 1652 baij->ilen[row/bs] = 0; 1653 } /* end (diag == 0.0) */ 1654 } else { /* (sizes[i] != bs) */ 1655 #if defined (PETSC_USE_DEBUG) 1656 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 1657 #endif 1658 for (k=0; k<count; k++) { 1659 aa[0] = zero; 1660 aa += bs; 1661 } 1662 if (diag != 0.0) { 1663 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 1664 } 1665 } 1666 } 1667 1668 ierr = PetscFree(rows);CHKERRQ(ierr); 1669 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1670 PetscFunctionReturn(0); 1671 } 1672 1673 #undef __FUNCT__ 1674 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1675 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1676 { 1677 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1678 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 1679 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1680 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 1681 PetscErrorCode ierr; 1682 PetscInt ridx,cidx,bs2=a->bs2; 1683 PetscTruth roworiented=a->roworiented; 1684 MatScalar *ap,value,*aa=a->a,*bap; 1685 1686 PetscFunctionBegin; 1687 for (k=0; k<m; k++) { /* loop over added rows */ 1688 row = im[k]; 1689 brow = row/bs; 1690 if (row < 0) continue; 1691 #if defined(PETSC_USE_DEBUG) 1692 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 1693 #endif 1694 rp = aj + ai[brow]; 1695 ap = aa + bs2*ai[brow]; 1696 rmax = imax[brow]; 1697 nrow = ailen[brow]; 1698 low = 0; 1699 high = nrow; 1700 for (l=0; l<n; l++) { /* loop over added columns */ 1701 if (in[l] < 0) continue; 1702 #if defined(PETSC_USE_DEBUG) 1703 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 1704 #endif 1705 col = in[l]; bcol = col/bs; 1706 ridx = row % bs; cidx = col % bs; 1707 if (roworiented) { 1708 value = v[l + k*n]; 1709 } else { 1710 value = v[k + l*m]; 1711 } 1712 if (col <= lastcol) low = 0; else high = nrow; 1713 lastcol = col; 1714 while (high-low > 7) { 1715 t = (low+high)/2; 1716 if (rp[t] > bcol) high = t; 1717 else low = t; 1718 } 1719 for (i=low; i<high; i++) { 1720 if (rp[i] > bcol) break; 1721 if (rp[i] == bcol) { 1722 bap = ap + bs2*i + bs*cidx + ridx; 1723 if (is == ADD_VALUES) *bap += value; 1724 else *bap = value; 1725 goto noinsert1; 1726 } 1727 } 1728 if (nonew == 1) goto noinsert1; 1729 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1730 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew); 1731 N = nrow++ - 1; high++; 1732 /* shift up all the later entries in this row */ 1733 for (ii=N; ii>=i; ii--) { 1734 rp[ii+1] = rp[ii]; 1735 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1736 } 1737 if (N>=i) { 1738 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1739 } 1740 rp[i] = bcol; 1741 ap[bs2*i + bs*cidx + ridx] = value; 1742 a->nz++; 1743 noinsert1:; 1744 low = i; 1745 } 1746 ailen[brow] = nrow; 1747 } 1748 A->same_nonzero = PETSC_FALSE; 1749 PetscFunctionReturn(0); 1750 } 1751 1752 1753 #undef __FUNCT__ 1754 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1755 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1756 { 1757 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1758 Mat outA; 1759 PetscErrorCode ierr; 1760 PetscTruth row_identity,col_identity; 1761 1762 PetscFunctionBegin; 1763 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1764 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1765 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1766 if (!row_identity || !col_identity) { 1767 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 1768 } 1769 1770 outA = inA; 1771 inA->factor = FACTOR_LU; 1772 1773 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1774 1775 a->row = row; 1776 a->col = col; 1777 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1778 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1779 1780 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1781 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1782 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1783 1784 /* 1785 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1786 for ILU(0) factorization with natural ordering 1787 */ 1788 if (inA->rmap.bs < 8) { 1789 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1790 } else { 1791 if (!a->solve_work) { 1792 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1793 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1794 } 1795 } 1796 1797 ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1798 1799 PetscFunctionReturn(0); 1800 } 1801 1802 EXTERN_C_BEGIN 1803 #undef __FUNCT__ 1804 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1805 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 1806 { 1807 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1808 PetscInt i,nz,nbs; 1809 1810 PetscFunctionBegin; 1811 nz = baij->maxnz/baij->bs2; 1812 nbs = baij->nbs; 1813 for (i=0; i<nz; i++) { 1814 baij->j[i] = indices[i]; 1815 } 1816 baij->nz = nz; 1817 for (i=0; i<nbs; i++) { 1818 baij->ilen[i] = baij->imax[i]; 1819 } 1820 1821 PetscFunctionReturn(0); 1822 } 1823 EXTERN_C_END 1824 1825 #undef __FUNCT__ 1826 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1827 /*@ 1828 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1829 in the matrix. 1830 1831 Input Parameters: 1832 + mat - the SeqBAIJ matrix 1833 - indices - the column indices 1834 1835 Level: advanced 1836 1837 Notes: 1838 This can be called if you have precomputed the nonzero structure of the 1839 matrix and want to provide it to the matrix object to improve the performance 1840 of the MatSetValues() operation. 1841 1842 You MUST have set the correct numbers of nonzeros per row in the call to 1843 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 1844 1845 MUST be called before any calls to MatSetValues(); 1846 1847 @*/ 1848 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1849 { 1850 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1851 1852 PetscFunctionBegin; 1853 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1854 PetscValidPointer(indices,2); 1855 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1856 if (f) { 1857 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1858 } else { 1859 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 1860 } 1861 PetscFunctionReturn(0); 1862 } 1863 1864 #undef __FUNCT__ 1865 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1866 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1867 { 1868 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1869 PetscErrorCode ierr; 1870 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 1871 PetscReal atmp; 1872 PetscScalar *x,zero = 0.0; 1873 MatScalar *aa; 1874 PetscInt ncols,brow,krow,kcol; 1875 1876 PetscFunctionBegin; 1877 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1878 bs = A->rmap.bs; 1879 aa = a->a; 1880 ai = a->i; 1881 aj = a->j; 1882 mbs = a->mbs; 1883 1884 ierr = VecSet(v,zero);CHKERRQ(ierr); 1885 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1886 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1887 if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1888 for (i=0; i<mbs; i++) { 1889 ncols = ai[1] - ai[0]; ai++; 1890 brow = bs*i; 1891 for (j=0; j<ncols; j++){ 1892 /* bcol = bs*(*aj); */ 1893 for (kcol=0; kcol<bs; kcol++){ 1894 for (krow=0; krow<bs; krow++){ 1895 atmp = PetscAbsScalar(*aa); aa++; 1896 row = brow + krow; /* row index */ 1897 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 1898 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1899 } 1900 } 1901 aj++; 1902 } 1903 } 1904 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1905 PetscFunctionReturn(0); 1906 } 1907 1908 #undef __FUNCT__ 1909 #define __FUNCT__ "MatCopy_SeqBAIJ" 1910 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 1911 { 1912 PetscErrorCode ierr; 1913 1914 PetscFunctionBegin; 1915 /* If the two matrices have the same copy implementation, use fast copy. */ 1916 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1917 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1918 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 1919 1920 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 1921 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1922 } 1923 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 1924 } else { 1925 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1926 } 1927 PetscFunctionReturn(0); 1928 } 1929 1930 #undef __FUNCT__ 1931 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1932 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 1933 { 1934 PetscErrorCode ierr; 1935 1936 PetscFunctionBegin; 1937 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 1938 PetscFunctionReturn(0); 1939 } 1940 1941 #undef __FUNCT__ 1942 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1943 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1944 { 1945 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1946 PetscFunctionBegin; 1947 *array = a->a; 1948 PetscFunctionReturn(0); 1949 } 1950 1951 #undef __FUNCT__ 1952 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1953 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1954 { 1955 PetscFunctionBegin; 1956 PetscFunctionReturn(0); 1957 } 1958 1959 #include "petscblaslapack.h" 1960 #undef __FUNCT__ 1961 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1962 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1963 { 1964 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1965 PetscErrorCode ierr; 1966 PetscInt i,bs=Y->rmap.bs,j,bs2; 1967 PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz; 1968 1969 PetscFunctionBegin; 1970 if (str == SAME_NONZERO_PATTERN) { 1971 PetscScalar alpha = a; 1972 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1973 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1974 if (y->xtoy && y->XtoY != X) { 1975 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1976 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1977 } 1978 if (!y->xtoy) { /* get xtoy */ 1979 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1980 y->XtoY = X; 1981 } 1982 bs2 = bs*bs; 1983 for (i=0; i<x->nz; i++) { 1984 j = 0; 1985 while (j < bs2){ 1986 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 1987 j++; 1988 } 1989 } 1990 ierr = PetscInfo3(0,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 1991 } else { 1992 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1993 } 1994 PetscFunctionReturn(0); 1995 } 1996 1997 #undef __FUNCT__ 1998 #define __FUNCT__ "MatRealPart_SeqBAIJ" 1999 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2000 { 2001 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2002 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2003 PetscScalar *aa = a->a; 2004 2005 PetscFunctionBegin; 2006 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2007 PetscFunctionReturn(0); 2008 } 2009 2010 #undef __FUNCT__ 2011 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2012 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2013 { 2014 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2015 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2016 PetscScalar *aa = a->a; 2017 2018 PetscFunctionBegin; 2019 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2020 PetscFunctionReturn(0); 2021 } 2022 2023 2024 /* -------------------------------------------------------------------*/ 2025 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2026 MatGetRow_SeqBAIJ, 2027 MatRestoreRow_SeqBAIJ, 2028 MatMult_SeqBAIJ_N, 2029 /* 4*/ MatMultAdd_SeqBAIJ_N, 2030 MatMultTranspose_SeqBAIJ, 2031 MatMultTransposeAdd_SeqBAIJ, 2032 MatSolve_SeqBAIJ_N, 2033 0, 2034 0, 2035 /*10*/ 0, 2036 MatLUFactor_SeqBAIJ, 2037 0, 2038 0, 2039 MatTranspose_SeqBAIJ, 2040 /*15*/ MatGetInfo_SeqBAIJ, 2041 MatEqual_SeqBAIJ, 2042 MatGetDiagonal_SeqBAIJ, 2043 MatDiagonalScale_SeqBAIJ, 2044 MatNorm_SeqBAIJ, 2045 /*20*/ 0, 2046 MatAssemblyEnd_SeqBAIJ, 2047 0, 2048 MatSetOption_SeqBAIJ, 2049 MatZeroEntries_SeqBAIJ, 2050 /*25*/ MatZeroRows_SeqBAIJ, 2051 MatLUFactorSymbolic_SeqBAIJ, 2052 MatLUFactorNumeric_SeqBAIJ_N, 2053 MatCholeskyFactorSymbolic_SeqBAIJ, 2054 MatCholeskyFactorNumeric_SeqBAIJ_N, 2055 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2056 MatILUFactorSymbolic_SeqBAIJ, 2057 MatICCFactorSymbolic_SeqBAIJ, 2058 MatGetArray_SeqBAIJ, 2059 MatRestoreArray_SeqBAIJ, 2060 /*35*/ MatDuplicate_SeqBAIJ, 2061 0, 2062 0, 2063 MatILUFactor_SeqBAIJ, 2064 0, 2065 /*40*/ MatAXPY_SeqBAIJ, 2066 MatGetSubMatrices_SeqBAIJ, 2067 MatIncreaseOverlap_SeqBAIJ, 2068 MatGetValues_SeqBAIJ, 2069 MatCopy_SeqBAIJ, 2070 /*45*/ 0, 2071 MatScale_SeqBAIJ, 2072 0, 2073 0, 2074 0, 2075 /*50*/ 0, 2076 MatGetRowIJ_SeqBAIJ, 2077 MatRestoreRowIJ_SeqBAIJ, 2078 0, 2079 0, 2080 /*55*/ 0, 2081 0, 2082 0, 2083 0, 2084 MatSetValuesBlocked_SeqBAIJ, 2085 /*60*/ MatGetSubMatrix_SeqBAIJ, 2086 MatDestroy_SeqBAIJ, 2087 MatView_SeqBAIJ, 2088 0, 2089 0, 2090 /*65*/ 0, 2091 0, 2092 0, 2093 0, 2094 0, 2095 /*70*/ MatGetRowMax_SeqBAIJ, 2096 MatConvert_Basic, 2097 0, 2098 0, 2099 0, 2100 /*75*/ 0, 2101 0, 2102 0, 2103 0, 2104 0, 2105 /*80*/ 0, 2106 0, 2107 0, 2108 0, 2109 MatLoad_SeqBAIJ, 2110 /*85*/ 0, 2111 0, 2112 0, 2113 0, 2114 0, 2115 /*90*/ 0, 2116 0, 2117 0, 2118 0, 2119 0, 2120 /*95*/ 0, 2121 0, 2122 0, 2123 0, 2124 0, 2125 /*100*/0, 2126 0, 2127 0, 2128 0, 2129 0, 2130 /*105*/0, 2131 MatRealPart_SeqBAIJ, 2132 MatImaginaryPart_SeqBAIJ 2133 }; 2134 2135 EXTERN_C_BEGIN 2136 #undef __FUNCT__ 2137 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2138 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2139 { 2140 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2141 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2142 PetscErrorCode ierr; 2143 2144 PetscFunctionBegin; 2145 if (aij->nonew != 1) { 2146 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2147 } 2148 2149 /* allocate space for values if not already there */ 2150 if (!aij->saved_values) { 2151 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2152 } 2153 2154 /* copy values over */ 2155 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2156 PetscFunctionReturn(0); 2157 } 2158 EXTERN_C_END 2159 2160 EXTERN_C_BEGIN 2161 #undef __FUNCT__ 2162 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2163 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2164 { 2165 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2166 PetscErrorCode ierr; 2167 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2168 2169 PetscFunctionBegin; 2170 if (aij->nonew != 1) { 2171 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2172 } 2173 if (!aij->saved_values) { 2174 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2175 } 2176 2177 /* copy values over */ 2178 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2179 PetscFunctionReturn(0); 2180 } 2181 EXTERN_C_END 2182 2183 EXTERN_C_BEGIN 2184 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2185 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2186 EXTERN_C_END 2187 2188 EXTERN_C_BEGIN 2189 #undef __FUNCT__ 2190 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2191 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2192 { 2193 Mat_SeqBAIJ *b; 2194 PetscErrorCode ierr; 2195 PetscInt i,mbs,nbs,bs2,newbs = bs; 2196 PetscTruth flg,skipallocation = PETSC_FALSE; 2197 2198 PetscFunctionBegin; 2199 2200 if (nz == MAT_SKIP_ALLOCATION) { 2201 skipallocation = PETSC_TRUE; 2202 nz = 0; 2203 } 2204 2205 ierr = PetscOptionsBegin(B->comm,B->prefix,"Options for SEQBAIJ matrix","Mat");CHKERRQ(ierr); 2206 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2207 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2208 2209 if (nnz && newbs != bs) { 2210 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2211 } 2212 bs = newbs; 2213 2214 B->rmap.bs = B->cmap.bs = bs; 2215 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 2216 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 2217 2218 B->preallocated = PETSC_TRUE; 2219 2220 mbs = B->rmap.n/bs; 2221 nbs = B->cmap.n/bs; 2222 bs2 = bs*bs; 2223 2224 if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) { 2225 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs); 2226 } 2227 2228 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2229 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2230 if (nnz) { 2231 for (i=0; i<mbs; i++) { 2232 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2233 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2234 } 2235 } 2236 2237 b = (Mat_SeqBAIJ*)B->data; 2238 ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Options for SEQBAIJ matrix","Mat");CHKERRQ(ierr); 2239 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2240 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2241 2242 B->ops->solve = MatSolve_SeqBAIJ_Update; 2243 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2244 if (!flg) { 2245 switch (bs) { 2246 case 1: 2247 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2248 B->ops->mult = MatMult_SeqBAIJ_1; 2249 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2250 break; 2251 case 2: 2252 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2253 B->ops->mult = MatMult_SeqBAIJ_2; 2254 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2255 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2256 break; 2257 case 3: 2258 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2259 B->ops->mult = MatMult_SeqBAIJ_3; 2260 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2261 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2262 break; 2263 case 4: 2264 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2265 B->ops->mult = MatMult_SeqBAIJ_4; 2266 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2267 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2268 break; 2269 case 5: 2270 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2271 B->ops->mult = MatMult_SeqBAIJ_5; 2272 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2273 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2274 break; 2275 case 6: 2276 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2277 B->ops->mult = MatMult_SeqBAIJ_6; 2278 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2279 break; 2280 case 7: 2281 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2282 B->ops->mult = MatMult_SeqBAIJ_7; 2283 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2284 break; 2285 default: 2286 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2287 B->ops->mult = MatMult_SeqBAIJ_N; 2288 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2289 break; 2290 } 2291 } 2292 B->rmap.bs = bs; 2293 b->mbs = mbs; 2294 b->nbs = nbs; 2295 if (!skipallocation) { 2296 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2297 /* b->ilen will count nonzeros in each block row so far. */ 2298 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2299 if (!nnz) { 2300 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2301 else if (nz <= 0) nz = 1; 2302 for (i=0; i<mbs; i++) b->imax[i] = nz; 2303 nz = nz*mbs; 2304 } else { 2305 nz = 0; 2306 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2307 } 2308 2309 /* allocate the matrix space */ 2310 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 2311 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2312 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2313 b->singlemalloc = PETSC_TRUE; 2314 2315 b->i[0] = 0; 2316 for (i=1; i<mbs+1; i++) { 2317 b->i[i] = b->i[i-1] + b->imax[i-1]; 2318 } 2319 b->free_a = PETSC_TRUE; 2320 b->free_ij = PETSC_TRUE; 2321 } else { 2322 b->free_a = PETSC_FALSE; 2323 b->free_ij = PETSC_FALSE; 2324 } 2325 2326 B->rmap.bs = bs; 2327 b->bs2 = bs2; 2328 b->mbs = mbs; 2329 b->nz = 0; 2330 b->maxnz = nz*bs2; 2331 B->info.nz_unneeded = (PetscReal)b->maxnz; 2332 PetscFunctionReturn(0); 2333 } 2334 EXTERN_C_END 2335 2336 /*MC 2337 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2338 block sparse compressed row format. 2339 2340 Options Database Keys: 2341 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2342 2343 Level: beginner 2344 2345 .seealso: MatCreateSeqBAIJ() 2346 M*/ 2347 2348 EXTERN_C_BEGIN 2349 #undef __FUNCT__ 2350 #define __FUNCT__ "MatCreate_SeqBAIJ" 2351 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2352 { 2353 PetscErrorCode ierr; 2354 PetscMPIInt size; 2355 Mat_SeqBAIJ *b; 2356 2357 PetscFunctionBegin; 2358 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 2359 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2360 2361 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2362 B->data = (void*)b; 2363 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2364 B->factor = 0; 2365 B->mapping = 0; 2366 b->row = 0; 2367 b->col = 0; 2368 b->icol = 0; 2369 b->reallocs = 0; 2370 b->saved_values = 0; 2371 #if defined(PETSC_USE_MAT_SINGLE) 2372 b->setvalueslen = 0; 2373 b->setvaluescopy = PETSC_NULL; 2374 #endif 2375 2376 b->sorted = PETSC_FALSE; 2377 b->roworiented = PETSC_TRUE; 2378 b->nonew = 0; 2379 b->diag = 0; 2380 b->solve_work = 0; 2381 b->mult_work = 0; 2382 B->spptr = 0; 2383 B->info.nz_unneeded = (PetscReal)b->maxnz; 2384 b->keepzeroedrows = PETSC_FALSE; 2385 b->xtoy = 0; 2386 b->XtoY = 0; 2387 b->compressedrow.use = PETSC_FALSE; 2388 b->compressedrow.nrows = 0; 2389 b->compressedrow.i = PETSC_NULL; 2390 b->compressedrow.rindex = PETSC_NULL; 2391 b->compressedrow.checked = PETSC_FALSE; 2392 B->same_nonzero = PETSC_FALSE; 2393 2394 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2395 "MatInvertBlockDiagonal_SeqBAIJ", 2396 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2397 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2398 "MatStoreValues_SeqBAIJ", 2399 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2400 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2401 "MatRetrieveValues_SeqBAIJ", 2402 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2403 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2404 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2405 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2406 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2407 "MatConvert_SeqBAIJ_SeqAIJ", 2408 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2409 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2410 "MatConvert_SeqBAIJ_SeqSBAIJ", 2411 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2412 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2413 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2414 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2415 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 2416 PetscFunctionReturn(0); 2417 } 2418 EXTERN_C_END 2419 2420 #undef __FUNCT__ 2421 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2422 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2423 { 2424 Mat C; 2425 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2426 PetscErrorCode ierr; 2427 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2428 2429 PetscFunctionBegin; 2430 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2431 2432 *B = 0; 2433 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 2434 ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 2435 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 2436 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2437 c = (Mat_SeqBAIJ*)C->data; 2438 2439 C->rmap.N = A->rmap.N; 2440 C->cmap.N = A->cmap.N; 2441 C->rmap.bs = A->rmap.bs; 2442 c->bs2 = a->bs2; 2443 c->mbs = a->mbs; 2444 c->nbs = a->nbs; 2445 2446 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 2447 for (i=0; i<mbs; i++) { 2448 c->imax[i] = a->imax[i]; 2449 c->ilen[i] = a->ilen[i]; 2450 } 2451 2452 /* allocate the matrix space */ 2453 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2454 c->singlemalloc = PETSC_TRUE; 2455 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2456 if (mbs > 0) { 2457 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2458 if (cpvalues == MAT_COPY_VALUES) { 2459 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2460 } else { 2461 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2462 } 2463 } 2464 c->sorted = a->sorted; 2465 c->roworiented = a->roworiented; 2466 c->nonew = a->nonew; 2467 2468 if (a->diag) { 2469 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2470 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2471 for (i=0; i<mbs; i++) { 2472 c->diag[i] = a->diag[i]; 2473 } 2474 } else c->diag = 0; 2475 c->nz = a->nz; 2476 c->maxnz = a->maxnz; 2477 c->solve_work = 0; 2478 c->mult_work = 0; 2479 c->free_a = PETSC_TRUE; 2480 c->free_ij = PETSC_TRUE; 2481 C->preallocated = PETSC_TRUE; 2482 C->assembled = PETSC_TRUE; 2483 2484 c->compressedrow.use = a->compressedrow.use; 2485 c->compressedrow.nrows = a->compressedrow.nrows; 2486 c->compressedrow.checked = a->compressedrow.checked; 2487 if ( a->compressedrow.checked && a->compressedrow.use){ 2488 i = a->compressedrow.nrows; 2489 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2490 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2491 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2492 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2493 } else { 2494 c->compressedrow.use = PETSC_FALSE; 2495 c->compressedrow.i = PETSC_NULL; 2496 c->compressedrow.rindex = PETSC_NULL; 2497 } 2498 C->same_nonzero = A->same_nonzero; 2499 *B = C; 2500 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 2501 PetscFunctionReturn(0); 2502 } 2503 2504 #undef __FUNCT__ 2505 #define __FUNCT__ "MatLoad_SeqBAIJ" 2506 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A) 2507 { 2508 Mat_SeqBAIJ *a; 2509 Mat B; 2510 PetscErrorCode ierr; 2511 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2512 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2513 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2514 PetscInt *masked,nmask,tmp,bs2,ishift; 2515 PetscMPIInt size; 2516 int fd; 2517 PetscScalar *aa; 2518 MPI_Comm comm = ((PetscObject)viewer)->comm; 2519 2520 PetscFunctionBegin; 2521 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 2522 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2523 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2524 bs2 = bs*bs; 2525 2526 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2527 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2528 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2529 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2530 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2531 M = header[1]; N = header[2]; nz = header[3]; 2532 2533 if (header[3] < 0) { 2534 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2535 } 2536 2537 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2538 2539 /* 2540 This code adds extra rows to make sure the number of rows is 2541 divisible by the blocksize 2542 */ 2543 mbs = M/bs; 2544 extra_rows = bs - M + bs*(mbs); 2545 if (extra_rows == bs) extra_rows = 0; 2546 else mbs++; 2547 if (extra_rows) { 2548 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2549 } 2550 2551 /* read in row lengths */ 2552 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2553 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2554 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2555 2556 /* read in column indices */ 2557 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2558 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2559 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2560 2561 /* loop over row lengths determining block row lengths */ 2562 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2563 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2564 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2565 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2566 masked = mask + mbs; 2567 rowcount = 0; nzcount = 0; 2568 for (i=0; i<mbs; i++) { 2569 nmask = 0; 2570 for (j=0; j<bs; j++) { 2571 kmax = rowlengths[rowcount]; 2572 for (k=0; k<kmax; k++) { 2573 tmp = jj[nzcount++]/bs; 2574 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2575 } 2576 rowcount++; 2577 } 2578 browlengths[i] += nmask; 2579 /* zero out the mask elements we set */ 2580 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2581 } 2582 2583 /* create our matrix */ 2584 ierr = MatCreate(comm,&B); 2585 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 2586 ierr = MatSetType(B,type);CHKERRQ(ierr); 2587 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 2588 a = (Mat_SeqBAIJ*)B->data; 2589 2590 /* set matrix "i" values */ 2591 a->i[0] = 0; 2592 for (i=1; i<= mbs; i++) { 2593 a->i[i] = a->i[i-1] + browlengths[i-1]; 2594 a->ilen[i-1] = browlengths[i-1]; 2595 } 2596 a->nz = 0; 2597 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2598 2599 /* read in nonzero values */ 2600 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2601 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2602 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2603 2604 /* set "a" and "j" values into matrix */ 2605 nzcount = 0; jcount = 0; 2606 for (i=0; i<mbs; i++) { 2607 nzcountb = nzcount; 2608 nmask = 0; 2609 for (j=0; j<bs; j++) { 2610 kmax = rowlengths[i*bs+j]; 2611 for (k=0; k<kmax; k++) { 2612 tmp = jj[nzcount++]/bs; 2613 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2614 } 2615 } 2616 /* sort the masked values */ 2617 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2618 2619 /* set "j" values into matrix */ 2620 maskcount = 1; 2621 for (j=0; j<nmask; j++) { 2622 a->j[jcount++] = masked[j]; 2623 mask[masked[j]] = maskcount++; 2624 } 2625 /* set "a" values into matrix */ 2626 ishift = bs2*a->i[i]; 2627 for (j=0; j<bs; j++) { 2628 kmax = rowlengths[i*bs+j]; 2629 for (k=0; k<kmax; k++) { 2630 tmp = jj[nzcountb]/bs ; 2631 block = mask[tmp] - 1; 2632 point = jj[nzcountb] - bs*tmp; 2633 idx = ishift + bs2*block + j + bs*point; 2634 a->a[idx] = (MatScalar)aa[nzcountb++]; 2635 } 2636 } 2637 /* zero out the mask elements we set */ 2638 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2639 } 2640 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2641 2642 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2643 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2644 ierr = PetscFree(aa);CHKERRQ(ierr); 2645 ierr = PetscFree(jj);CHKERRQ(ierr); 2646 ierr = PetscFree(mask);CHKERRQ(ierr); 2647 2648 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2649 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2650 ierr = MatView_Private(B);CHKERRQ(ierr); 2651 2652 *A = B; 2653 PetscFunctionReturn(0); 2654 } 2655 2656 #undef __FUNCT__ 2657 #define __FUNCT__ "MatCreateSeqBAIJ" 2658 /*@C 2659 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2660 compressed row) format. For good matrix assembly performance the 2661 user should preallocate the matrix storage by setting the parameter nz 2662 (or the array nnz). By setting these parameters accurately, performance 2663 during matrix assembly can be increased by more than a factor of 50. 2664 2665 Collective on MPI_Comm 2666 2667 Input Parameters: 2668 + comm - MPI communicator, set to PETSC_COMM_SELF 2669 . bs - size of block 2670 . m - number of rows 2671 . n - number of columns 2672 . nz - number of nonzero blocks per block row (same for all rows) 2673 - nnz - array containing the number of nonzero blocks in the various block rows 2674 (possibly different for each block row) or PETSC_NULL 2675 2676 Output Parameter: 2677 . A - the matrix 2678 2679 Options Database Keys: 2680 . -mat_no_unroll - uses code that does not unroll the loops in the 2681 block calculations (much slower) 2682 . -mat_block_size - size of the blocks to use 2683 2684 Level: intermediate 2685 2686 Notes: 2687 The number of rows and columns must be divisible by blocksize. 2688 2689 If the nnz parameter is given then the nz parameter is ignored 2690 2691 A nonzero block is any block that as 1 or more nonzeros in it 2692 2693 The block AIJ format is fully compatible with standard Fortran 77 2694 storage. That is, the stored row and column indices can begin at 2695 either one (as in Fortran) or zero. See the users' manual for details. 2696 2697 Specify the preallocated storage with either nz or nnz (not both). 2698 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2699 allocation. For additional details, see the users manual chapter on 2700 matrices. 2701 2702 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2703 @*/ 2704 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 2705 { 2706 PetscErrorCode ierr; 2707 2708 PetscFunctionBegin; 2709 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2710 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2711 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2712 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 2713 PetscFunctionReturn(0); 2714 } 2715 2716 #undef __FUNCT__ 2717 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2718 /*@C 2719 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2720 per row in the matrix. For good matrix assembly performance the 2721 user should preallocate the matrix storage by setting the parameter nz 2722 (or the array nnz). By setting these parameters accurately, performance 2723 during matrix assembly can be increased by more than a factor of 50. 2724 2725 Collective on MPI_Comm 2726 2727 Input Parameters: 2728 + A - the matrix 2729 . bs - size of block 2730 . nz - number of block nonzeros per block row (same for all rows) 2731 - nnz - array containing the number of block nonzeros in the various block rows 2732 (possibly different for each block row) or PETSC_NULL 2733 2734 Options Database Keys: 2735 . -mat_no_unroll - uses code that does not unroll the loops in the 2736 block calculations (much slower) 2737 . -mat_block_size - size of the blocks to use 2738 2739 Level: intermediate 2740 2741 Notes: 2742 If the nnz parameter is given then the nz parameter is ignored 2743 2744 The block AIJ format is fully compatible with standard Fortran 77 2745 storage. That is, the stored row and column indices can begin at 2746 either one (as in Fortran) or zero. See the users' manual for details. 2747 2748 Specify the preallocated storage with either nz or nnz (not both). 2749 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2750 allocation. For additional details, see the users manual chapter on 2751 matrices. 2752 2753 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2754 @*/ 2755 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 2756 { 2757 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 2758 2759 PetscFunctionBegin; 2760 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2761 if (f) { 2762 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2763 } 2764 PetscFunctionReturn(0); 2765 } 2766 2767 #undef __FUNCT__ 2768 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 2769 /*@ 2770 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 2771 (upper triangular entries in CSR format) provided by the user. 2772 2773 Collective on MPI_Comm 2774 2775 Input Parameters: 2776 + comm - must be an MPI communicator of size 1 2777 . bs - size of block 2778 . m - number of rows 2779 . n - number of columns 2780 . i - row indices 2781 . j - column indices 2782 - a - matrix values 2783 2784 Output Parameter: 2785 . mat - the matrix 2786 2787 Level: intermediate 2788 2789 Notes: 2790 The i, j, and a arrays are not copied by this routine, the user must free these arrays 2791 once the matrix is destroyed 2792 2793 You cannot set new nonzero locations into this matrix, that will generate an error. 2794 2795 The i and j indices are 0 based 2796 2797 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 2798 2799 @*/ 2800 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 2801 { 2802 PetscErrorCode ierr; 2803 PetscInt ii; 2804 Mat_SeqBAIJ *baij; 2805 2806 PetscFunctionBegin; 2807 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 2808 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2809 2810 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2811 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 2812 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 2813 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 2814 baij = (Mat_SeqBAIJ*)(*mat)->data; 2815 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 2816 2817 baij->i = i; 2818 baij->j = j; 2819 baij->a = a; 2820 baij->singlemalloc = PETSC_FALSE; 2821 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 2822 baij->free_a = PETSC_FALSE; 2823 baij->free_ij = PETSC_FALSE; 2824 2825 for (ii=0; ii<m; ii++) { 2826 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 2827 #if defined(PETSC_USE_DEBUG) 2828 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 2829 #endif 2830 } 2831 #if defined(PETSC_USE_DEBUG) 2832 for (ii=0; ii<baij->i[m]; ii++) { 2833 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 2834 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 2835 } 2836 #endif 2837 2838 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2839 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2840 PetscFunctionReturn(0); 2841 } 2842