1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 15 /*@C 16 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 17 18 Collective on Mat 19 20 Input Parameters: 21 . mat - the matrix 22 23 Level: advanced 24 @*/ 25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 26 { 27 PetscErrorCode ierr,(*f)(Mat); 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 31 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 32 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 33 34 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 35 if (f) { 36 ierr = (*f)(mat);CHKERRQ(ierr); 37 } else { 38 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 39 } 40 PetscFunctionReturn(0); 41 } 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 47 { 48 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 49 PetscErrorCode ierr; 50 PetscInt *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs; 51 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 52 53 PetscFunctionBegin; 54 if (a->idiagvalid) PetscFunctionReturn(0); 55 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56 diag_offset = a->diag; 57 if (!a->idiag) { 58 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59 } 60 diag = a->idiag; 61 mdiag = a->idiag+bs*bs*mbs; 62 /* factor and invert each block */ 63 switch (bs){ 64 case 2: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 4*diag_offset[i]; 67 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 68 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 69 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 70 diag += 4; 71 mdiag += 4; 72 } 73 break; 74 case 3: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 9*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79 diag[8] = odiag[8]; 80 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 81 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 82 mdiag[8] = odiag[8]; 83 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 84 diag += 9; 85 mdiag += 9; 86 } 87 break; 88 case 4: 89 for (i=0; i<mbs; i++) { 90 odiag = v + 16*diag_offset[i]; 91 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 92 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 94 diag += 16; 95 mdiag += 16; 96 } 97 break; 98 case 5: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 25*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 104 diag += 25; 105 mdiag += 25; 106 } 107 break; 108 default: 109 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 110 } 111 a->idiagvalid = PETSC_TRUE; 112 PetscFunctionReturn(0); 113 } 114 EXTERN_C_END 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 119 { 120 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 121 PetscScalar *x,x1,x2,s1,s2; 122 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 123 PetscErrorCode ierr; 124 PetscInt m = a->mbs,i,i2,nz,idx; 125 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 126 127 PetscFunctionBegin; 128 its = its*lits; 129 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 130 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 131 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 132 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 133 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 134 135 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 136 137 diag = a->diag; 138 idiag = a->idiag; 139 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 140 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 141 142 if (flag & SOR_ZERO_INITIAL_GUESS) { 143 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 144 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 145 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 146 i2 = 2; 147 idiag += 4; 148 for (i=1; i<m; i++) { 149 v = aa + 4*ai[i]; 150 vi = aj + ai[i]; 151 nz = diag[i] - ai[i]; 152 s1 = b[i2]; s2 = b[i2+1]; 153 while (nz--) { 154 idx = 2*(*vi++); 155 x1 = x[idx]; x2 = x[1+idx]; 156 s1 -= v[0]*x1 + v[2]*x2; 157 s2 -= v[1]*x1 + v[3]*x2; 158 v += 4; 159 } 160 x[i2] = idiag[0]*s1 + idiag[2]*s2; 161 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 162 idiag += 4; 163 i2 += 2; 164 } 165 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 166 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 167 } 168 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 169 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 170 i2 = 0; 171 mdiag = a->idiag+4*a->mbs; 172 for (i=0; i<m; i++) { 173 x1 = x[i2]; x2 = x[i2+1]; 174 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 175 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 176 mdiag += 4; 177 i2 += 2; 178 } 179 ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 180 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 181 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 182 } 183 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 184 idiag = a->idiag+4*a->mbs - 4; 185 i2 = 2*m - 2; 186 x1 = x[i2]; x2 = x[i2+1]; 187 x[i2] = idiag[0]*x1 + idiag[2]*x2; 188 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 189 idiag -= 4; 190 i2 -= 2; 191 for (i=m-2; i>=0; i--) { 192 v = aa + 4*(diag[i]+1); 193 vi = aj + diag[i] + 1; 194 nz = ai[i+1] - diag[i] - 1; 195 s1 = x[i2]; s2 = x[i2+1]; 196 while (nz--) { 197 idx = 2*(*vi++); 198 x1 = x[idx]; x2 = x[1+idx]; 199 s1 -= v[0]*x1 + v[2]*x2; 200 s2 -= v[1]*x1 + v[3]*x2; 201 v += 4; 202 } 203 x[i2] = idiag[0]*s1 + idiag[2]*s2; 204 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 205 idiag -= 4; 206 i2 -= 2; 207 } 208 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 209 } 210 } else { 211 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 212 } 213 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 214 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 215 PetscFunctionReturn(0); 216 } 217 218 #undef __FUNCT__ 219 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 220 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 221 { 222 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 223 PetscScalar *x,x1,x2,x3,s1,s2,s3; 224 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 225 PetscErrorCode ierr; 226 PetscInt m = a->mbs,i,i2,nz,idx; 227 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 228 229 PetscFunctionBegin; 230 its = its*lits; 231 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 232 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 233 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 234 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 235 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 236 237 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 238 239 diag = a->diag; 240 idiag = a->idiag; 241 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 242 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 243 244 if (flag & SOR_ZERO_INITIAL_GUESS) { 245 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 246 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 247 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 248 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 249 i2 = 3; 250 idiag += 9; 251 for (i=1; i<m; i++) { 252 v = aa + 9*ai[i]; 253 vi = aj + ai[i]; 254 nz = diag[i] - ai[i]; 255 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 256 while (nz--) { 257 idx = 3*(*vi++); 258 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 259 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 260 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 261 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 262 v += 9; 263 } 264 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 265 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 266 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 267 idiag += 9; 268 i2 += 3; 269 } 270 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 271 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 272 } 273 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 274 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 275 i2 = 0; 276 mdiag = a->idiag+9*a->mbs; 277 for (i=0; i<m; i++) { 278 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 279 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 280 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 281 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 282 mdiag += 9; 283 i2 += 3; 284 } 285 ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 286 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 287 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 288 } 289 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 290 idiag = a->idiag+9*a->mbs - 9; 291 i2 = 3*m - 3; 292 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 293 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 294 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 295 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 296 idiag -= 9; 297 i2 -= 3; 298 for (i=m-2; i>=0; i--) { 299 v = aa + 9*(diag[i]+1); 300 vi = aj + diag[i] + 1; 301 nz = ai[i+1] - diag[i] - 1; 302 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 303 while (nz--) { 304 idx = 3*(*vi++); 305 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 306 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 307 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 308 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 309 v += 9; 310 } 311 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 312 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 313 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 314 idiag -= 9; 315 i2 -= 3; 316 } 317 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 318 } 319 } else { 320 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 321 } 322 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 323 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 324 PetscFunctionReturn(0); 325 } 326 327 #undef __FUNCT__ 328 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 329 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 330 { 331 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 332 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 333 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 334 PetscErrorCode ierr; 335 PetscInt m = a->mbs,i,i2,nz,idx; 336 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 337 338 PetscFunctionBegin; 339 its = its*lits; 340 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 341 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 342 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 343 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 344 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 345 346 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 347 348 diag = a->diag; 349 idiag = a->idiag; 350 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 351 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 352 353 if (flag & SOR_ZERO_INITIAL_GUESS) { 354 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 355 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 356 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 357 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 358 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 359 i2 = 4; 360 idiag += 16; 361 for (i=1; i<m; i++) { 362 v = aa + 16*ai[i]; 363 vi = aj + ai[i]; 364 nz = diag[i] - ai[i]; 365 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 366 while (nz--) { 367 idx = 4*(*vi++); 368 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 369 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 370 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 371 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 372 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 373 v += 16; 374 } 375 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 376 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 377 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 378 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 379 idiag += 16; 380 i2 += 4; 381 } 382 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 383 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 384 } 385 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 386 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 387 i2 = 0; 388 mdiag = a->idiag+16*a->mbs; 389 for (i=0; i<m; i++) { 390 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 391 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 392 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 393 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 394 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 395 mdiag += 16; 396 i2 += 4; 397 } 398 ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 399 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 400 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 401 } 402 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 403 idiag = a->idiag+16*a->mbs - 16; 404 i2 = 4*m - 4; 405 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 406 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 407 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 408 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 409 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 410 idiag -= 16; 411 i2 -= 4; 412 for (i=m-2; i>=0; i--) { 413 v = aa + 16*(diag[i]+1); 414 vi = aj + diag[i] + 1; 415 nz = ai[i+1] - diag[i] - 1; 416 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 417 while (nz--) { 418 idx = 4*(*vi++); 419 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 420 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 421 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 422 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 423 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 424 v += 16; 425 } 426 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 427 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 428 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 429 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 430 idiag -= 16; 431 i2 -= 4; 432 } 433 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 434 } 435 } else { 436 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 437 } 438 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 439 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 440 PetscFunctionReturn(0); 441 } 442 443 #undef __FUNCT__ 444 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 445 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 446 { 447 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 448 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 449 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 450 PetscErrorCode ierr; 451 PetscInt m = a->mbs,i,i2,nz,idx; 452 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 453 454 PetscFunctionBegin; 455 its = its*lits; 456 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 457 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 458 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 459 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 460 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 461 462 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 463 464 diag = a->diag; 465 idiag = a->idiag; 466 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 467 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 468 469 if (flag & SOR_ZERO_INITIAL_GUESS) { 470 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 471 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 472 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 473 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 474 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 475 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 476 i2 = 5; 477 idiag += 25; 478 for (i=1; i<m; i++) { 479 v = aa + 25*ai[i]; 480 vi = aj + ai[i]; 481 nz = diag[i] - ai[i]; 482 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 483 while (nz--) { 484 idx = 5*(*vi++); 485 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 486 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 487 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 488 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 489 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 490 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 491 v += 25; 492 } 493 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 494 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 495 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 496 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 497 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 498 idiag += 25; 499 i2 += 5; 500 } 501 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 502 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 503 } 504 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 505 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 506 i2 = 0; 507 mdiag = a->idiag+25*a->mbs; 508 for (i=0; i<m; i++) { 509 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 510 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 511 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 512 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 513 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 514 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 515 mdiag += 25; 516 i2 += 5; 517 } 518 ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 519 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 520 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 521 } 522 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 523 idiag = a->idiag+25*a->mbs - 25; 524 i2 = 5*m - 5; 525 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 526 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 527 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 528 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 529 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 530 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 531 idiag -= 25; 532 i2 -= 5; 533 for (i=m-2; i>=0; i--) { 534 v = aa + 25*(diag[i]+1); 535 vi = aj + diag[i] + 1; 536 nz = ai[i+1] - diag[i] - 1; 537 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 538 while (nz--) { 539 idx = 5*(*vi++); 540 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 541 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 542 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 543 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 544 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 545 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 546 v += 25; 547 } 548 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 549 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 550 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 551 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 552 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 553 idiag -= 25; 554 i2 -= 5; 555 } 556 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 557 } 558 } else { 559 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 560 } 561 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 562 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 563 PetscFunctionReturn(0); 564 } 565 566 /* 567 Special version for direct calls from Fortran (Used in PETSc-fun3d) 568 */ 569 #if defined(PETSC_HAVE_FORTRAN_CAPS) 570 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 571 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 572 #define matsetvaluesblocked4_ matsetvaluesblocked4 573 #endif 574 575 EXTERN_C_BEGIN 576 #undef __FUNCT__ 577 #define __FUNCT__ "matsetvaluesblocked4_" 578 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 579 { 580 Mat A = *AA; 581 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 582 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 583 PetscInt *ai=a->i,*ailen=a->ilen; 584 PetscInt *aj=a->j,stepval,lastcol = -1; 585 const PetscScalar *value = v; 586 MatScalar *ap,*aa = a->a,*bap; 587 588 PetscFunctionBegin; 589 if (A->rmap.bs != 4) SETERRABORT(A->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 590 stepval = (n-1)*4; 591 for (k=0; k<m; k++) { /* loop over added rows */ 592 row = im[k]; 593 rp = aj + ai[row]; 594 ap = aa + 16*ai[row]; 595 nrow = ailen[row]; 596 low = 0; 597 high = nrow; 598 for (l=0; l<n; l++) { /* loop over added columns */ 599 col = in[l]; 600 if (col <= lastcol) low = 0; else high = nrow; 601 lastcol = col; 602 value = v + k*(stepval+4 + l)*4; 603 while (high-low > 7) { 604 t = (low+high)/2; 605 if (rp[t] > col) high = t; 606 else low = t; 607 } 608 for (i=low; i<high; i++) { 609 if (rp[i] > col) break; 610 if (rp[i] == col) { 611 bap = ap + 16*i; 612 for (ii=0; ii<4; ii++,value+=stepval) { 613 for (jj=ii; jj<16; jj+=4) { 614 bap[jj] += *value++; 615 } 616 } 617 goto noinsert2; 618 } 619 } 620 N = nrow++ - 1; 621 high++; /* added new column index thus must search to one higher than before */ 622 /* shift up all the later entries in this row */ 623 for (ii=N; ii>=i; ii--) { 624 rp[ii+1] = rp[ii]; 625 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 626 } 627 if (N >= i) { 628 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 629 } 630 rp[i] = col; 631 bap = ap + 16*i; 632 for (ii=0; ii<4; ii++,value+=stepval) { 633 for (jj=ii; jj<16; jj+=4) { 634 bap[jj] = *value++; 635 } 636 } 637 noinsert2:; 638 low = i; 639 } 640 ailen[row] = nrow; 641 } 642 PetscFunctionReturnVoid(); 643 } 644 EXTERN_C_END 645 646 #if defined(PETSC_HAVE_FORTRAN_CAPS) 647 #define matsetvalues4_ MATSETVALUES4 648 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 649 #define matsetvalues4_ matsetvalues4 650 #endif 651 652 EXTERN_C_BEGIN 653 #undef __FUNCT__ 654 #define __FUNCT__ "MatSetValues4_" 655 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 656 { 657 Mat A = *AA; 658 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 659 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 660 PetscInt *ai=a->i,*ailen=a->ilen; 661 PetscInt *aj=a->j,brow,bcol; 662 PetscInt ridx,cidx,lastcol = -1; 663 MatScalar *ap,value,*aa=a->a,*bap; 664 665 PetscFunctionBegin; 666 for (k=0; k<m; k++) { /* loop over added rows */ 667 row = im[k]; brow = row/4; 668 rp = aj + ai[brow]; 669 ap = aa + 16*ai[brow]; 670 nrow = ailen[brow]; 671 low = 0; 672 high = nrow; 673 for (l=0; l<n; l++) { /* loop over added columns */ 674 col = in[l]; bcol = col/4; 675 ridx = row % 4; cidx = col % 4; 676 value = v[l + k*n]; 677 if (col <= lastcol) low = 0; else high = nrow; 678 lastcol = col; 679 while (high-low > 7) { 680 t = (low+high)/2; 681 if (rp[t] > bcol) high = t; 682 else low = t; 683 } 684 for (i=low; i<high; i++) { 685 if (rp[i] > bcol) break; 686 if (rp[i] == bcol) { 687 bap = ap + 16*i + 4*cidx + ridx; 688 *bap += value; 689 goto noinsert1; 690 } 691 } 692 N = nrow++ - 1; 693 high++; /* added new column thus must search to one higher than before */ 694 /* shift up all the later entries in this row */ 695 for (ii=N; ii>=i; ii--) { 696 rp[ii+1] = rp[ii]; 697 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 698 } 699 if (N>=i) { 700 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 701 } 702 rp[i] = bcol; 703 ap[16*i + 4*cidx + ridx] = value; 704 noinsert1:; 705 low = i; 706 } 707 ailen[brow] = nrow; 708 } 709 PetscFunctionReturnVoid(); 710 } 711 EXTERN_C_END 712 713 /* UGLY, ugly, ugly 714 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 715 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 716 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 717 converts the entries into single precision and then calls ..._MatScalar() to put them 718 into the single precision data structures. 719 */ 720 #if defined(PETSC_USE_MAT_SINGLE) 721 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 722 #else 723 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 724 #endif 725 726 #define CHUNKSIZE 10 727 728 /* 729 Checks for missing diagonals 730 */ 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 733 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A) 734 { 735 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 736 PetscErrorCode ierr; 737 PetscInt *diag,*jj = a->j,i; 738 739 PetscFunctionBegin; 740 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 741 diag = a->diag; 742 for (i=0; i<a->mbs; i++) { 743 if (jj[diag[i]] != i) { 744 SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i); 745 } 746 } 747 PetscFunctionReturn(0); 748 } 749 750 #undef __FUNCT__ 751 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 752 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 753 { 754 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 755 PetscErrorCode ierr; 756 PetscInt i,j,*diag,m = a->mbs; 757 758 PetscFunctionBegin; 759 if (a->diag) PetscFunctionReturn(0); 760 761 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr); 762 ierr = PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));CHKERRQ(ierr); 763 for (i=0; i<m; i++) { 764 diag[i] = a->i[i+1]; 765 for (j=a->i[i]; j<a->i[i+1]; j++) { 766 if (a->j[j] == i) { 767 diag[i] = j; 768 break; 769 } 770 } 771 } 772 a->diag = diag; 773 PetscFunctionReturn(0); 774 } 775 776 777 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 778 779 #undef __FUNCT__ 780 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 781 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 782 { 783 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 784 PetscErrorCode ierr; 785 PetscInt n = a->mbs,i; 786 787 PetscFunctionBegin; 788 *nn = n; 789 if (!ia) PetscFunctionReturn(0); 790 if (symmetric) { 791 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 792 } else if (oshift == 1) { 793 /* temporarily add 1 to i and j indices */ 794 PetscInt nz = a->i[n]; 795 for (i=0; i<nz; i++) a->j[i]++; 796 for (i=0; i<n+1; i++) a->i[i]++; 797 *ia = a->i; *ja = a->j; 798 } else { 799 *ia = a->i; *ja = a->j; 800 } 801 802 PetscFunctionReturn(0); 803 } 804 805 #undef __FUNCT__ 806 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 807 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 808 { 809 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 810 PetscErrorCode ierr; 811 PetscInt i,n = a->mbs; 812 813 PetscFunctionBegin; 814 if (!ia) PetscFunctionReturn(0); 815 if (symmetric) { 816 ierr = PetscFree(*ia);CHKERRQ(ierr); 817 ierr = PetscFree(*ja);CHKERRQ(ierr); 818 } else if (oshift == 1) { 819 PetscInt nz = a->i[n]-1; 820 for (i=0; i<nz; i++) a->j[i]--; 821 for (i=0; i<n+1; i++) a->i[i]--; 822 } 823 PetscFunctionReturn(0); 824 } 825 826 #undef __FUNCT__ 827 #define __FUNCT__ "MatDestroy_SeqBAIJ" 828 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 829 { 830 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 831 PetscErrorCode ierr; 832 833 PetscFunctionBegin; 834 #if defined(PETSC_USE_LOG) 835 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz); 836 #endif 837 if (a->freedata){ 838 ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,&a->a,&a->j,&a->i);CHKERRQ(ierr); 839 } 840 if (a->row) { 841 ierr = ISDestroy(a->row);CHKERRQ(ierr); 842 } 843 if (a->col) { 844 ierr = ISDestroy(a->col);CHKERRQ(ierr); 845 } 846 ierr = PetscFree(a->diag);CHKERRQ(ierr); 847 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 848 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 849 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 850 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 851 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 852 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 853 #if defined(PETSC_USE_MAT_SINGLE) 854 ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr); 855 #endif 856 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 857 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 858 859 ierr = PetscFree(a);CHKERRQ(ierr); 860 861 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 862 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 863 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 864 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 865 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 866 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 867 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 868 PetscFunctionReturn(0); 869 } 870 871 #undef __FUNCT__ 872 #define __FUNCT__ "MatSetOption_SeqBAIJ" 873 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op) 874 { 875 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 876 PetscErrorCode ierr; 877 878 PetscFunctionBegin; 879 switch (op) { 880 case MAT_ROW_ORIENTED: 881 a->roworiented = PETSC_TRUE; 882 break; 883 case MAT_COLUMN_ORIENTED: 884 a->roworiented = PETSC_FALSE; 885 break; 886 case MAT_COLUMNS_SORTED: 887 a->sorted = PETSC_TRUE; 888 break; 889 case MAT_COLUMNS_UNSORTED: 890 a->sorted = PETSC_FALSE; 891 break; 892 case MAT_KEEP_ZEROED_ROWS: 893 a->keepzeroedrows = PETSC_TRUE; 894 break; 895 case MAT_NO_NEW_NONZERO_LOCATIONS: 896 a->nonew = 1; 897 break; 898 case MAT_NEW_NONZERO_LOCATION_ERR: 899 a->nonew = -1; 900 break; 901 case MAT_NEW_NONZERO_ALLOCATION_ERR: 902 a->nonew = -2; 903 break; 904 case MAT_YES_NEW_NONZERO_LOCATIONS: 905 a->nonew = 0; 906 break; 907 case MAT_ROWS_SORTED: 908 case MAT_ROWS_UNSORTED: 909 case MAT_YES_NEW_DIAGONALS: 910 case MAT_IGNORE_OFF_PROC_ENTRIES: 911 case MAT_USE_HASH_TABLE: 912 ierr = PetscInfo1(A,"Option %d ignored\n",op);CHKERRQ(ierr); 913 break; 914 case MAT_NO_NEW_DIAGONALS: 915 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 916 case MAT_SYMMETRIC: 917 case MAT_STRUCTURALLY_SYMMETRIC: 918 case MAT_NOT_SYMMETRIC: 919 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 920 case MAT_HERMITIAN: 921 case MAT_NOT_HERMITIAN: 922 case MAT_SYMMETRY_ETERNAL: 923 case MAT_NOT_SYMMETRY_ETERNAL: 924 break; 925 default: 926 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 927 } 928 PetscFunctionReturn(0); 929 } 930 931 #undef __FUNCT__ 932 #define __FUNCT__ "MatGetRow_SeqBAIJ" 933 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 934 { 935 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 936 PetscErrorCode ierr; 937 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 938 MatScalar *aa,*aa_i; 939 PetscScalar *v_i; 940 941 PetscFunctionBegin; 942 bs = A->rmap.bs; 943 ai = a->i; 944 aj = a->j; 945 aa = a->a; 946 bs2 = a->bs2; 947 948 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 949 950 bn = row/bs; /* Block number */ 951 bp = row % bs; /* Block Position */ 952 M = ai[bn+1] - ai[bn]; 953 *nz = bs*M; 954 955 if (v) { 956 *v = 0; 957 if (*nz) { 958 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 959 for (i=0; i<M; i++) { /* for each block in the block row */ 960 v_i = *v + i*bs; 961 aa_i = aa + bs2*(ai[bn] + i); 962 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 963 } 964 } 965 } 966 967 if (idx) { 968 *idx = 0; 969 if (*nz) { 970 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 971 for (i=0; i<M; i++) { /* for each block in the block row */ 972 idx_i = *idx + i*bs; 973 itmp = bs*aj[ai[bn] + i]; 974 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 975 } 976 } 977 } 978 PetscFunctionReturn(0); 979 } 980 981 #undef __FUNCT__ 982 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 983 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 984 { 985 PetscErrorCode ierr; 986 987 PetscFunctionBegin; 988 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 989 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 990 PetscFunctionReturn(0); 991 } 992 993 #undef __FUNCT__ 994 #define __FUNCT__ "MatTranspose_SeqBAIJ" 995 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 996 { 997 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 998 Mat C; 999 PetscErrorCode ierr; 1000 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1001 PetscInt *rows,*cols,bs2=a->bs2; 1002 PetscScalar *array; 1003 1004 PetscFunctionBegin; 1005 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 1006 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1007 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1008 1009 #if defined(PETSC_USE_MAT_SINGLE) 1010 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 1011 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 1012 #else 1013 array = a->a; 1014 #endif 1015 1016 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1017 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 1018 ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr); 1019 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1020 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1021 ierr = PetscFree(col);CHKERRQ(ierr); 1022 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1023 cols = rows + bs; 1024 for (i=0; i<mbs; i++) { 1025 cols[0] = i*bs; 1026 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1027 len = ai[i+1] - ai[i]; 1028 for (j=0; j<len; j++) { 1029 rows[0] = (*aj++)*bs; 1030 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1031 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1032 array += bs2; 1033 } 1034 } 1035 ierr = PetscFree(rows);CHKERRQ(ierr); 1036 #if defined(PETSC_USE_MAT_SINGLE) 1037 ierr = PetscFree(array); 1038 #endif 1039 1040 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1041 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1042 1043 if (B) { 1044 *B = C; 1045 } else { 1046 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1047 } 1048 PetscFunctionReturn(0); 1049 } 1050 1051 #undef __FUNCT__ 1052 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1053 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1054 { 1055 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1056 PetscErrorCode ierr; 1057 PetscInt i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2; 1058 int fd; 1059 PetscScalar *aa; 1060 FILE *file; 1061 1062 PetscFunctionBegin; 1063 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1064 ierr = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1065 col_lens[0] = MAT_FILE_COOKIE; 1066 1067 col_lens[1] = A->rmap.N; 1068 col_lens[2] = A->cmap.n; 1069 col_lens[3] = a->nz*bs2; 1070 1071 /* store lengths of each row and write (including header) to file */ 1072 count = 0; 1073 for (i=0; i<a->mbs; i++) { 1074 for (j=0; j<bs; j++) { 1075 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1076 } 1077 } 1078 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1079 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1080 1081 /* store column indices (zero start index) */ 1082 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1083 count = 0; 1084 for (i=0; i<a->mbs; i++) { 1085 for (j=0; j<bs; j++) { 1086 for (k=a->i[i]; k<a->i[i+1]; k++) { 1087 for (l=0; l<bs; l++) { 1088 jj[count++] = bs*a->j[k] + l; 1089 } 1090 } 1091 } 1092 } 1093 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1094 ierr = PetscFree(jj);CHKERRQ(ierr); 1095 1096 /* store nonzero values */ 1097 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1098 count = 0; 1099 for (i=0; i<a->mbs; i++) { 1100 for (j=0; j<bs; j++) { 1101 for (k=a->i[i]; k<a->i[i+1]; k++) { 1102 for (l=0; l<bs; l++) { 1103 aa[count++] = a->a[bs2*k + l*bs + j]; 1104 } 1105 } 1106 } 1107 } 1108 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1109 ierr = PetscFree(aa);CHKERRQ(ierr); 1110 1111 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1112 if (file) { 1113 fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs); 1114 } 1115 PetscFunctionReturn(0); 1116 } 1117 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1120 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1121 { 1122 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1123 PetscErrorCode ierr; 1124 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 1125 PetscViewerFormat format; 1126 1127 PetscFunctionBegin; 1128 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1129 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1130 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1131 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1132 Mat aij; 1133 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1134 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1135 ierr = MatDestroy(aij);CHKERRQ(ierr); 1136 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1137 PetscFunctionReturn(0); 1138 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1139 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1140 for (i=0; i<a->mbs; i++) { 1141 for (j=0; j<bs; j++) { 1142 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1143 for (k=a->i[i]; k<a->i[i+1]; k++) { 1144 for (l=0; l<bs; l++) { 1145 #if defined(PETSC_USE_COMPLEX) 1146 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1147 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1148 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1149 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1150 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1151 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1152 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1153 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1154 } 1155 #else 1156 if (a->a[bs2*k + l*bs + j] != 0.0) { 1157 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1158 } 1159 #endif 1160 } 1161 } 1162 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1163 } 1164 } 1165 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1166 } else { 1167 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1168 for (i=0; i<a->mbs; i++) { 1169 for (j=0; j<bs; j++) { 1170 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1171 for (k=a->i[i]; k<a->i[i+1]; k++) { 1172 for (l=0; l<bs; l++) { 1173 #if defined(PETSC_USE_COMPLEX) 1174 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1175 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1176 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1177 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1178 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1179 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1180 } else { 1181 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1182 } 1183 #else 1184 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1185 #endif 1186 } 1187 } 1188 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1189 } 1190 } 1191 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1192 } 1193 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1194 PetscFunctionReturn(0); 1195 } 1196 1197 #undef __FUNCT__ 1198 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1199 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1200 { 1201 Mat A = (Mat) Aa; 1202 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1203 PetscErrorCode ierr; 1204 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 1205 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1206 MatScalar *aa; 1207 PetscViewer viewer; 1208 1209 PetscFunctionBegin; 1210 1211 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1212 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1213 1214 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1215 1216 /* loop over matrix elements drawing boxes */ 1217 color = PETSC_DRAW_BLUE; 1218 for (i=0,row=0; i<mbs; i++,row+=bs) { 1219 for (j=a->i[i]; j<a->i[i+1]; j++) { 1220 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1221 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1222 aa = a->a + j*bs2; 1223 for (k=0; k<bs; k++) { 1224 for (l=0; l<bs; l++) { 1225 if (PetscRealPart(*aa++) >= 0.) continue; 1226 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1227 } 1228 } 1229 } 1230 } 1231 color = PETSC_DRAW_CYAN; 1232 for (i=0,row=0; i<mbs; i++,row+=bs) { 1233 for (j=a->i[i]; j<a->i[i+1]; j++) { 1234 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1235 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1236 aa = a->a + j*bs2; 1237 for (k=0; k<bs; k++) { 1238 for (l=0; l<bs; l++) { 1239 if (PetscRealPart(*aa++) != 0.) continue; 1240 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1241 } 1242 } 1243 } 1244 } 1245 1246 color = PETSC_DRAW_RED; 1247 for (i=0,row=0; i<mbs; i++,row+=bs) { 1248 for (j=a->i[i]; j<a->i[i+1]; j++) { 1249 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1250 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1251 aa = a->a + j*bs2; 1252 for (k=0; k<bs; k++) { 1253 for (l=0; l<bs; l++) { 1254 if (PetscRealPart(*aa++) <= 0.) continue; 1255 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1256 } 1257 } 1258 } 1259 } 1260 PetscFunctionReturn(0); 1261 } 1262 1263 #undef __FUNCT__ 1264 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1265 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1266 { 1267 PetscErrorCode ierr; 1268 PetscReal xl,yl,xr,yr,w,h; 1269 PetscDraw draw; 1270 PetscTruth isnull; 1271 1272 PetscFunctionBegin; 1273 1274 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1275 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1276 1277 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1278 xr = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 1279 xr += w; yr += h; xl = -w; yl = -h; 1280 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1281 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1282 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1283 PetscFunctionReturn(0); 1284 } 1285 1286 #undef __FUNCT__ 1287 #define __FUNCT__ "MatView_SeqBAIJ" 1288 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1289 { 1290 PetscErrorCode ierr; 1291 PetscTruth iascii,isbinary,isdraw; 1292 1293 PetscFunctionBegin; 1294 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1295 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1296 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1297 if (iascii){ 1298 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1299 } else if (isbinary) { 1300 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1301 } else if (isdraw) { 1302 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1303 } else { 1304 Mat B; 1305 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1306 ierr = MatView(B,viewer);CHKERRQ(ierr); 1307 ierr = MatDestroy(B);CHKERRQ(ierr); 1308 } 1309 PetscFunctionReturn(0); 1310 } 1311 1312 1313 #undef __FUNCT__ 1314 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1315 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1316 { 1317 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1318 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1319 PetscInt *ai = a->i,*ailen = a->ilen; 1320 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 1321 MatScalar *ap,*aa = a->a,zero = 0.0; 1322 1323 PetscFunctionBegin; 1324 for (k=0; k<m; k++) { /* loop over rows */ 1325 row = im[k]; brow = row/bs; 1326 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 1327 if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1328 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1329 nrow = ailen[brow]; 1330 for (l=0; l<n; l++) { /* loop over columns */ 1331 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 1332 if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1333 col = in[l] ; 1334 bcol = col/bs; 1335 cidx = col%bs; 1336 ridx = row%bs; 1337 high = nrow; 1338 low = 0; /* assume unsorted */ 1339 while (high-low > 5) { 1340 t = (low+high)/2; 1341 if (rp[t] > bcol) high = t; 1342 else low = t; 1343 } 1344 for (i=low; i<high; i++) { 1345 if (rp[i] > bcol) break; 1346 if (rp[i] == bcol) { 1347 *v++ = ap[bs2*i+bs*cidx+ridx]; 1348 goto finished; 1349 } 1350 } 1351 *v++ = zero; 1352 finished:; 1353 } 1354 } 1355 PetscFunctionReturn(0); 1356 } 1357 1358 #if defined(PETSC_USE_MAT_SINGLE) 1359 #undef __FUNCT__ 1360 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1361 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 1362 { 1363 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1364 PetscErrorCode ierr; 1365 PetscInt i,N = m*n*b->bs2; 1366 MatScalar *vsingle; 1367 1368 PetscFunctionBegin; 1369 if (N > b->setvalueslen) { 1370 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 1371 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1372 b->setvalueslen = N; 1373 } 1374 vsingle = b->setvaluescopy; 1375 for (i=0; i<N; i++) { 1376 vsingle[i] = v[i]; 1377 } 1378 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1379 PetscFunctionReturn(0); 1380 } 1381 #endif 1382 1383 1384 #undef __FUNCT__ 1385 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1386 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is) 1387 { 1388 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1389 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1390 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1391 PetscErrorCode ierr; 1392 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 1393 PetscTruth roworiented=a->roworiented; 1394 const MatScalar *value = v; 1395 MatScalar *ap,*aa = a->a,*bap; 1396 1397 PetscFunctionBegin; 1398 if (roworiented) { 1399 stepval = (n-1)*bs; 1400 } else { 1401 stepval = (m-1)*bs; 1402 } 1403 for (k=0; k<m; k++) { /* loop over added rows */ 1404 row = im[k]; 1405 if (row < 0) continue; 1406 #if defined(PETSC_USE_DEBUG) 1407 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1408 #endif 1409 rp = aj + ai[row]; 1410 ap = aa + bs2*ai[row]; 1411 rmax = imax[row]; 1412 nrow = ailen[row]; 1413 low = 0; 1414 high = nrow; 1415 for (l=0; l<n; l++) { /* loop over added columns */ 1416 if (in[l] < 0) continue; 1417 #if defined(PETSC_USE_DEBUG) 1418 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1419 #endif 1420 col = in[l]; 1421 if (roworiented) { 1422 value = v + k*(stepval+bs)*bs + l*bs; 1423 } else { 1424 value = v + l*(stepval+bs)*bs + k*bs; 1425 } 1426 if (col <= lastcol) low = 0; else high = nrow; 1427 lastcol = col; 1428 while (high-low > 7) { 1429 t = (low+high)/2; 1430 if (rp[t] > col) high = t; 1431 else low = t; 1432 } 1433 for (i=low; i<high; i++) { 1434 if (rp[i] > col) break; 1435 if (rp[i] == col) { 1436 bap = ap + bs2*i; 1437 if (roworiented) { 1438 if (is == ADD_VALUES) { 1439 for (ii=0; ii<bs; ii++,value+=stepval) { 1440 for (jj=ii; jj<bs2; jj+=bs) { 1441 bap[jj] += *value++; 1442 } 1443 } 1444 } else { 1445 for (ii=0; ii<bs; ii++,value+=stepval) { 1446 for (jj=ii; jj<bs2; jj+=bs) { 1447 bap[jj] = *value++; 1448 } 1449 } 1450 } 1451 } else { 1452 if (is == ADD_VALUES) { 1453 for (ii=0; ii<bs; ii++,value+=stepval) { 1454 for (jj=0; jj<bs; jj++) { 1455 *bap++ += *value++; 1456 } 1457 } 1458 } else { 1459 for (ii=0; ii<bs; ii++,value+=stepval) { 1460 for (jj=0; jj<bs; jj++) { 1461 *bap++ = *value++; 1462 } 1463 } 1464 } 1465 } 1466 goto noinsert2; 1467 } 1468 } 1469 if (nonew == 1) goto noinsert2; 1470 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1471 MatSeqXAIJReallocateAIJ(a,bs2,nrow,row,col,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew); 1472 N = nrow++ - 1; high++; 1473 /* shift up all the later entries in this row */ 1474 for (ii=N; ii>=i; ii--) { 1475 rp[ii+1] = rp[ii]; 1476 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1477 } 1478 if (N >= i) { 1479 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1480 } 1481 rp[i] = col; 1482 bap = ap + bs2*i; 1483 if (roworiented) { 1484 for (ii=0; ii<bs; ii++,value+=stepval) { 1485 for (jj=ii; jj<bs2; jj+=bs) { 1486 bap[jj] = *value++; 1487 } 1488 } 1489 } else { 1490 for (ii=0; ii<bs; ii++,value+=stepval) { 1491 for (jj=0; jj<bs; jj++) { 1492 *bap++ = *value++; 1493 } 1494 } 1495 } 1496 noinsert2:; 1497 low = i; 1498 } 1499 ailen[row] = nrow; 1500 } 1501 PetscFunctionReturn(0); 1502 } 1503 1504 #undef __FUNCT__ 1505 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1506 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1507 { 1508 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1509 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1510 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 1511 PetscErrorCode ierr; 1512 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1513 MatScalar *aa = a->a,*ap; 1514 PetscReal ratio=0.6; 1515 1516 PetscFunctionBegin; 1517 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1518 1519 if (m) rmax = ailen[0]; 1520 for (i=1; i<mbs; i++) { 1521 /* move each row back by the amount of empty slots (fshift) before it*/ 1522 fshift += imax[i-1] - ailen[i-1]; 1523 rmax = PetscMax(rmax,ailen[i]); 1524 if (fshift) { 1525 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1526 N = ailen[i]; 1527 for (j=0; j<N; j++) { 1528 ip[j-fshift] = ip[j]; 1529 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1530 } 1531 } 1532 ai[i] = ai[i-1] + ailen[i-1]; 1533 } 1534 if (mbs) { 1535 fshift += imax[mbs-1] - ailen[mbs-1]; 1536 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1537 } 1538 /* reset ilen and imax for each row */ 1539 for (i=0; i<mbs; i++) { 1540 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1541 } 1542 a->nz = ai[mbs]; 1543 1544 /* diagonals may have moved, so kill the diagonal pointers */ 1545 a->idiagvalid = PETSC_FALSE; 1546 if (fshift && a->diag) { 1547 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1548 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1549 a->diag = 0; 1550 } 1551 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1552 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1553 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1554 a->reallocs = 0; 1555 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1556 1557 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1558 if (a->compressedrow.use){ 1559 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1560 } 1561 1562 A->same_nonzero = PETSC_TRUE; 1563 PetscFunctionReturn(0); 1564 } 1565 1566 /* 1567 This function returns an array of flags which indicate the locations of contiguous 1568 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1569 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1570 Assume: sizes should be long enough to hold all the values. 1571 */ 1572 #undef __FUNCT__ 1573 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1574 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1575 { 1576 PetscInt i,j,k,row; 1577 PetscTruth flg; 1578 1579 PetscFunctionBegin; 1580 for (i=0,j=0; i<n; j++) { 1581 row = idx[i]; 1582 if (row%bs!=0) { /* Not the begining of a block */ 1583 sizes[j] = 1; 1584 i++; 1585 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1586 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1587 i++; 1588 } else { /* Begining of the block, so check if the complete block exists */ 1589 flg = PETSC_TRUE; 1590 for (k=1; k<bs; k++) { 1591 if (row+k != idx[i+k]) { /* break in the block */ 1592 flg = PETSC_FALSE; 1593 break; 1594 } 1595 } 1596 if (flg) { /* No break in the bs */ 1597 sizes[j] = bs; 1598 i+= bs; 1599 } else { 1600 sizes[j] = 1; 1601 i++; 1602 } 1603 } 1604 } 1605 *bs_max = j; 1606 PetscFunctionReturn(0); 1607 } 1608 1609 #undef __FUNCT__ 1610 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1611 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 1612 { 1613 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1614 PetscErrorCode ierr; 1615 PetscInt i,j,k,count,*rows; 1616 PetscInt bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max; 1617 PetscScalar zero = 0.0; 1618 MatScalar *aa; 1619 1620 PetscFunctionBegin; 1621 /* Make a copy of the IS and sort it */ 1622 /* allocate memory for rows,sizes */ 1623 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1624 sizes = rows + is_n; 1625 1626 /* copy IS values to rows, and sort them */ 1627 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1628 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1629 if (baij->keepzeroedrows) { 1630 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1631 bs_max = is_n; 1632 A->same_nonzero = PETSC_TRUE; 1633 } else { 1634 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1635 A->same_nonzero = PETSC_FALSE; 1636 } 1637 1638 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1639 row = rows[j]; 1640 if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 1641 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1642 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1643 if (sizes[i] == bs && !baij->keepzeroedrows) { 1644 if (diag != 0.0) { 1645 if (baij->ilen[row/bs] > 0) { 1646 baij->ilen[row/bs] = 1; 1647 baij->j[baij->i[row/bs]] = row/bs; 1648 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1649 } 1650 /* Now insert all the diagonal values for this bs */ 1651 for (k=0; k<bs; k++) { 1652 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 1653 } 1654 } else { /* (diag == 0.0) */ 1655 baij->ilen[row/bs] = 0; 1656 } /* end (diag == 0.0) */ 1657 } else { /* (sizes[i] != bs) */ 1658 #if defined (PETSC_USE_DEBUG) 1659 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 1660 #endif 1661 for (k=0; k<count; k++) { 1662 aa[0] = zero; 1663 aa += bs; 1664 } 1665 if (diag != 0.0) { 1666 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 1667 } 1668 } 1669 } 1670 1671 ierr = PetscFree(rows);CHKERRQ(ierr); 1672 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1673 PetscFunctionReturn(0); 1674 } 1675 1676 #undef __FUNCT__ 1677 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1678 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1679 { 1680 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1681 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 1682 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1683 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 1684 PetscErrorCode ierr; 1685 PetscInt ridx,cidx,bs2=a->bs2; 1686 PetscTruth roworiented=a->roworiented; 1687 MatScalar *ap,value,*aa=a->a,*bap; 1688 1689 PetscFunctionBegin; 1690 for (k=0; k<m; k++) { /* loop over added rows */ 1691 row = im[k]; 1692 brow = row/bs; 1693 if (row < 0) continue; 1694 #if defined(PETSC_USE_DEBUG) 1695 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 1696 #endif 1697 rp = aj + ai[brow]; 1698 ap = aa + bs2*ai[brow]; 1699 rmax = imax[brow]; 1700 nrow = ailen[brow]; 1701 low = 0; 1702 high = nrow; 1703 for (l=0; l<n; l++) { /* loop over added columns */ 1704 if (in[l] < 0) continue; 1705 #if defined(PETSC_USE_DEBUG) 1706 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 1707 #endif 1708 col = in[l]; bcol = col/bs; 1709 ridx = row % bs; cidx = col % bs; 1710 if (roworiented) { 1711 value = v[l + k*n]; 1712 } else { 1713 value = v[k + l*m]; 1714 } 1715 if (col <= lastcol) low = 0; else high = nrow; 1716 lastcol = col; 1717 while (high-low > 7) { 1718 t = (low+high)/2; 1719 if (rp[t] > bcol) high = t; 1720 else low = t; 1721 } 1722 for (i=low; i<high; i++) { 1723 if (rp[i] > bcol) break; 1724 if (rp[i] == bcol) { 1725 bap = ap + bs2*i + bs*cidx + ridx; 1726 if (is == ADD_VALUES) *bap += value; 1727 else *bap = value; 1728 goto noinsert1; 1729 } 1730 } 1731 if (nonew == 1) goto noinsert1; 1732 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1733 MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew); 1734 N = nrow++ - 1; high++; 1735 /* shift up all the later entries in this row */ 1736 for (ii=N; ii>=i; ii--) { 1737 rp[ii+1] = rp[ii]; 1738 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1739 } 1740 if (N>=i) { 1741 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1742 } 1743 rp[i] = bcol; 1744 ap[bs2*i + bs*cidx + ridx] = value; 1745 a->nz++; 1746 noinsert1:; 1747 low = i; 1748 } 1749 ailen[brow] = nrow; 1750 } 1751 A->same_nonzero = PETSC_FALSE; 1752 PetscFunctionReturn(0); 1753 } 1754 1755 1756 #undef __FUNCT__ 1757 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1758 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1759 { 1760 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1761 Mat outA; 1762 PetscErrorCode ierr; 1763 PetscTruth row_identity,col_identity; 1764 1765 PetscFunctionBegin; 1766 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1767 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1768 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1769 if (!row_identity || !col_identity) { 1770 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 1771 } 1772 1773 outA = inA; 1774 inA->factor = FACTOR_LU; 1775 1776 if (!a->diag) { 1777 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1778 } 1779 1780 a->row = row; 1781 a->col = col; 1782 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1783 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1784 1785 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1786 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1787 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1788 1789 /* 1790 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1791 for ILU(0) factorization with natural ordering 1792 */ 1793 if (inA->rmap.bs < 8) { 1794 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1795 } else { 1796 if (!a->solve_work) { 1797 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1798 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1799 } 1800 } 1801 1802 ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1803 1804 PetscFunctionReturn(0); 1805 } 1806 #undef __FUNCT__ 1807 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1808 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A) 1809 { 1810 static PetscTruth called = PETSC_FALSE; 1811 MPI_Comm comm = A->comm; 1812 PetscErrorCode ierr; 1813 1814 PetscFunctionBegin; 1815 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1816 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1817 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1818 PetscFunctionReturn(0); 1819 } 1820 1821 EXTERN_C_BEGIN 1822 #undef __FUNCT__ 1823 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1824 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 1825 { 1826 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1827 PetscInt i,nz,nbs; 1828 1829 PetscFunctionBegin; 1830 nz = baij->maxnz/baij->bs2; 1831 nbs = baij->nbs; 1832 for (i=0; i<nz; i++) { 1833 baij->j[i] = indices[i]; 1834 } 1835 baij->nz = nz; 1836 for (i=0; i<nbs; i++) { 1837 baij->ilen[i] = baij->imax[i]; 1838 } 1839 1840 PetscFunctionReturn(0); 1841 } 1842 EXTERN_C_END 1843 1844 #undef __FUNCT__ 1845 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1846 /*@ 1847 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1848 in the matrix. 1849 1850 Input Parameters: 1851 + mat - the SeqBAIJ matrix 1852 - indices - the column indices 1853 1854 Level: advanced 1855 1856 Notes: 1857 This can be called if you have precomputed the nonzero structure of the 1858 matrix and want to provide it to the matrix object to improve the performance 1859 of the MatSetValues() operation. 1860 1861 You MUST have set the correct numbers of nonzeros per row in the call to 1862 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 1863 1864 MUST be called before any calls to MatSetValues(); 1865 1866 @*/ 1867 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1868 { 1869 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1870 1871 PetscFunctionBegin; 1872 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1873 PetscValidPointer(indices,2); 1874 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1875 if (f) { 1876 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1877 } else { 1878 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 1879 } 1880 PetscFunctionReturn(0); 1881 } 1882 1883 #undef __FUNCT__ 1884 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1885 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1886 { 1887 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1888 PetscErrorCode ierr; 1889 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 1890 PetscReal atmp; 1891 PetscScalar *x,zero = 0.0; 1892 MatScalar *aa; 1893 PetscInt ncols,brow,krow,kcol; 1894 1895 PetscFunctionBegin; 1896 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1897 bs = A->rmap.bs; 1898 aa = a->a; 1899 ai = a->i; 1900 aj = a->j; 1901 mbs = a->mbs; 1902 1903 ierr = VecSet(v,zero);CHKERRQ(ierr); 1904 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1905 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1906 if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1907 for (i=0; i<mbs; i++) { 1908 ncols = ai[1] - ai[0]; ai++; 1909 brow = bs*i; 1910 for (j=0; j<ncols; j++){ 1911 /* bcol = bs*(*aj); */ 1912 for (kcol=0; kcol<bs; kcol++){ 1913 for (krow=0; krow<bs; krow++){ 1914 atmp = PetscAbsScalar(*aa); aa++; 1915 row = brow + krow; /* row index */ 1916 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 1917 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1918 } 1919 } 1920 aj++; 1921 } 1922 } 1923 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1924 PetscFunctionReturn(0); 1925 } 1926 1927 #undef __FUNCT__ 1928 #define __FUNCT__ "MatCopy_SeqBAIJ" 1929 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 1930 { 1931 PetscErrorCode ierr; 1932 1933 PetscFunctionBegin; 1934 /* If the two matrices have the same copy implementation, use fast copy. */ 1935 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1936 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1937 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 1938 1939 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 1940 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1941 } 1942 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 1943 } else { 1944 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1945 } 1946 PetscFunctionReturn(0); 1947 } 1948 1949 #undef __FUNCT__ 1950 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1951 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 1952 { 1953 PetscErrorCode ierr; 1954 1955 PetscFunctionBegin; 1956 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 1957 PetscFunctionReturn(0); 1958 } 1959 1960 #undef __FUNCT__ 1961 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1962 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1963 { 1964 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1965 PetscFunctionBegin; 1966 *array = a->a; 1967 PetscFunctionReturn(0); 1968 } 1969 1970 #undef __FUNCT__ 1971 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1972 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1973 { 1974 PetscFunctionBegin; 1975 PetscFunctionReturn(0); 1976 } 1977 1978 #include "petscblaslapack.h" 1979 #undef __FUNCT__ 1980 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1981 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1982 { 1983 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1984 PetscErrorCode ierr; 1985 PetscInt i,bs=Y->rmap.bs,j,bs2; 1986 PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz; 1987 1988 PetscFunctionBegin; 1989 if (str == SAME_NONZERO_PATTERN) { 1990 PetscScalar alpha = a; 1991 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1992 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1993 if (y->xtoy && y->XtoY != X) { 1994 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1995 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1996 } 1997 if (!y->xtoy) { /* get xtoy */ 1998 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1999 y->XtoY = X; 2000 } 2001 bs2 = bs*bs; 2002 for (i=0; i<x->nz; i++) { 2003 j = 0; 2004 while (j < bs2){ 2005 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2006 j++; 2007 } 2008 } 2009 ierr = PetscInfo3(0,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2010 } else { 2011 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2012 } 2013 PetscFunctionReturn(0); 2014 } 2015 2016 #undef __FUNCT__ 2017 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2018 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2019 { 2020 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2021 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2022 PetscScalar *aa = a->a; 2023 2024 PetscFunctionBegin; 2025 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2026 PetscFunctionReturn(0); 2027 } 2028 2029 #undef __FUNCT__ 2030 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2031 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2032 { 2033 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2034 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2035 PetscScalar *aa = a->a; 2036 2037 PetscFunctionBegin; 2038 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2039 PetscFunctionReturn(0); 2040 } 2041 2042 2043 /* -------------------------------------------------------------------*/ 2044 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2045 MatGetRow_SeqBAIJ, 2046 MatRestoreRow_SeqBAIJ, 2047 MatMult_SeqBAIJ_N, 2048 /* 4*/ MatMultAdd_SeqBAIJ_N, 2049 MatMultTranspose_SeqBAIJ, 2050 MatMultTransposeAdd_SeqBAIJ, 2051 MatSolve_SeqBAIJ_N, 2052 0, 2053 0, 2054 /*10*/ 0, 2055 MatLUFactor_SeqBAIJ, 2056 0, 2057 0, 2058 MatTranspose_SeqBAIJ, 2059 /*15*/ MatGetInfo_SeqBAIJ, 2060 MatEqual_SeqBAIJ, 2061 MatGetDiagonal_SeqBAIJ, 2062 MatDiagonalScale_SeqBAIJ, 2063 MatNorm_SeqBAIJ, 2064 /*20*/ 0, 2065 MatAssemblyEnd_SeqBAIJ, 2066 0, 2067 MatSetOption_SeqBAIJ, 2068 MatZeroEntries_SeqBAIJ, 2069 /*25*/ MatZeroRows_SeqBAIJ, 2070 MatLUFactorSymbolic_SeqBAIJ, 2071 MatLUFactorNumeric_SeqBAIJ_N, 2072 MatCholeskyFactorSymbolic_SeqBAIJ, 2073 MatCholeskyFactorNumeric_SeqBAIJ_N, 2074 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2075 MatILUFactorSymbolic_SeqBAIJ, 2076 MatICCFactorSymbolic_SeqBAIJ, 2077 MatGetArray_SeqBAIJ, 2078 MatRestoreArray_SeqBAIJ, 2079 /*35*/ MatDuplicate_SeqBAIJ, 2080 0, 2081 0, 2082 MatILUFactor_SeqBAIJ, 2083 0, 2084 /*40*/ MatAXPY_SeqBAIJ, 2085 MatGetSubMatrices_SeqBAIJ, 2086 MatIncreaseOverlap_SeqBAIJ, 2087 MatGetValues_SeqBAIJ, 2088 MatCopy_SeqBAIJ, 2089 /*45*/ MatPrintHelp_SeqBAIJ, 2090 MatScale_SeqBAIJ, 2091 0, 2092 0, 2093 0, 2094 /*50*/ 0, 2095 MatGetRowIJ_SeqBAIJ, 2096 MatRestoreRowIJ_SeqBAIJ, 2097 0, 2098 0, 2099 /*55*/ 0, 2100 0, 2101 0, 2102 0, 2103 MatSetValuesBlocked_SeqBAIJ, 2104 /*60*/ MatGetSubMatrix_SeqBAIJ, 2105 MatDestroy_SeqBAIJ, 2106 MatView_SeqBAIJ, 2107 0, 2108 0, 2109 /*65*/ 0, 2110 0, 2111 0, 2112 0, 2113 0, 2114 /*70*/ MatGetRowMax_SeqBAIJ, 2115 MatConvert_Basic, 2116 0, 2117 0, 2118 0, 2119 /*75*/ 0, 2120 0, 2121 0, 2122 0, 2123 0, 2124 /*80*/ 0, 2125 0, 2126 0, 2127 0, 2128 MatLoad_SeqBAIJ, 2129 /*85*/ 0, 2130 0, 2131 0, 2132 0, 2133 0, 2134 /*90*/ 0, 2135 0, 2136 0, 2137 0, 2138 0, 2139 /*95*/ 0, 2140 0, 2141 0, 2142 0, 2143 0, 2144 /*100*/0, 2145 0, 2146 0, 2147 0, 2148 0, 2149 /*105*/0, 2150 MatRealPart_SeqBAIJ, 2151 MatImaginaryPart_SeqBAIJ 2152 }; 2153 2154 EXTERN_C_BEGIN 2155 #undef __FUNCT__ 2156 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2157 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2158 { 2159 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2160 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2161 PetscErrorCode ierr; 2162 2163 PetscFunctionBegin; 2164 if (aij->nonew != 1) { 2165 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2166 } 2167 2168 /* allocate space for values if not already there */ 2169 if (!aij->saved_values) { 2170 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2171 } 2172 2173 /* copy values over */ 2174 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2175 PetscFunctionReturn(0); 2176 } 2177 EXTERN_C_END 2178 2179 EXTERN_C_BEGIN 2180 #undef __FUNCT__ 2181 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2182 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2183 { 2184 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2185 PetscErrorCode ierr; 2186 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2187 2188 PetscFunctionBegin; 2189 if (aij->nonew != 1) { 2190 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2191 } 2192 if (!aij->saved_values) { 2193 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2194 } 2195 2196 /* copy values over */ 2197 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2198 PetscFunctionReturn(0); 2199 } 2200 EXTERN_C_END 2201 2202 EXTERN_C_BEGIN 2203 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2204 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2205 EXTERN_C_END 2206 2207 EXTERN_C_BEGIN 2208 #undef __FUNCT__ 2209 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2210 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2211 { 2212 Mat_SeqBAIJ *b; 2213 PetscErrorCode ierr; 2214 PetscInt i,mbs,nbs,bs2,newbs = bs; 2215 PetscTruth flg,skipallocation = PETSC_FALSE; 2216 2217 PetscFunctionBegin; 2218 2219 if (nz == MAT_SKIP_ALLOCATION) { 2220 skipallocation = PETSC_TRUE; 2221 nz = 0; 2222 } 2223 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 2224 if (nnz && newbs != bs) { 2225 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2226 } 2227 bs = newbs; 2228 2229 B->rmap.bs = B->cmap.bs = bs; 2230 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 2231 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 2232 2233 B->preallocated = PETSC_TRUE; 2234 2235 mbs = B->rmap.n/bs; 2236 nbs = B->cmap.n/bs; 2237 bs2 = bs*bs; 2238 2239 if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) { 2240 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs); 2241 } 2242 2243 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2244 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2245 if (nnz) { 2246 for (i=0; i<mbs; i++) { 2247 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2248 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2249 } 2250 } 2251 2252 b = (Mat_SeqBAIJ*)B->data; 2253 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 2254 B->ops->solve = MatSolve_SeqBAIJ_Update; 2255 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2256 if (!flg) { 2257 switch (bs) { 2258 case 1: 2259 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2260 B->ops->mult = MatMult_SeqBAIJ_1; 2261 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2262 break; 2263 case 2: 2264 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2265 B->ops->mult = MatMult_SeqBAIJ_2; 2266 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2267 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2268 break; 2269 case 3: 2270 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2271 B->ops->mult = MatMult_SeqBAIJ_3; 2272 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2273 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2274 break; 2275 case 4: 2276 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2277 B->ops->mult = MatMult_SeqBAIJ_4; 2278 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2279 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2280 break; 2281 case 5: 2282 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2283 B->ops->mult = MatMult_SeqBAIJ_5; 2284 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2285 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2286 break; 2287 case 6: 2288 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2289 B->ops->mult = MatMult_SeqBAIJ_6; 2290 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2291 break; 2292 case 7: 2293 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2294 B->ops->mult = MatMult_SeqBAIJ_7; 2295 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2296 break; 2297 default: 2298 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2299 B->ops->mult = MatMult_SeqBAIJ_N; 2300 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2301 break; 2302 } 2303 } 2304 B->rmap.bs = bs; 2305 b->mbs = mbs; 2306 b->nbs = nbs; 2307 if (!skipallocation) { 2308 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2309 /* b->ilen will count nonzeros in each block row so far. */ 2310 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2311 if (!nnz) { 2312 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2313 else if (nz <= 0) nz = 1; 2314 for (i=0; i<mbs; i++) b->imax[i] = nz; 2315 nz = nz*mbs; 2316 } else { 2317 nz = 0; 2318 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2319 } 2320 2321 /* allocate the matrix space */ 2322 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 2323 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2324 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2325 b->singlemalloc = PETSC_TRUE; 2326 2327 b->i[0] = 0; 2328 for (i=1; i<mbs+1; i++) { 2329 b->i[i] = b->i[i-1] + b->imax[i-1]; 2330 } 2331 b->freedata = PETSC_TRUE; 2332 } else { 2333 b->freedata = PETSC_FALSE; 2334 } 2335 2336 B->rmap.bs = bs; 2337 b->bs2 = bs2; 2338 b->mbs = mbs; 2339 b->nz = 0; 2340 b->maxnz = nz*bs2; 2341 B->info.nz_unneeded = (PetscReal)b->maxnz; 2342 PetscFunctionReturn(0); 2343 } 2344 EXTERN_C_END 2345 2346 /*MC 2347 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2348 block sparse compressed row format. 2349 2350 Options Database Keys: 2351 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2352 2353 Level: beginner 2354 2355 .seealso: MatCreateSeqBAIJ() 2356 M*/ 2357 2358 EXTERN_C_BEGIN 2359 #undef __FUNCT__ 2360 #define __FUNCT__ "MatCreate_SeqBAIJ" 2361 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2362 { 2363 PetscErrorCode ierr; 2364 PetscMPIInt size; 2365 Mat_SeqBAIJ *b; 2366 2367 PetscFunctionBegin; 2368 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 2369 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2370 2371 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2372 B->data = (void*)b; 2373 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2374 B->factor = 0; 2375 B->mapping = 0; 2376 b->row = 0; 2377 b->col = 0; 2378 b->icol = 0; 2379 b->reallocs = 0; 2380 b->saved_values = 0; 2381 #if defined(PETSC_USE_MAT_SINGLE) 2382 b->setvalueslen = 0; 2383 b->setvaluescopy = PETSC_NULL; 2384 #endif 2385 2386 b->sorted = PETSC_FALSE; 2387 b->roworiented = PETSC_TRUE; 2388 b->nonew = 0; 2389 b->diag = 0; 2390 b->solve_work = 0; 2391 b->mult_work = 0; 2392 B->spptr = 0; 2393 B->info.nz_unneeded = (PetscReal)b->maxnz; 2394 b->keepzeroedrows = PETSC_FALSE; 2395 b->xtoy = 0; 2396 b->XtoY = 0; 2397 b->compressedrow.use = PETSC_FALSE; 2398 b->compressedrow.nrows = 0; 2399 b->compressedrow.i = PETSC_NULL; 2400 b->compressedrow.rindex = PETSC_NULL; 2401 b->compressedrow.checked = PETSC_FALSE; 2402 B->same_nonzero = PETSC_FALSE; 2403 2404 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2405 "MatInvertBlockDiagonal_SeqBAIJ", 2406 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2407 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2408 "MatStoreValues_SeqBAIJ", 2409 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2410 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2411 "MatRetrieveValues_SeqBAIJ", 2412 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2413 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2414 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2415 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2416 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2417 "MatConvert_SeqBAIJ_SeqAIJ", 2418 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2419 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2420 "MatConvert_SeqBAIJ_SeqSBAIJ", 2421 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2422 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2423 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2424 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2425 PetscFunctionReturn(0); 2426 } 2427 EXTERN_C_END 2428 2429 #undef __FUNCT__ 2430 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2431 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2432 { 2433 Mat C; 2434 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2435 PetscErrorCode ierr; 2436 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2437 2438 PetscFunctionBegin; 2439 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2440 2441 *B = 0; 2442 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 2443 ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 2444 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 2445 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2446 c = (Mat_SeqBAIJ*)C->data; 2447 2448 C->rmap.N = A->rmap.N; 2449 C->cmap.N = A->cmap.N; 2450 C->rmap.bs = A->rmap.bs; 2451 c->bs2 = a->bs2; 2452 c->mbs = a->mbs; 2453 c->nbs = a->nbs; 2454 2455 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 2456 for (i=0; i<mbs; i++) { 2457 c->imax[i] = a->imax[i]; 2458 c->ilen[i] = a->ilen[i]; 2459 } 2460 2461 /* allocate the matrix space */ 2462 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2463 c->singlemalloc = PETSC_TRUE; 2464 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2465 if (mbs > 0) { 2466 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2467 if (cpvalues == MAT_COPY_VALUES) { 2468 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2469 } else { 2470 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2471 } 2472 } 2473 c->sorted = a->sorted; 2474 c->roworiented = a->roworiented; 2475 c->nonew = a->nonew; 2476 2477 if (a->diag) { 2478 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2479 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2480 for (i=0; i<mbs; i++) { 2481 c->diag[i] = a->diag[i]; 2482 } 2483 } else c->diag = 0; 2484 c->nz = a->nz; 2485 c->maxnz = a->maxnz; 2486 c->solve_work = 0; 2487 c->mult_work = 0; 2488 c->freedata = PETSC_TRUE; 2489 C->preallocated = PETSC_TRUE; 2490 C->assembled = PETSC_TRUE; 2491 2492 c->compressedrow.use = a->compressedrow.use; 2493 c->compressedrow.nrows = a->compressedrow.nrows; 2494 c->compressedrow.checked = a->compressedrow.checked; 2495 if ( a->compressedrow.checked && a->compressedrow.use){ 2496 i = a->compressedrow.nrows; 2497 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2498 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2499 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2500 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2501 } else { 2502 c->compressedrow.use = PETSC_FALSE; 2503 c->compressedrow.i = PETSC_NULL; 2504 c->compressedrow.rindex = PETSC_NULL; 2505 } 2506 C->same_nonzero = A->same_nonzero; 2507 *B = C; 2508 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 2509 PetscFunctionReturn(0); 2510 } 2511 2512 #undef __FUNCT__ 2513 #define __FUNCT__ "MatLoad_SeqBAIJ" 2514 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A) 2515 { 2516 Mat_SeqBAIJ *a; 2517 Mat B; 2518 PetscErrorCode ierr; 2519 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2520 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2521 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2522 PetscInt *masked,nmask,tmp,bs2,ishift; 2523 PetscMPIInt size; 2524 int fd; 2525 PetscScalar *aa; 2526 MPI_Comm comm = ((PetscObject)viewer)->comm; 2527 2528 PetscFunctionBegin; 2529 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2530 bs2 = bs*bs; 2531 2532 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2533 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2534 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2535 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2536 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2537 M = header[1]; N = header[2]; nz = header[3]; 2538 2539 if (header[3] < 0) { 2540 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2541 } 2542 2543 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2544 2545 /* 2546 This code adds extra rows to make sure the number of rows is 2547 divisible by the blocksize 2548 */ 2549 mbs = M/bs; 2550 extra_rows = bs - M + bs*(mbs); 2551 if (extra_rows == bs) extra_rows = 0; 2552 else mbs++; 2553 if (extra_rows) { 2554 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2555 } 2556 2557 /* read in row lengths */ 2558 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2559 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2560 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2561 2562 /* read in column indices */ 2563 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2564 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2565 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2566 2567 /* loop over row lengths determining block row lengths */ 2568 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2569 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2570 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2571 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2572 masked = mask + mbs; 2573 rowcount = 0; nzcount = 0; 2574 for (i=0; i<mbs; i++) { 2575 nmask = 0; 2576 for (j=0; j<bs; j++) { 2577 kmax = rowlengths[rowcount]; 2578 for (k=0; k<kmax; k++) { 2579 tmp = jj[nzcount++]/bs; 2580 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2581 } 2582 rowcount++; 2583 } 2584 browlengths[i] += nmask; 2585 /* zero out the mask elements we set */ 2586 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2587 } 2588 2589 /* create our matrix */ 2590 ierr = MatCreate(comm,&B); 2591 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 2592 ierr = MatSetType(B,type);CHKERRQ(ierr); 2593 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 2594 a = (Mat_SeqBAIJ*)B->data; 2595 2596 /* set matrix "i" values */ 2597 a->i[0] = 0; 2598 for (i=1; i<= mbs; i++) { 2599 a->i[i] = a->i[i-1] + browlengths[i-1]; 2600 a->ilen[i-1] = browlengths[i-1]; 2601 } 2602 a->nz = 0; 2603 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2604 2605 /* read in nonzero values */ 2606 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2607 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2608 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2609 2610 /* set "a" and "j" values into matrix */ 2611 nzcount = 0; jcount = 0; 2612 for (i=0; i<mbs; i++) { 2613 nzcountb = nzcount; 2614 nmask = 0; 2615 for (j=0; j<bs; j++) { 2616 kmax = rowlengths[i*bs+j]; 2617 for (k=0; k<kmax; k++) { 2618 tmp = jj[nzcount++]/bs; 2619 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2620 } 2621 } 2622 /* sort the masked values */ 2623 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2624 2625 /* set "j" values into matrix */ 2626 maskcount = 1; 2627 for (j=0; j<nmask; j++) { 2628 a->j[jcount++] = masked[j]; 2629 mask[masked[j]] = maskcount++; 2630 } 2631 /* set "a" values into matrix */ 2632 ishift = bs2*a->i[i]; 2633 for (j=0; j<bs; j++) { 2634 kmax = rowlengths[i*bs+j]; 2635 for (k=0; k<kmax; k++) { 2636 tmp = jj[nzcountb]/bs ; 2637 block = mask[tmp] - 1; 2638 point = jj[nzcountb] - bs*tmp; 2639 idx = ishift + bs2*block + j + bs*point; 2640 a->a[idx] = (MatScalar)aa[nzcountb++]; 2641 } 2642 } 2643 /* zero out the mask elements we set */ 2644 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2645 } 2646 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2647 2648 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2649 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2650 ierr = PetscFree(aa);CHKERRQ(ierr); 2651 ierr = PetscFree(jj);CHKERRQ(ierr); 2652 ierr = PetscFree(mask);CHKERRQ(ierr); 2653 2654 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2655 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2656 ierr = MatView_Private(B);CHKERRQ(ierr); 2657 2658 *A = B; 2659 PetscFunctionReturn(0); 2660 } 2661 2662 #undef __FUNCT__ 2663 #define __FUNCT__ "MatCreateSeqBAIJ" 2664 /*@C 2665 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2666 compressed row) format. For good matrix assembly performance the 2667 user should preallocate the matrix storage by setting the parameter nz 2668 (or the array nnz). By setting these parameters accurately, performance 2669 during matrix assembly can be increased by more than a factor of 50. 2670 2671 Collective on MPI_Comm 2672 2673 Input Parameters: 2674 + comm - MPI communicator, set to PETSC_COMM_SELF 2675 . bs - size of block 2676 . m - number of rows 2677 . n - number of columns 2678 . nz - number of nonzero blocks per block row (same for all rows) 2679 - nnz - array containing the number of nonzero blocks in the various block rows 2680 (possibly different for each block row) or PETSC_NULL 2681 2682 Output Parameter: 2683 . A - the matrix 2684 2685 Options Database Keys: 2686 . -mat_no_unroll - uses code that does not unroll the loops in the 2687 block calculations (much slower) 2688 . -mat_block_size - size of the blocks to use 2689 2690 Level: intermediate 2691 2692 Notes: 2693 The number of rows and columns must be divisible by blocksize. 2694 2695 If the nnz parameter is given then the nz parameter is ignored 2696 2697 A nonzero block is any block that as 1 or more nonzeros in it 2698 2699 The block AIJ format is fully compatible with standard Fortran 77 2700 storage. That is, the stored row and column indices can begin at 2701 either one (as in Fortran) or zero. See the users' manual for details. 2702 2703 Specify the preallocated storage with either nz or nnz (not both). 2704 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2705 allocation. For additional details, see the users manual chapter on 2706 matrices. 2707 2708 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2709 @*/ 2710 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 2711 { 2712 PetscErrorCode ierr; 2713 2714 PetscFunctionBegin; 2715 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2716 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2717 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2718 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 2719 PetscFunctionReturn(0); 2720 } 2721 2722 #undef __FUNCT__ 2723 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2724 /*@C 2725 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2726 per row in the matrix. For good matrix assembly performance the 2727 user should preallocate the matrix storage by setting the parameter nz 2728 (or the array nnz). By setting these parameters accurately, performance 2729 during matrix assembly can be increased by more than a factor of 50. 2730 2731 Collective on MPI_Comm 2732 2733 Input Parameters: 2734 + A - the matrix 2735 . bs - size of block 2736 . nz - number of block nonzeros per block row (same for all rows) 2737 - nnz - array containing the number of block nonzeros in the various block rows 2738 (possibly different for each block row) or PETSC_NULL 2739 2740 Options Database Keys: 2741 . -mat_no_unroll - uses code that does not unroll the loops in the 2742 block calculations (much slower) 2743 . -mat_block_size - size of the blocks to use 2744 2745 Level: intermediate 2746 2747 Notes: 2748 If the nnz parameter is given then the nz parameter is ignored 2749 2750 The block AIJ format is fully compatible with standard Fortran 77 2751 storage. That is, the stored row and column indices can begin at 2752 either one (as in Fortran) or zero. See the users' manual for details. 2753 2754 Specify the preallocated storage with either nz or nnz (not both). 2755 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2756 allocation. For additional details, see the users manual chapter on 2757 matrices. 2758 2759 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2760 @*/ 2761 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 2762 { 2763 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 2764 2765 PetscFunctionBegin; 2766 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2767 if (f) { 2768 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2769 } 2770 PetscFunctionReturn(0); 2771 } 2772 2773 #undef __FUNCT__ 2774 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 2775 /*@ 2776 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 2777 (upper triangular entries in CSR format) provided by the user. 2778 2779 Collective on MPI_Comm 2780 2781 Input Parameters: 2782 + comm - must be an MPI communicator of size 1 2783 . bs - size of block 2784 . m - number of rows 2785 . n - number of columns 2786 . i - row indices 2787 . j - column indices 2788 - a - matrix values 2789 2790 Output Parameter: 2791 . mat - the matrix 2792 2793 Level: intermediate 2794 2795 Notes: 2796 The i, j, and a arrays are not copied by this routine, the user must free these arrays 2797 once the matrix is destroyed 2798 2799 You cannot set new nonzero locations into this matrix, that will generate an error. 2800 2801 The i and j indices are 0 based 2802 2803 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 2804 2805 @*/ 2806 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 2807 { 2808 PetscErrorCode ierr; 2809 PetscInt ii; 2810 Mat_SeqBAIJ *baij; 2811 2812 PetscFunctionBegin; 2813 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 2814 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2815 2816 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2817 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 2818 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 2819 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 2820 baij = (Mat_SeqBAIJ*)(*mat)->data; 2821 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 2822 2823 baij->i = i; 2824 baij->j = j; 2825 baij->a = a; 2826 baij->singlemalloc = PETSC_FALSE; 2827 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 2828 baij->freedata = PETSC_FALSE; 2829 2830 for (ii=0; ii<m; ii++) { 2831 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 2832 #if defined(PETSC_USE_DEBUG) 2833 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 2834 #endif 2835 } 2836 #if defined(PETSC_USE_DEBUG) 2837 for (ii=0; ii<baij->i[m]; ii++) { 2838 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 2839 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 2840 } 2841 #endif 2842 2843 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2844 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2845 PetscFunctionReturn(0); 2846 } 2847