1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 15 /*@C 16 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 17 18 Collective on Mat 19 20 Input Parameters: 21 . mat - the matrix 22 23 Level: advanced 24 @*/ 25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 26 { 27 PetscErrorCode ierr,(*f)(Mat); 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 31 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 32 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 33 34 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 35 if (f) { 36 ierr = (*f)(mat);CHKERRQ(ierr); 37 } else { 38 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 39 } 40 PetscFunctionReturn(0); 41 } 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 47 { 48 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 49 PetscErrorCode ierr; 50 PetscInt *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs; 51 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 52 53 PetscFunctionBegin; 54 if (a->idiagvalid) PetscFunctionReturn(0); 55 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56 diag_offset = a->diag; 57 if (!a->idiag) { 58 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59 } 60 diag = a->idiag; 61 mdiag = a->idiag+bs*bs*mbs; 62 /* factor and invert each block */ 63 switch (bs){ 64 case 2: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 4*diag_offset[i]; 67 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 68 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 69 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 70 diag += 4; 71 mdiag += 4; 72 } 73 break; 74 case 3: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 9*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79 diag[8] = odiag[8]; 80 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 81 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 82 mdiag[8] = odiag[8]; 83 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 84 diag += 9; 85 mdiag += 9; 86 } 87 break; 88 case 4: 89 for (i=0; i<mbs; i++) { 90 odiag = v + 16*diag_offset[i]; 91 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 92 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 94 diag += 16; 95 mdiag += 16; 96 } 97 break; 98 case 5: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 25*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 104 diag += 25; 105 mdiag += 25; 106 } 107 break; 108 default: 109 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 110 } 111 a->idiagvalid = PETSC_TRUE; 112 PetscFunctionReturn(0); 113 } 114 EXTERN_C_END 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 119 { 120 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 121 PetscScalar *x,x1,x2,s1,s2; 122 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 123 PetscErrorCode ierr; 124 PetscInt m = a->mbs,i,i2,nz,idx; 125 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 126 127 PetscFunctionBegin; 128 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 129 its = its*lits; 130 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 131 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 132 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 133 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 134 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 135 136 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 137 138 diag = a->diag; 139 idiag = a->idiag; 140 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 141 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 142 143 if (flag & SOR_ZERO_INITIAL_GUESS) { 144 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 145 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 146 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 147 i2 = 2; 148 idiag += 4; 149 for (i=1; i<m; i++) { 150 v = aa + 4*ai[i]; 151 vi = aj + ai[i]; 152 nz = diag[i] - ai[i]; 153 s1 = b[i2]; s2 = b[i2+1]; 154 while (nz--) { 155 idx = 2*(*vi++); 156 x1 = x[idx]; x2 = x[1+idx]; 157 s1 -= v[0]*x1 + v[2]*x2; 158 s2 -= v[1]*x1 + v[3]*x2; 159 v += 4; 160 } 161 x[i2] = idiag[0]*s1 + idiag[2]*s2; 162 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 163 idiag += 4; 164 i2 += 2; 165 } 166 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 167 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 168 } 169 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 170 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 171 i2 = 0; 172 mdiag = a->idiag+4*a->mbs; 173 for (i=0; i<m; i++) { 174 x1 = x[i2]; x2 = x[i2+1]; 175 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 176 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 177 mdiag += 4; 178 i2 += 2; 179 } 180 ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 181 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 182 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 183 } 184 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 185 idiag = a->idiag+4*a->mbs - 4; 186 i2 = 2*m - 2; 187 x1 = x[i2]; x2 = x[i2+1]; 188 x[i2] = idiag[0]*x1 + idiag[2]*x2; 189 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 190 idiag -= 4; 191 i2 -= 2; 192 for (i=m-2; i>=0; i--) { 193 v = aa + 4*(diag[i]+1); 194 vi = aj + diag[i] + 1; 195 nz = ai[i+1] - diag[i] - 1; 196 s1 = x[i2]; s2 = x[i2+1]; 197 while (nz--) { 198 idx = 2*(*vi++); 199 x1 = x[idx]; x2 = x[1+idx]; 200 s1 -= v[0]*x1 + v[2]*x2; 201 s2 -= v[1]*x1 + v[3]*x2; 202 v += 4; 203 } 204 x[i2] = idiag[0]*s1 + idiag[2]*s2; 205 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 206 idiag -= 4; 207 i2 -= 2; 208 } 209 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 210 } 211 } else { 212 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 213 } 214 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 215 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 216 PetscFunctionReturn(0); 217 } 218 219 #undef __FUNCT__ 220 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 221 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 222 { 223 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 224 PetscScalar *x,x1,x2,x3,s1,s2,s3; 225 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 226 PetscErrorCode ierr; 227 PetscInt m = a->mbs,i,i2,nz,idx; 228 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 229 230 PetscFunctionBegin; 231 its = its*lits; 232 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 233 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 234 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 235 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 236 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 237 238 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 239 240 diag = a->diag; 241 idiag = a->idiag; 242 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 243 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 244 245 if (flag & SOR_ZERO_INITIAL_GUESS) { 246 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 247 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 248 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 249 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 250 i2 = 3; 251 idiag += 9; 252 for (i=1; i<m; i++) { 253 v = aa + 9*ai[i]; 254 vi = aj + ai[i]; 255 nz = diag[i] - ai[i]; 256 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 257 while (nz--) { 258 idx = 3*(*vi++); 259 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 260 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 261 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 262 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 263 v += 9; 264 } 265 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 266 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 267 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 268 idiag += 9; 269 i2 += 3; 270 } 271 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 272 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 273 } 274 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 275 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 276 i2 = 0; 277 mdiag = a->idiag+9*a->mbs; 278 for (i=0; i<m; i++) { 279 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 280 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 281 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 282 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 283 mdiag += 9; 284 i2 += 3; 285 } 286 ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 287 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 288 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 289 } 290 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 291 idiag = a->idiag+9*a->mbs - 9; 292 i2 = 3*m - 3; 293 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 294 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 295 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 296 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 297 idiag -= 9; 298 i2 -= 3; 299 for (i=m-2; i>=0; i--) { 300 v = aa + 9*(diag[i]+1); 301 vi = aj + diag[i] + 1; 302 nz = ai[i+1] - diag[i] - 1; 303 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 304 while (nz--) { 305 idx = 3*(*vi++); 306 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 307 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 308 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 309 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 310 v += 9; 311 } 312 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 313 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 314 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 315 idiag -= 9; 316 i2 -= 3; 317 } 318 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 319 } 320 } else { 321 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 322 } 323 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 324 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 325 PetscFunctionReturn(0); 326 } 327 328 #undef __FUNCT__ 329 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 330 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 331 { 332 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 333 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 334 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 335 PetscErrorCode ierr; 336 PetscInt m = a->mbs,i,i2,nz,idx; 337 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 338 339 PetscFunctionBegin; 340 its = its*lits; 341 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 342 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 343 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 344 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 345 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 346 347 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 348 349 diag = a->diag; 350 idiag = a->idiag; 351 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 352 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 353 354 if (flag & SOR_ZERO_INITIAL_GUESS) { 355 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 356 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 357 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 358 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 359 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 360 i2 = 4; 361 idiag += 16; 362 for (i=1; i<m; i++) { 363 v = aa + 16*ai[i]; 364 vi = aj + ai[i]; 365 nz = diag[i] - ai[i]; 366 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 367 while (nz--) { 368 idx = 4*(*vi++); 369 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 370 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 371 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 372 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 373 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 374 v += 16; 375 } 376 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 377 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 378 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 379 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 380 idiag += 16; 381 i2 += 4; 382 } 383 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 384 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 385 } 386 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 387 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 388 i2 = 0; 389 mdiag = a->idiag+16*a->mbs; 390 for (i=0; i<m; i++) { 391 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 392 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 393 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 394 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 395 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 396 mdiag += 16; 397 i2 += 4; 398 } 399 ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 400 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 401 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 402 } 403 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 404 idiag = a->idiag+16*a->mbs - 16; 405 i2 = 4*m - 4; 406 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 407 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 408 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 409 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 410 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 411 idiag -= 16; 412 i2 -= 4; 413 for (i=m-2; i>=0; i--) { 414 v = aa + 16*(diag[i]+1); 415 vi = aj + diag[i] + 1; 416 nz = ai[i+1] - diag[i] - 1; 417 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 418 while (nz--) { 419 idx = 4*(*vi++); 420 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 421 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 422 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 423 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 424 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 425 v += 16; 426 } 427 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 428 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 429 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 430 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 431 idiag -= 16; 432 i2 -= 4; 433 } 434 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 435 } 436 } else { 437 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 438 } 439 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 440 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 441 PetscFunctionReturn(0); 442 } 443 444 #undef __FUNCT__ 445 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 446 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 447 { 448 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 449 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 450 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 451 PetscErrorCode ierr; 452 PetscInt m = a->mbs,i,i2,nz,idx; 453 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 454 455 PetscFunctionBegin; 456 its = its*lits; 457 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 458 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 459 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 460 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 461 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 462 463 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 464 465 diag = a->diag; 466 idiag = a->idiag; 467 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 468 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 469 470 if (flag & SOR_ZERO_INITIAL_GUESS) { 471 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 472 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 473 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 474 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 475 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 476 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 477 i2 = 5; 478 idiag += 25; 479 for (i=1; i<m; i++) { 480 v = aa + 25*ai[i]; 481 vi = aj + ai[i]; 482 nz = diag[i] - ai[i]; 483 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 484 while (nz--) { 485 idx = 5*(*vi++); 486 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 487 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 488 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 489 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 490 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 491 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 492 v += 25; 493 } 494 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 495 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 496 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 497 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 498 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 499 idiag += 25; 500 i2 += 5; 501 } 502 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 503 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 504 } 505 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 506 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 507 i2 = 0; 508 mdiag = a->idiag+25*a->mbs; 509 for (i=0; i<m; i++) { 510 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 511 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 512 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 513 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 514 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 515 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 516 mdiag += 25; 517 i2 += 5; 518 } 519 ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 520 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 521 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 522 } 523 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 524 idiag = a->idiag+25*a->mbs - 25; 525 i2 = 5*m - 5; 526 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 527 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 528 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 529 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 530 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 531 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 532 idiag -= 25; 533 i2 -= 5; 534 for (i=m-2; i>=0; i--) { 535 v = aa + 25*(diag[i]+1); 536 vi = aj + diag[i] + 1; 537 nz = ai[i+1] - diag[i] - 1; 538 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 539 while (nz--) { 540 idx = 5*(*vi++); 541 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 542 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 543 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 544 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 545 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 546 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 547 v += 25; 548 } 549 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 550 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 551 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 552 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 553 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 554 idiag -= 25; 555 i2 -= 5; 556 } 557 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 558 } 559 } else { 560 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 561 } 562 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 563 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 564 PetscFunctionReturn(0); 565 } 566 567 /* 568 Special version for direct calls from Fortran (Used in PETSc-fun3d) 569 */ 570 #if defined(PETSC_HAVE_FORTRAN_CAPS) 571 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 572 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 573 #define matsetvaluesblocked4_ matsetvaluesblocked4 574 #endif 575 576 EXTERN_C_BEGIN 577 #undef __FUNCT__ 578 #define __FUNCT__ "matsetvaluesblocked4_" 579 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 580 { 581 Mat A = *AA; 582 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 583 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 584 PetscInt *ai=a->i,*ailen=a->ilen; 585 PetscInt *aj=a->j,stepval,lastcol = -1; 586 const PetscScalar *value = v; 587 MatScalar *ap,*aa = a->a,*bap; 588 589 PetscFunctionBegin; 590 if (A->rmap.bs != 4) SETERRABORT(A->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 591 stepval = (n-1)*4; 592 for (k=0; k<m; k++) { /* loop over added rows */ 593 row = im[k]; 594 rp = aj + ai[row]; 595 ap = aa + 16*ai[row]; 596 nrow = ailen[row]; 597 low = 0; 598 high = nrow; 599 for (l=0; l<n; l++) { /* loop over added columns */ 600 col = in[l]; 601 if (col <= lastcol) low = 0; else high = nrow; 602 lastcol = col; 603 value = v + k*(stepval+4 + l)*4; 604 while (high-low > 7) { 605 t = (low+high)/2; 606 if (rp[t] > col) high = t; 607 else low = t; 608 } 609 for (i=low; i<high; i++) { 610 if (rp[i] > col) break; 611 if (rp[i] == col) { 612 bap = ap + 16*i; 613 for (ii=0; ii<4; ii++,value+=stepval) { 614 for (jj=ii; jj<16; jj+=4) { 615 bap[jj] += *value++; 616 } 617 } 618 goto noinsert2; 619 } 620 } 621 N = nrow++ - 1; 622 high++; /* added new column index thus must search to one higher than before */ 623 /* shift up all the later entries in this row */ 624 for (ii=N; ii>=i; ii--) { 625 rp[ii+1] = rp[ii]; 626 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 627 } 628 if (N >= i) { 629 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 630 } 631 rp[i] = col; 632 bap = ap + 16*i; 633 for (ii=0; ii<4; ii++,value+=stepval) { 634 for (jj=ii; jj<16; jj+=4) { 635 bap[jj] = *value++; 636 } 637 } 638 noinsert2:; 639 low = i; 640 } 641 ailen[row] = nrow; 642 } 643 PetscFunctionReturnVoid(); 644 } 645 EXTERN_C_END 646 647 #if defined(PETSC_HAVE_FORTRAN_CAPS) 648 #define matsetvalues4_ MATSETVALUES4 649 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 650 #define matsetvalues4_ matsetvalues4 651 #endif 652 653 EXTERN_C_BEGIN 654 #undef __FUNCT__ 655 #define __FUNCT__ "MatSetValues4_" 656 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 657 { 658 Mat A = *AA; 659 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 660 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 661 PetscInt *ai=a->i,*ailen=a->ilen; 662 PetscInt *aj=a->j,brow,bcol; 663 PetscInt ridx,cidx,lastcol = -1; 664 MatScalar *ap,value,*aa=a->a,*bap; 665 666 PetscFunctionBegin; 667 for (k=0; k<m; k++) { /* loop over added rows */ 668 row = im[k]; brow = row/4; 669 rp = aj + ai[brow]; 670 ap = aa + 16*ai[brow]; 671 nrow = ailen[brow]; 672 low = 0; 673 high = nrow; 674 for (l=0; l<n; l++) { /* loop over added columns */ 675 col = in[l]; bcol = col/4; 676 ridx = row % 4; cidx = col % 4; 677 value = v[l + k*n]; 678 if (col <= lastcol) low = 0; else high = nrow; 679 lastcol = col; 680 while (high-low > 7) { 681 t = (low+high)/2; 682 if (rp[t] > bcol) high = t; 683 else low = t; 684 } 685 for (i=low; i<high; i++) { 686 if (rp[i] > bcol) break; 687 if (rp[i] == bcol) { 688 bap = ap + 16*i + 4*cidx + ridx; 689 *bap += value; 690 goto noinsert1; 691 } 692 } 693 N = nrow++ - 1; 694 high++; /* added new column thus must search to one higher than before */ 695 /* shift up all the later entries in this row */ 696 for (ii=N; ii>=i; ii--) { 697 rp[ii+1] = rp[ii]; 698 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 699 } 700 if (N>=i) { 701 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 702 } 703 rp[i] = bcol; 704 ap[16*i + 4*cidx + ridx] = value; 705 noinsert1:; 706 low = i; 707 } 708 ailen[brow] = nrow; 709 } 710 PetscFunctionReturnVoid(); 711 } 712 EXTERN_C_END 713 714 /* UGLY, ugly, ugly 715 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 716 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 717 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 718 converts the entries into single precision and then calls ..._MatScalar() to put them 719 into the single precision data structures. 720 */ 721 #if defined(PETSC_USE_MAT_SINGLE) 722 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 723 #else 724 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 725 #endif 726 727 #define CHUNKSIZE 10 728 729 /* 730 Checks for missing diagonals 731 */ 732 #undef __FUNCT__ 733 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 734 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A) 735 { 736 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 737 PetscErrorCode ierr; 738 PetscInt *diag,*jj = a->j,i; 739 740 PetscFunctionBegin; 741 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 742 diag = a->diag; 743 for (i=0; i<a->mbs; i++) { 744 if (jj[diag[i]] != i) { 745 SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i); 746 } 747 } 748 PetscFunctionReturn(0); 749 } 750 751 #undef __FUNCT__ 752 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 753 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 754 { 755 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 756 PetscErrorCode ierr; 757 PetscInt i,j,m = a->mbs; 758 759 PetscFunctionBegin; 760 if (!a->diag) { 761 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 762 } 763 for (i=0; i<m; i++) { 764 a->diag[i] = a->i[i+1]; 765 for (j=a->i[i]; j<a->i[i+1]; j++) { 766 if (a->j[j] == i) { 767 a->diag[i] = j; 768 break; 769 } 770 } 771 } 772 PetscFunctionReturn(0); 773 } 774 775 776 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 777 778 #undef __FUNCT__ 779 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 780 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 781 { 782 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 783 PetscErrorCode ierr; 784 PetscInt n = a->mbs,i; 785 786 PetscFunctionBegin; 787 *nn = n; 788 if (!ia) PetscFunctionReturn(0); 789 if (symmetric) { 790 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 791 } else if (oshift == 1) { 792 /* temporarily add 1 to i and j indices */ 793 PetscInt nz = a->i[n]; 794 for (i=0; i<nz; i++) a->j[i]++; 795 for (i=0; i<n+1; i++) a->i[i]++; 796 *ia = a->i; *ja = a->j; 797 } else { 798 *ia = a->i; *ja = a->j; 799 } 800 801 PetscFunctionReturn(0); 802 } 803 804 #undef __FUNCT__ 805 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 806 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 807 { 808 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 809 PetscErrorCode ierr; 810 PetscInt i,n = a->mbs; 811 812 PetscFunctionBegin; 813 if (!ia) PetscFunctionReturn(0); 814 if (symmetric) { 815 ierr = PetscFree(*ia);CHKERRQ(ierr); 816 ierr = PetscFree(*ja);CHKERRQ(ierr); 817 } else if (oshift == 1) { 818 PetscInt nz = a->i[n]-1; 819 for (i=0; i<nz; i++) a->j[i]--; 820 for (i=0; i<n+1; i++) a->i[i]--; 821 } 822 PetscFunctionReturn(0); 823 } 824 825 #undef __FUNCT__ 826 #define __FUNCT__ "MatDestroy_SeqBAIJ" 827 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 828 { 829 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 830 PetscErrorCode ierr; 831 832 PetscFunctionBegin; 833 #if defined(PETSC_USE_LOG) 834 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz); 835 #endif 836 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 837 if (a->row) { 838 ierr = ISDestroy(a->row);CHKERRQ(ierr); 839 } 840 if (a->col) { 841 ierr = ISDestroy(a->col);CHKERRQ(ierr); 842 } 843 ierr = PetscFree(a->diag);CHKERRQ(ierr); 844 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 845 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 846 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 847 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 848 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 849 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 850 #if defined(PETSC_USE_MAT_SINGLE) 851 ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr); 852 #endif 853 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 854 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 855 856 ierr = PetscFree(a);CHKERRQ(ierr); 857 858 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 859 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 860 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 861 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 862 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 863 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 864 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 865 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 866 PetscFunctionReturn(0); 867 } 868 869 #undef __FUNCT__ 870 #define __FUNCT__ "MatSetOption_SeqBAIJ" 871 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op) 872 { 873 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 874 PetscErrorCode ierr; 875 876 PetscFunctionBegin; 877 switch (op) { 878 case MAT_ROW_ORIENTED: 879 a->roworiented = PETSC_TRUE; 880 break; 881 case MAT_COLUMN_ORIENTED: 882 a->roworiented = PETSC_FALSE; 883 break; 884 case MAT_COLUMNS_SORTED: 885 a->sorted = PETSC_TRUE; 886 break; 887 case MAT_COLUMNS_UNSORTED: 888 a->sorted = PETSC_FALSE; 889 break; 890 case MAT_KEEP_ZEROED_ROWS: 891 a->keepzeroedrows = PETSC_TRUE; 892 break; 893 case MAT_NO_NEW_NONZERO_LOCATIONS: 894 a->nonew = 1; 895 break; 896 case MAT_NEW_NONZERO_LOCATION_ERR: 897 a->nonew = -1; 898 break; 899 case MAT_NEW_NONZERO_ALLOCATION_ERR: 900 a->nonew = -2; 901 break; 902 case MAT_YES_NEW_NONZERO_LOCATIONS: 903 a->nonew = 0; 904 break; 905 case MAT_ROWS_SORTED: 906 case MAT_ROWS_UNSORTED: 907 case MAT_YES_NEW_DIAGONALS: 908 case MAT_IGNORE_OFF_PROC_ENTRIES: 909 case MAT_USE_HASH_TABLE: 910 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 911 break; 912 case MAT_NO_NEW_DIAGONALS: 913 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 914 case MAT_SYMMETRIC: 915 case MAT_STRUCTURALLY_SYMMETRIC: 916 case MAT_NOT_SYMMETRIC: 917 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 918 case MAT_HERMITIAN: 919 case MAT_NOT_HERMITIAN: 920 case MAT_SYMMETRY_ETERNAL: 921 case MAT_NOT_SYMMETRY_ETERNAL: 922 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 923 break; 924 default: 925 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 926 } 927 PetscFunctionReturn(0); 928 } 929 930 #undef __FUNCT__ 931 #define __FUNCT__ "MatGetRow_SeqBAIJ" 932 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 933 { 934 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 935 PetscErrorCode ierr; 936 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 937 MatScalar *aa,*aa_i; 938 PetscScalar *v_i; 939 940 PetscFunctionBegin; 941 bs = A->rmap.bs; 942 ai = a->i; 943 aj = a->j; 944 aa = a->a; 945 bs2 = a->bs2; 946 947 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 948 949 bn = row/bs; /* Block number */ 950 bp = row % bs; /* Block Position */ 951 M = ai[bn+1] - ai[bn]; 952 *nz = bs*M; 953 954 if (v) { 955 *v = 0; 956 if (*nz) { 957 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 958 for (i=0; i<M; i++) { /* for each block in the block row */ 959 v_i = *v + i*bs; 960 aa_i = aa + bs2*(ai[bn] + i); 961 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 962 } 963 } 964 } 965 966 if (idx) { 967 *idx = 0; 968 if (*nz) { 969 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 970 for (i=0; i<M; i++) { /* for each block in the block row */ 971 idx_i = *idx + i*bs; 972 itmp = bs*aj[ai[bn] + i]; 973 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 974 } 975 } 976 } 977 PetscFunctionReturn(0); 978 } 979 980 #undef __FUNCT__ 981 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 982 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 983 { 984 PetscErrorCode ierr; 985 986 PetscFunctionBegin; 987 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 988 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 989 PetscFunctionReturn(0); 990 } 991 992 #undef __FUNCT__ 993 #define __FUNCT__ "MatTranspose_SeqBAIJ" 994 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 995 { 996 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 997 Mat C; 998 PetscErrorCode ierr; 999 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1000 PetscInt *rows,*cols,bs2=a->bs2; 1001 PetscScalar *array; 1002 1003 PetscFunctionBegin; 1004 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 1005 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1006 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1007 1008 #if defined(PETSC_USE_MAT_SINGLE) 1009 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 1010 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 1011 #else 1012 array = a->a; 1013 #endif 1014 1015 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1016 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 1017 ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr); 1018 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1019 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1020 ierr = PetscFree(col);CHKERRQ(ierr); 1021 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1022 cols = rows + bs; 1023 for (i=0; i<mbs; i++) { 1024 cols[0] = i*bs; 1025 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1026 len = ai[i+1] - ai[i]; 1027 for (j=0; j<len; j++) { 1028 rows[0] = (*aj++)*bs; 1029 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1030 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1031 array += bs2; 1032 } 1033 } 1034 ierr = PetscFree(rows);CHKERRQ(ierr); 1035 #if defined(PETSC_USE_MAT_SINGLE) 1036 ierr = PetscFree(array); 1037 #endif 1038 1039 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1040 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1041 1042 if (B) { 1043 *B = C; 1044 } else { 1045 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1046 } 1047 PetscFunctionReturn(0); 1048 } 1049 1050 #undef __FUNCT__ 1051 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1052 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1053 { 1054 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1055 PetscErrorCode ierr; 1056 PetscInt i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2; 1057 int fd; 1058 PetscScalar *aa; 1059 FILE *file; 1060 1061 PetscFunctionBegin; 1062 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1063 ierr = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1064 col_lens[0] = MAT_FILE_COOKIE; 1065 1066 col_lens[1] = A->rmap.N; 1067 col_lens[2] = A->cmap.n; 1068 col_lens[3] = a->nz*bs2; 1069 1070 /* store lengths of each row and write (including header) to file */ 1071 count = 0; 1072 for (i=0; i<a->mbs; i++) { 1073 for (j=0; j<bs; j++) { 1074 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1075 } 1076 } 1077 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1078 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1079 1080 /* store column indices (zero start index) */ 1081 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1082 count = 0; 1083 for (i=0; i<a->mbs; i++) { 1084 for (j=0; j<bs; j++) { 1085 for (k=a->i[i]; k<a->i[i+1]; k++) { 1086 for (l=0; l<bs; l++) { 1087 jj[count++] = bs*a->j[k] + l; 1088 } 1089 } 1090 } 1091 } 1092 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1093 ierr = PetscFree(jj);CHKERRQ(ierr); 1094 1095 /* store nonzero values */ 1096 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1097 count = 0; 1098 for (i=0; i<a->mbs; i++) { 1099 for (j=0; j<bs; j++) { 1100 for (k=a->i[i]; k<a->i[i+1]; k++) { 1101 for (l=0; l<bs; l++) { 1102 aa[count++] = a->a[bs2*k + l*bs + j]; 1103 } 1104 } 1105 } 1106 } 1107 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1108 ierr = PetscFree(aa);CHKERRQ(ierr); 1109 1110 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1111 if (file) { 1112 fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs); 1113 } 1114 PetscFunctionReturn(0); 1115 } 1116 1117 #undef __FUNCT__ 1118 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1119 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1120 { 1121 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1122 PetscErrorCode ierr; 1123 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 1124 PetscViewerFormat format; 1125 1126 PetscFunctionBegin; 1127 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1128 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1129 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1130 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1131 Mat aij; 1132 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1133 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1134 ierr = MatDestroy(aij);CHKERRQ(ierr); 1135 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1136 PetscFunctionReturn(0); 1137 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1138 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1139 for (i=0; i<a->mbs; i++) { 1140 for (j=0; j<bs; j++) { 1141 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1142 for (k=a->i[i]; k<a->i[i+1]; k++) { 1143 for (l=0; l<bs; l++) { 1144 #if defined(PETSC_USE_COMPLEX) 1145 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1146 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1147 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1148 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1149 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1150 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1151 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1152 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1153 } 1154 #else 1155 if (a->a[bs2*k + l*bs + j] != 0.0) { 1156 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1157 } 1158 #endif 1159 } 1160 } 1161 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1162 } 1163 } 1164 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1165 } else { 1166 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1167 for (i=0; i<a->mbs; i++) { 1168 for (j=0; j<bs; j++) { 1169 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1170 for (k=a->i[i]; k<a->i[i+1]; k++) { 1171 for (l=0; l<bs; l++) { 1172 #if defined(PETSC_USE_COMPLEX) 1173 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1174 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1175 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1176 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1177 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1178 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1179 } else { 1180 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1181 } 1182 #else 1183 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1184 #endif 1185 } 1186 } 1187 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1188 } 1189 } 1190 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1191 } 1192 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1193 PetscFunctionReturn(0); 1194 } 1195 1196 #undef __FUNCT__ 1197 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1198 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1199 { 1200 Mat A = (Mat) Aa; 1201 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1202 PetscErrorCode ierr; 1203 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 1204 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1205 MatScalar *aa; 1206 PetscViewer viewer; 1207 1208 PetscFunctionBegin; 1209 1210 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1211 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1212 1213 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1214 1215 /* loop over matrix elements drawing boxes */ 1216 color = PETSC_DRAW_BLUE; 1217 for (i=0,row=0; i<mbs; i++,row+=bs) { 1218 for (j=a->i[i]; j<a->i[i+1]; j++) { 1219 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1220 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1221 aa = a->a + j*bs2; 1222 for (k=0; k<bs; k++) { 1223 for (l=0; l<bs; l++) { 1224 if (PetscRealPart(*aa++) >= 0.) continue; 1225 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1226 } 1227 } 1228 } 1229 } 1230 color = PETSC_DRAW_CYAN; 1231 for (i=0,row=0; i<mbs; i++,row+=bs) { 1232 for (j=a->i[i]; j<a->i[i+1]; j++) { 1233 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1234 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1235 aa = a->a + j*bs2; 1236 for (k=0; k<bs; k++) { 1237 for (l=0; l<bs; l++) { 1238 if (PetscRealPart(*aa++) != 0.) continue; 1239 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1240 } 1241 } 1242 } 1243 } 1244 1245 color = PETSC_DRAW_RED; 1246 for (i=0,row=0; i<mbs; i++,row+=bs) { 1247 for (j=a->i[i]; j<a->i[i+1]; j++) { 1248 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1249 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1250 aa = a->a + j*bs2; 1251 for (k=0; k<bs; k++) { 1252 for (l=0; l<bs; l++) { 1253 if (PetscRealPart(*aa++) <= 0.) continue; 1254 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1255 } 1256 } 1257 } 1258 } 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #undef __FUNCT__ 1263 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1264 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1265 { 1266 PetscErrorCode ierr; 1267 PetscReal xl,yl,xr,yr,w,h; 1268 PetscDraw draw; 1269 PetscTruth isnull; 1270 1271 PetscFunctionBegin; 1272 1273 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1274 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1275 1276 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1277 xr = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 1278 xr += w; yr += h; xl = -w; yl = -h; 1279 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1280 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1281 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1282 PetscFunctionReturn(0); 1283 } 1284 1285 #undef __FUNCT__ 1286 #define __FUNCT__ "MatView_SeqBAIJ" 1287 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1288 { 1289 PetscErrorCode ierr; 1290 PetscTruth iascii,isbinary,isdraw; 1291 1292 PetscFunctionBegin; 1293 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1294 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1295 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1296 if (iascii){ 1297 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1298 } else if (isbinary) { 1299 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1300 } else if (isdraw) { 1301 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1302 } else { 1303 Mat B; 1304 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1305 ierr = MatView(B,viewer);CHKERRQ(ierr); 1306 ierr = MatDestroy(B);CHKERRQ(ierr); 1307 } 1308 PetscFunctionReturn(0); 1309 } 1310 1311 1312 #undef __FUNCT__ 1313 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1314 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1315 { 1316 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1317 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1318 PetscInt *ai = a->i,*ailen = a->ilen; 1319 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 1320 MatScalar *ap,*aa = a->a,zero = 0.0; 1321 1322 PetscFunctionBegin; 1323 for (k=0; k<m; k++) { /* loop over rows */ 1324 row = im[k]; brow = row/bs; 1325 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 1326 if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1327 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1328 nrow = ailen[brow]; 1329 for (l=0; l<n; l++) { /* loop over columns */ 1330 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 1331 if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1332 col = in[l] ; 1333 bcol = col/bs; 1334 cidx = col%bs; 1335 ridx = row%bs; 1336 high = nrow; 1337 low = 0; /* assume unsorted */ 1338 while (high-low > 5) { 1339 t = (low+high)/2; 1340 if (rp[t] > bcol) high = t; 1341 else low = t; 1342 } 1343 for (i=low; i<high; i++) { 1344 if (rp[i] > bcol) break; 1345 if (rp[i] == bcol) { 1346 *v++ = ap[bs2*i+bs*cidx+ridx]; 1347 goto finished; 1348 } 1349 } 1350 *v++ = zero; 1351 finished:; 1352 } 1353 } 1354 PetscFunctionReturn(0); 1355 } 1356 1357 #if defined(PETSC_USE_MAT_SINGLE) 1358 #undef __FUNCT__ 1359 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1360 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 1361 { 1362 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1363 PetscErrorCode ierr; 1364 PetscInt i,N = m*n*b->bs2; 1365 MatScalar *vsingle; 1366 1367 PetscFunctionBegin; 1368 if (N > b->setvalueslen) { 1369 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 1370 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1371 b->setvalueslen = N; 1372 } 1373 vsingle = b->setvaluescopy; 1374 for (i=0; i<N; i++) { 1375 vsingle[i] = v[i]; 1376 } 1377 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1378 PetscFunctionReturn(0); 1379 } 1380 #endif 1381 1382 1383 #undef __FUNCT__ 1384 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1385 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is) 1386 { 1387 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1388 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1389 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1390 PetscErrorCode ierr; 1391 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 1392 PetscTruth roworiented=a->roworiented; 1393 const MatScalar *value = v; 1394 MatScalar *ap,*aa = a->a,*bap; 1395 1396 PetscFunctionBegin; 1397 if (roworiented) { 1398 stepval = (n-1)*bs; 1399 } else { 1400 stepval = (m-1)*bs; 1401 } 1402 for (k=0; k<m; k++) { /* loop over added rows */ 1403 row = im[k]; 1404 if (row < 0) continue; 1405 #if defined(PETSC_USE_DEBUG) 1406 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1407 #endif 1408 rp = aj + ai[row]; 1409 ap = aa + bs2*ai[row]; 1410 rmax = imax[row]; 1411 nrow = ailen[row]; 1412 low = 0; 1413 high = nrow; 1414 for (l=0; l<n; l++) { /* loop over added columns */ 1415 if (in[l] < 0) continue; 1416 #if defined(PETSC_USE_DEBUG) 1417 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1418 #endif 1419 col = in[l]; 1420 if (roworiented) { 1421 value = v + k*(stepval+bs)*bs + l*bs; 1422 } else { 1423 value = v + l*(stepval+bs)*bs + k*bs; 1424 } 1425 if (col <= lastcol) low = 0; else high = nrow; 1426 lastcol = col; 1427 while (high-low > 7) { 1428 t = (low+high)/2; 1429 if (rp[t] > col) high = t; 1430 else low = t; 1431 } 1432 for (i=low; i<high; i++) { 1433 if (rp[i] > col) break; 1434 if (rp[i] == col) { 1435 bap = ap + bs2*i; 1436 if (roworiented) { 1437 if (is == ADD_VALUES) { 1438 for (ii=0; ii<bs; ii++,value+=stepval) { 1439 for (jj=ii; jj<bs2; jj+=bs) { 1440 bap[jj] += *value++; 1441 } 1442 } 1443 } else { 1444 for (ii=0; ii<bs; ii++,value+=stepval) { 1445 for (jj=ii; jj<bs2; jj+=bs) { 1446 bap[jj] = *value++; 1447 } 1448 } 1449 } 1450 } else { 1451 if (is == ADD_VALUES) { 1452 for (ii=0; ii<bs; ii++,value+=stepval) { 1453 for (jj=0; jj<bs; jj++) { 1454 *bap++ += *value++; 1455 } 1456 } 1457 } else { 1458 for (ii=0; ii<bs; ii++,value+=stepval) { 1459 for (jj=0; jj<bs; jj++) { 1460 *bap++ = *value++; 1461 } 1462 } 1463 } 1464 } 1465 goto noinsert2; 1466 } 1467 } 1468 if (nonew == 1) goto noinsert2; 1469 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1470 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1471 N = nrow++ - 1; high++; 1472 /* shift up all the later entries in this row */ 1473 for (ii=N; ii>=i; ii--) { 1474 rp[ii+1] = rp[ii]; 1475 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1476 } 1477 if (N >= i) { 1478 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1479 } 1480 rp[i] = col; 1481 bap = ap + bs2*i; 1482 if (roworiented) { 1483 for (ii=0; ii<bs; ii++,value+=stepval) { 1484 for (jj=ii; jj<bs2; jj+=bs) { 1485 bap[jj] = *value++; 1486 } 1487 } 1488 } else { 1489 for (ii=0; ii<bs; ii++,value+=stepval) { 1490 for (jj=0; jj<bs; jj++) { 1491 *bap++ = *value++; 1492 } 1493 } 1494 } 1495 noinsert2:; 1496 low = i; 1497 } 1498 ailen[row] = nrow; 1499 } 1500 PetscFunctionReturn(0); 1501 } 1502 1503 #undef __FUNCT__ 1504 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1505 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1506 { 1507 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1508 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1509 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 1510 PetscErrorCode ierr; 1511 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1512 MatScalar *aa = a->a,*ap; 1513 PetscReal ratio=0.6; 1514 1515 PetscFunctionBegin; 1516 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1517 1518 if (m) rmax = ailen[0]; 1519 for (i=1; i<mbs; i++) { 1520 /* move each row back by the amount of empty slots (fshift) before it*/ 1521 fshift += imax[i-1] - ailen[i-1]; 1522 rmax = PetscMax(rmax,ailen[i]); 1523 if (fshift) { 1524 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1525 N = ailen[i]; 1526 for (j=0; j<N; j++) { 1527 ip[j-fshift] = ip[j]; 1528 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1529 } 1530 } 1531 ai[i] = ai[i-1] + ailen[i-1]; 1532 } 1533 if (mbs) { 1534 fshift += imax[mbs-1] - ailen[mbs-1]; 1535 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1536 } 1537 /* reset ilen and imax for each row */ 1538 for (i=0; i<mbs; i++) { 1539 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1540 } 1541 a->nz = ai[mbs]; 1542 1543 /* diagonals may have moved, so kill the diagonal pointers */ 1544 a->idiagvalid = PETSC_FALSE; 1545 if (fshift && a->diag) { 1546 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1547 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1548 a->diag = 0; 1549 } 1550 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1551 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1552 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1553 a->reallocs = 0; 1554 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1555 1556 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1557 if (a->compressedrow.use){ 1558 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1559 } 1560 1561 A->same_nonzero = PETSC_TRUE; 1562 PetscFunctionReturn(0); 1563 } 1564 1565 /* 1566 This function returns an array of flags which indicate the locations of contiguous 1567 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1568 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1569 Assume: sizes should be long enough to hold all the values. 1570 */ 1571 #undef __FUNCT__ 1572 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1573 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1574 { 1575 PetscInt i,j,k,row; 1576 PetscTruth flg; 1577 1578 PetscFunctionBegin; 1579 for (i=0,j=0; i<n; j++) { 1580 row = idx[i]; 1581 if (row%bs!=0) { /* Not the begining of a block */ 1582 sizes[j] = 1; 1583 i++; 1584 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1585 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1586 i++; 1587 } else { /* Begining of the block, so check if the complete block exists */ 1588 flg = PETSC_TRUE; 1589 for (k=1; k<bs; k++) { 1590 if (row+k != idx[i+k]) { /* break in the block */ 1591 flg = PETSC_FALSE; 1592 break; 1593 } 1594 } 1595 if (flg) { /* No break in the bs */ 1596 sizes[j] = bs; 1597 i+= bs; 1598 } else { 1599 sizes[j] = 1; 1600 i++; 1601 } 1602 } 1603 } 1604 *bs_max = j; 1605 PetscFunctionReturn(0); 1606 } 1607 1608 #undef __FUNCT__ 1609 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1610 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 1611 { 1612 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1613 PetscErrorCode ierr; 1614 PetscInt i,j,k,count,*rows; 1615 PetscInt bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max; 1616 PetscScalar zero = 0.0; 1617 MatScalar *aa; 1618 1619 PetscFunctionBegin; 1620 /* Make a copy of the IS and sort it */ 1621 /* allocate memory for rows,sizes */ 1622 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1623 sizes = rows + is_n; 1624 1625 /* copy IS values to rows, and sort them */ 1626 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1627 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1628 if (baij->keepzeroedrows) { 1629 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1630 bs_max = is_n; 1631 A->same_nonzero = PETSC_TRUE; 1632 } else { 1633 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1634 A->same_nonzero = PETSC_FALSE; 1635 } 1636 1637 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1638 row = rows[j]; 1639 if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 1640 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1641 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1642 if (sizes[i] == bs && !baij->keepzeroedrows) { 1643 if (diag != 0.0) { 1644 if (baij->ilen[row/bs] > 0) { 1645 baij->ilen[row/bs] = 1; 1646 baij->j[baij->i[row/bs]] = row/bs; 1647 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1648 } 1649 /* Now insert all the diagonal values for this bs */ 1650 for (k=0; k<bs; k++) { 1651 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 1652 } 1653 } else { /* (diag == 0.0) */ 1654 baij->ilen[row/bs] = 0; 1655 } /* end (diag == 0.0) */ 1656 } else { /* (sizes[i] != bs) */ 1657 #if defined (PETSC_USE_DEBUG) 1658 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 1659 #endif 1660 for (k=0; k<count; k++) { 1661 aa[0] = zero; 1662 aa += bs; 1663 } 1664 if (diag != 0.0) { 1665 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 1666 } 1667 } 1668 } 1669 1670 ierr = PetscFree(rows);CHKERRQ(ierr); 1671 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1672 PetscFunctionReturn(0); 1673 } 1674 1675 #undef __FUNCT__ 1676 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1677 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1678 { 1679 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1680 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 1681 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1682 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 1683 PetscErrorCode ierr; 1684 PetscInt ridx,cidx,bs2=a->bs2; 1685 PetscTruth roworiented=a->roworiented; 1686 MatScalar *ap,value,*aa=a->a,*bap; 1687 1688 PetscFunctionBegin; 1689 for (k=0; k<m; k++) { /* loop over added rows */ 1690 row = im[k]; 1691 brow = row/bs; 1692 if (row < 0) continue; 1693 #if defined(PETSC_USE_DEBUG) 1694 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 1695 #endif 1696 rp = aj + ai[brow]; 1697 ap = aa + bs2*ai[brow]; 1698 rmax = imax[brow]; 1699 nrow = ailen[brow]; 1700 low = 0; 1701 high = nrow; 1702 for (l=0; l<n; l++) { /* loop over added columns */ 1703 if (in[l] < 0) continue; 1704 #if defined(PETSC_USE_DEBUG) 1705 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 1706 #endif 1707 col = in[l]; bcol = col/bs; 1708 ridx = row % bs; cidx = col % bs; 1709 if (roworiented) { 1710 value = v[l + k*n]; 1711 } else { 1712 value = v[k + l*m]; 1713 } 1714 if (col <= lastcol) low = 0; else high = nrow; 1715 lastcol = col; 1716 while (high-low > 7) { 1717 t = (low+high)/2; 1718 if (rp[t] > bcol) high = t; 1719 else low = t; 1720 } 1721 for (i=low; i<high; i++) { 1722 if (rp[i] > bcol) break; 1723 if (rp[i] == bcol) { 1724 bap = ap + bs2*i + bs*cidx + ridx; 1725 if (is == ADD_VALUES) *bap += value; 1726 else *bap = value; 1727 goto noinsert1; 1728 } 1729 } 1730 if (nonew == 1) goto noinsert1; 1731 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1732 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1733 N = nrow++ - 1; high++; 1734 /* shift up all the later entries in this row */ 1735 for (ii=N; ii>=i; ii--) { 1736 rp[ii+1] = rp[ii]; 1737 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1738 } 1739 if (N>=i) { 1740 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1741 } 1742 rp[i] = bcol; 1743 ap[bs2*i + bs*cidx + ridx] = value; 1744 a->nz++; 1745 noinsert1:; 1746 low = i; 1747 } 1748 ailen[brow] = nrow; 1749 } 1750 A->same_nonzero = PETSC_FALSE; 1751 PetscFunctionReturn(0); 1752 } 1753 1754 1755 #undef __FUNCT__ 1756 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1757 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1758 { 1759 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1760 Mat outA; 1761 PetscErrorCode ierr; 1762 PetscTruth row_identity,col_identity; 1763 1764 PetscFunctionBegin; 1765 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1766 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1767 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1768 if (!row_identity || !col_identity) { 1769 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 1770 } 1771 1772 outA = inA; 1773 inA->factor = FACTOR_LU; 1774 1775 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1776 1777 a->row = row; 1778 a->col = col; 1779 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1780 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1781 1782 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1783 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1784 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1785 1786 /* 1787 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1788 for ILU(0) factorization with natural ordering 1789 */ 1790 if (inA->rmap.bs < 8) { 1791 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1792 } else { 1793 if (!a->solve_work) { 1794 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1795 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1796 } 1797 } 1798 1799 ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1800 1801 PetscFunctionReturn(0); 1802 } 1803 1804 EXTERN_C_BEGIN 1805 #undef __FUNCT__ 1806 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1807 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 1808 { 1809 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1810 PetscInt i,nz,nbs; 1811 1812 PetscFunctionBegin; 1813 nz = baij->maxnz/baij->bs2; 1814 nbs = baij->nbs; 1815 for (i=0; i<nz; i++) { 1816 baij->j[i] = indices[i]; 1817 } 1818 baij->nz = nz; 1819 for (i=0; i<nbs; i++) { 1820 baij->ilen[i] = baij->imax[i]; 1821 } 1822 1823 PetscFunctionReturn(0); 1824 } 1825 EXTERN_C_END 1826 1827 #undef __FUNCT__ 1828 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1829 /*@ 1830 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1831 in the matrix. 1832 1833 Input Parameters: 1834 + mat - the SeqBAIJ matrix 1835 - indices - the column indices 1836 1837 Level: advanced 1838 1839 Notes: 1840 This can be called if you have precomputed the nonzero structure of the 1841 matrix and want to provide it to the matrix object to improve the performance 1842 of the MatSetValues() operation. 1843 1844 You MUST have set the correct numbers of nonzeros per row in the call to 1845 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 1846 1847 MUST be called before any calls to MatSetValues(); 1848 1849 @*/ 1850 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1851 { 1852 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1853 1854 PetscFunctionBegin; 1855 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1856 PetscValidPointer(indices,2); 1857 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1858 if (f) { 1859 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1860 } else { 1861 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 1862 } 1863 PetscFunctionReturn(0); 1864 } 1865 1866 #undef __FUNCT__ 1867 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1868 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1869 { 1870 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1871 PetscErrorCode ierr; 1872 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 1873 PetscReal atmp; 1874 PetscScalar *x,zero = 0.0; 1875 MatScalar *aa; 1876 PetscInt ncols,brow,krow,kcol; 1877 1878 PetscFunctionBegin; 1879 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1880 bs = A->rmap.bs; 1881 aa = a->a; 1882 ai = a->i; 1883 aj = a->j; 1884 mbs = a->mbs; 1885 1886 ierr = VecSet(v,zero);CHKERRQ(ierr); 1887 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1888 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1889 if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1890 for (i=0; i<mbs; i++) { 1891 ncols = ai[1] - ai[0]; ai++; 1892 brow = bs*i; 1893 for (j=0; j<ncols; j++){ 1894 /* bcol = bs*(*aj); */ 1895 for (kcol=0; kcol<bs; kcol++){ 1896 for (krow=0; krow<bs; krow++){ 1897 atmp = PetscAbsScalar(*aa); aa++; 1898 row = brow + krow; /* row index */ 1899 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 1900 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1901 } 1902 } 1903 aj++; 1904 } 1905 } 1906 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1907 PetscFunctionReturn(0); 1908 } 1909 1910 #undef __FUNCT__ 1911 #define __FUNCT__ "MatCopy_SeqBAIJ" 1912 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 1913 { 1914 PetscErrorCode ierr; 1915 1916 PetscFunctionBegin; 1917 /* If the two matrices have the same copy implementation, use fast copy. */ 1918 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1919 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1920 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 1921 1922 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 1923 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1924 } 1925 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 1926 } else { 1927 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1928 } 1929 PetscFunctionReturn(0); 1930 } 1931 1932 #undef __FUNCT__ 1933 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1934 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 1935 { 1936 PetscErrorCode ierr; 1937 1938 PetscFunctionBegin; 1939 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 1940 PetscFunctionReturn(0); 1941 } 1942 1943 #undef __FUNCT__ 1944 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1945 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1946 { 1947 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1948 PetscFunctionBegin; 1949 *array = a->a; 1950 PetscFunctionReturn(0); 1951 } 1952 1953 #undef __FUNCT__ 1954 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1955 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1956 { 1957 PetscFunctionBegin; 1958 PetscFunctionReturn(0); 1959 } 1960 1961 #include "petscblaslapack.h" 1962 #undef __FUNCT__ 1963 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1964 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1965 { 1966 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1967 PetscErrorCode ierr; 1968 PetscInt i,bs=Y->rmap.bs,j,bs2; 1969 PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz; 1970 1971 PetscFunctionBegin; 1972 if (str == SAME_NONZERO_PATTERN) { 1973 PetscScalar alpha = a; 1974 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1975 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1976 if (y->xtoy && y->XtoY != X) { 1977 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1978 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1979 } 1980 if (!y->xtoy) { /* get xtoy */ 1981 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1982 y->XtoY = X; 1983 } 1984 bs2 = bs*bs; 1985 for (i=0; i<x->nz; i++) { 1986 j = 0; 1987 while (j < bs2){ 1988 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 1989 j++; 1990 } 1991 } 1992 ierr = PetscInfo3(0,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 1993 } else { 1994 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1995 } 1996 PetscFunctionReturn(0); 1997 } 1998 1999 #undef __FUNCT__ 2000 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2001 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2002 { 2003 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2004 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2005 PetscScalar *aa = a->a; 2006 2007 PetscFunctionBegin; 2008 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2009 PetscFunctionReturn(0); 2010 } 2011 2012 #undef __FUNCT__ 2013 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2014 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2015 { 2016 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2017 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2018 PetscScalar *aa = a->a; 2019 2020 PetscFunctionBegin; 2021 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2022 PetscFunctionReturn(0); 2023 } 2024 2025 2026 /* -------------------------------------------------------------------*/ 2027 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2028 MatGetRow_SeqBAIJ, 2029 MatRestoreRow_SeqBAIJ, 2030 MatMult_SeqBAIJ_N, 2031 /* 4*/ MatMultAdd_SeqBAIJ_N, 2032 MatMultTranspose_SeqBAIJ, 2033 MatMultTransposeAdd_SeqBAIJ, 2034 MatSolve_SeqBAIJ_N, 2035 0, 2036 0, 2037 /*10*/ 0, 2038 MatLUFactor_SeqBAIJ, 2039 0, 2040 0, 2041 MatTranspose_SeqBAIJ, 2042 /*15*/ MatGetInfo_SeqBAIJ, 2043 MatEqual_SeqBAIJ, 2044 MatGetDiagonal_SeqBAIJ, 2045 MatDiagonalScale_SeqBAIJ, 2046 MatNorm_SeqBAIJ, 2047 /*20*/ 0, 2048 MatAssemblyEnd_SeqBAIJ, 2049 0, 2050 MatSetOption_SeqBAIJ, 2051 MatZeroEntries_SeqBAIJ, 2052 /*25*/ MatZeroRows_SeqBAIJ, 2053 MatLUFactorSymbolic_SeqBAIJ, 2054 MatLUFactorNumeric_SeqBAIJ_N, 2055 MatCholeskyFactorSymbolic_SeqBAIJ, 2056 MatCholeskyFactorNumeric_SeqBAIJ_N, 2057 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2058 MatILUFactorSymbolic_SeqBAIJ, 2059 MatICCFactorSymbolic_SeqBAIJ, 2060 MatGetArray_SeqBAIJ, 2061 MatRestoreArray_SeqBAIJ, 2062 /*35*/ MatDuplicate_SeqBAIJ, 2063 0, 2064 0, 2065 MatILUFactor_SeqBAIJ, 2066 0, 2067 /*40*/ MatAXPY_SeqBAIJ, 2068 MatGetSubMatrices_SeqBAIJ, 2069 MatIncreaseOverlap_SeqBAIJ, 2070 MatGetValues_SeqBAIJ, 2071 MatCopy_SeqBAIJ, 2072 /*45*/ 0, 2073 MatScale_SeqBAIJ, 2074 0, 2075 0, 2076 0, 2077 /*50*/ 0, 2078 MatGetRowIJ_SeqBAIJ, 2079 MatRestoreRowIJ_SeqBAIJ, 2080 0, 2081 0, 2082 /*55*/ 0, 2083 0, 2084 0, 2085 0, 2086 MatSetValuesBlocked_SeqBAIJ, 2087 /*60*/ MatGetSubMatrix_SeqBAIJ, 2088 MatDestroy_SeqBAIJ, 2089 MatView_SeqBAIJ, 2090 0, 2091 0, 2092 /*65*/ 0, 2093 0, 2094 0, 2095 0, 2096 0, 2097 /*70*/ MatGetRowMax_SeqBAIJ, 2098 MatConvert_Basic, 2099 0, 2100 0, 2101 0, 2102 /*75*/ 0, 2103 0, 2104 0, 2105 0, 2106 0, 2107 /*80*/ 0, 2108 0, 2109 0, 2110 0, 2111 MatLoad_SeqBAIJ, 2112 /*85*/ 0, 2113 0, 2114 0, 2115 0, 2116 0, 2117 /*90*/ 0, 2118 0, 2119 0, 2120 0, 2121 0, 2122 /*95*/ 0, 2123 0, 2124 0, 2125 0, 2126 0, 2127 /*100*/0, 2128 0, 2129 0, 2130 0, 2131 0, 2132 /*105*/0, 2133 MatRealPart_SeqBAIJ, 2134 MatImaginaryPart_SeqBAIJ 2135 }; 2136 2137 EXTERN_C_BEGIN 2138 #undef __FUNCT__ 2139 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2140 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2141 { 2142 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2143 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2144 PetscErrorCode ierr; 2145 2146 PetscFunctionBegin; 2147 if (aij->nonew != 1) { 2148 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2149 } 2150 2151 /* allocate space for values if not already there */ 2152 if (!aij->saved_values) { 2153 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2154 } 2155 2156 /* copy values over */ 2157 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2158 PetscFunctionReturn(0); 2159 } 2160 EXTERN_C_END 2161 2162 EXTERN_C_BEGIN 2163 #undef __FUNCT__ 2164 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2165 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2166 { 2167 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2168 PetscErrorCode ierr; 2169 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2170 2171 PetscFunctionBegin; 2172 if (aij->nonew != 1) { 2173 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2174 } 2175 if (!aij->saved_values) { 2176 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2177 } 2178 2179 /* copy values over */ 2180 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2181 PetscFunctionReturn(0); 2182 } 2183 EXTERN_C_END 2184 2185 EXTERN_C_BEGIN 2186 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2187 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2188 EXTERN_C_END 2189 2190 EXTERN_C_BEGIN 2191 #undef __FUNCT__ 2192 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2193 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2194 { 2195 Mat_SeqBAIJ *b; 2196 PetscErrorCode ierr; 2197 PetscInt i,mbs,nbs,bs2,newbs = bs; 2198 PetscTruth flg,skipallocation = PETSC_FALSE; 2199 2200 PetscFunctionBegin; 2201 2202 if (nz == MAT_SKIP_ALLOCATION) { 2203 skipallocation = PETSC_TRUE; 2204 nz = 0; 2205 } 2206 2207 ierr = PetscOptionsBegin(B->comm,B->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2208 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2209 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2210 2211 if (nnz && newbs != bs) { 2212 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2213 } 2214 bs = newbs; 2215 2216 B->rmap.bs = B->cmap.bs = bs; 2217 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 2218 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 2219 2220 B->preallocated = PETSC_TRUE; 2221 2222 mbs = B->rmap.n/bs; 2223 nbs = B->cmap.n/bs; 2224 bs2 = bs*bs; 2225 2226 if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) { 2227 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs); 2228 } 2229 2230 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2231 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2232 if (nnz) { 2233 for (i=0; i<mbs; i++) { 2234 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2235 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2236 } 2237 } 2238 2239 b = (Mat_SeqBAIJ*)B->data; 2240 ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2241 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2242 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2243 2244 B->ops->solve = MatSolve_SeqBAIJ_Update; 2245 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2246 if (!flg) { 2247 switch (bs) { 2248 case 1: 2249 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2250 B->ops->mult = MatMult_SeqBAIJ_1; 2251 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2252 break; 2253 case 2: 2254 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2255 B->ops->mult = MatMult_SeqBAIJ_2; 2256 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2257 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2258 break; 2259 case 3: 2260 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2261 B->ops->mult = MatMult_SeqBAIJ_3; 2262 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2263 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2264 break; 2265 case 4: 2266 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2267 B->ops->mult = MatMult_SeqBAIJ_4; 2268 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2269 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2270 break; 2271 case 5: 2272 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2273 B->ops->mult = MatMult_SeqBAIJ_5; 2274 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2275 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2276 break; 2277 case 6: 2278 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2279 B->ops->mult = MatMult_SeqBAIJ_6; 2280 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2281 break; 2282 case 7: 2283 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2284 B->ops->mult = MatMult_SeqBAIJ_7; 2285 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2286 break; 2287 default: 2288 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2289 B->ops->mult = MatMult_SeqBAIJ_N; 2290 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2291 break; 2292 } 2293 } 2294 B->rmap.bs = bs; 2295 b->mbs = mbs; 2296 b->nbs = nbs; 2297 if (!skipallocation) { 2298 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2299 /* b->ilen will count nonzeros in each block row so far. */ 2300 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2301 if (!nnz) { 2302 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2303 else if (nz <= 0) nz = 1; 2304 for (i=0; i<mbs; i++) b->imax[i] = nz; 2305 nz = nz*mbs; 2306 } else { 2307 nz = 0; 2308 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2309 } 2310 2311 /* allocate the matrix space */ 2312 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 2313 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2314 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2315 b->singlemalloc = PETSC_TRUE; 2316 2317 b->i[0] = 0; 2318 for (i=1; i<mbs+1; i++) { 2319 b->i[i] = b->i[i-1] + b->imax[i-1]; 2320 } 2321 b->free_a = PETSC_TRUE; 2322 b->free_ij = PETSC_TRUE; 2323 } else { 2324 b->free_a = PETSC_FALSE; 2325 b->free_ij = PETSC_FALSE; 2326 } 2327 2328 B->rmap.bs = bs; 2329 b->bs2 = bs2; 2330 b->mbs = mbs; 2331 b->nz = 0; 2332 b->maxnz = nz*bs2; 2333 B->info.nz_unneeded = (PetscReal)b->maxnz; 2334 PetscFunctionReturn(0); 2335 } 2336 EXTERN_C_END 2337 2338 /*MC 2339 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2340 block sparse compressed row format. 2341 2342 Options Database Keys: 2343 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2344 2345 Level: beginner 2346 2347 .seealso: MatCreateSeqBAIJ() 2348 M*/ 2349 2350 EXTERN_C_BEGIN 2351 #undef __FUNCT__ 2352 #define __FUNCT__ "MatCreate_SeqBAIJ" 2353 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2354 { 2355 PetscErrorCode ierr; 2356 PetscMPIInt size; 2357 Mat_SeqBAIJ *b; 2358 2359 PetscFunctionBegin; 2360 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 2361 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2362 2363 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2364 B->data = (void*)b; 2365 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2366 B->factor = 0; 2367 B->mapping = 0; 2368 b->row = 0; 2369 b->col = 0; 2370 b->icol = 0; 2371 b->reallocs = 0; 2372 b->saved_values = 0; 2373 #if defined(PETSC_USE_MAT_SINGLE) 2374 b->setvalueslen = 0; 2375 b->setvaluescopy = PETSC_NULL; 2376 #endif 2377 2378 b->sorted = PETSC_FALSE; 2379 b->roworiented = PETSC_TRUE; 2380 b->nonew = 0; 2381 b->diag = 0; 2382 b->solve_work = 0; 2383 b->mult_work = 0; 2384 B->spptr = 0; 2385 B->info.nz_unneeded = (PetscReal)b->maxnz; 2386 b->keepzeroedrows = PETSC_FALSE; 2387 b->xtoy = 0; 2388 b->XtoY = 0; 2389 b->compressedrow.use = PETSC_FALSE; 2390 b->compressedrow.nrows = 0; 2391 b->compressedrow.i = PETSC_NULL; 2392 b->compressedrow.rindex = PETSC_NULL; 2393 b->compressedrow.checked = PETSC_FALSE; 2394 B->same_nonzero = PETSC_FALSE; 2395 2396 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2397 "MatInvertBlockDiagonal_SeqBAIJ", 2398 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2399 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2400 "MatStoreValues_SeqBAIJ", 2401 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2402 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2403 "MatRetrieveValues_SeqBAIJ", 2404 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2405 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2406 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2407 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2408 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2409 "MatConvert_SeqBAIJ_SeqAIJ", 2410 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2411 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2412 "MatConvert_SeqBAIJ_SeqSBAIJ", 2413 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2414 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2415 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2416 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2417 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 2418 PetscFunctionReturn(0); 2419 } 2420 EXTERN_C_END 2421 2422 #undef __FUNCT__ 2423 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2424 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2425 { 2426 Mat C; 2427 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2428 PetscErrorCode ierr; 2429 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2430 2431 PetscFunctionBegin; 2432 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2433 2434 *B = 0; 2435 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 2436 ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 2437 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 2438 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2439 c = (Mat_SeqBAIJ*)C->data; 2440 2441 C->rmap.N = A->rmap.N; 2442 C->cmap.N = A->cmap.N; 2443 C->rmap.bs = A->rmap.bs; 2444 c->bs2 = a->bs2; 2445 c->mbs = a->mbs; 2446 c->nbs = a->nbs; 2447 2448 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 2449 for (i=0; i<mbs; i++) { 2450 c->imax[i] = a->imax[i]; 2451 c->ilen[i] = a->ilen[i]; 2452 } 2453 2454 /* allocate the matrix space */ 2455 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2456 c->singlemalloc = PETSC_TRUE; 2457 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2458 if (mbs > 0) { 2459 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2460 if (cpvalues == MAT_COPY_VALUES) { 2461 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2462 } else { 2463 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2464 } 2465 } 2466 c->sorted = a->sorted; 2467 c->roworiented = a->roworiented; 2468 c->nonew = a->nonew; 2469 2470 if (a->diag) { 2471 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2472 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2473 for (i=0; i<mbs; i++) { 2474 c->diag[i] = a->diag[i]; 2475 } 2476 } else c->diag = 0; 2477 c->nz = a->nz; 2478 c->maxnz = a->maxnz; 2479 c->solve_work = 0; 2480 c->mult_work = 0; 2481 c->free_a = PETSC_TRUE; 2482 c->free_ij = PETSC_TRUE; 2483 C->preallocated = PETSC_TRUE; 2484 C->assembled = PETSC_TRUE; 2485 2486 c->compressedrow.use = a->compressedrow.use; 2487 c->compressedrow.nrows = a->compressedrow.nrows; 2488 c->compressedrow.checked = a->compressedrow.checked; 2489 if ( a->compressedrow.checked && a->compressedrow.use){ 2490 i = a->compressedrow.nrows; 2491 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2492 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2493 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2494 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2495 } else { 2496 c->compressedrow.use = PETSC_FALSE; 2497 c->compressedrow.i = PETSC_NULL; 2498 c->compressedrow.rindex = PETSC_NULL; 2499 } 2500 C->same_nonzero = A->same_nonzero; 2501 *B = C; 2502 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 2503 PetscFunctionReturn(0); 2504 } 2505 2506 #undef __FUNCT__ 2507 #define __FUNCT__ "MatLoad_SeqBAIJ" 2508 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A) 2509 { 2510 Mat_SeqBAIJ *a; 2511 Mat B; 2512 PetscErrorCode ierr; 2513 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2514 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2515 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2516 PetscInt *masked,nmask,tmp,bs2,ishift; 2517 PetscMPIInt size; 2518 int fd; 2519 PetscScalar *aa; 2520 MPI_Comm comm = ((PetscObject)viewer)->comm; 2521 2522 PetscFunctionBegin; 2523 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 2524 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2525 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2526 bs2 = bs*bs; 2527 2528 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2529 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2530 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2531 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2532 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2533 M = header[1]; N = header[2]; nz = header[3]; 2534 2535 if (header[3] < 0) { 2536 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2537 } 2538 2539 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2540 2541 /* 2542 This code adds extra rows to make sure the number of rows is 2543 divisible by the blocksize 2544 */ 2545 mbs = M/bs; 2546 extra_rows = bs - M + bs*(mbs); 2547 if (extra_rows == bs) extra_rows = 0; 2548 else mbs++; 2549 if (extra_rows) { 2550 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2551 } 2552 2553 /* read in row lengths */ 2554 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2555 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2556 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2557 2558 /* read in column indices */ 2559 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2560 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2561 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2562 2563 /* loop over row lengths determining block row lengths */ 2564 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2565 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2566 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2567 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2568 masked = mask + mbs; 2569 rowcount = 0; nzcount = 0; 2570 for (i=0; i<mbs; i++) { 2571 nmask = 0; 2572 for (j=0; j<bs; j++) { 2573 kmax = rowlengths[rowcount]; 2574 for (k=0; k<kmax; k++) { 2575 tmp = jj[nzcount++]/bs; 2576 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2577 } 2578 rowcount++; 2579 } 2580 browlengths[i] += nmask; 2581 /* zero out the mask elements we set */ 2582 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2583 } 2584 2585 /* create our matrix */ 2586 ierr = MatCreate(comm,&B); 2587 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 2588 ierr = MatSetType(B,type);CHKERRQ(ierr); 2589 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 2590 a = (Mat_SeqBAIJ*)B->data; 2591 2592 /* set matrix "i" values */ 2593 a->i[0] = 0; 2594 for (i=1; i<= mbs; i++) { 2595 a->i[i] = a->i[i-1] + browlengths[i-1]; 2596 a->ilen[i-1] = browlengths[i-1]; 2597 } 2598 a->nz = 0; 2599 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2600 2601 /* read in nonzero values */ 2602 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2603 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2604 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2605 2606 /* set "a" and "j" values into matrix */ 2607 nzcount = 0; jcount = 0; 2608 for (i=0; i<mbs; i++) { 2609 nzcountb = nzcount; 2610 nmask = 0; 2611 for (j=0; j<bs; j++) { 2612 kmax = rowlengths[i*bs+j]; 2613 for (k=0; k<kmax; k++) { 2614 tmp = jj[nzcount++]/bs; 2615 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2616 } 2617 } 2618 /* sort the masked values */ 2619 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2620 2621 /* set "j" values into matrix */ 2622 maskcount = 1; 2623 for (j=0; j<nmask; j++) { 2624 a->j[jcount++] = masked[j]; 2625 mask[masked[j]] = maskcount++; 2626 } 2627 /* set "a" values into matrix */ 2628 ishift = bs2*a->i[i]; 2629 for (j=0; j<bs; j++) { 2630 kmax = rowlengths[i*bs+j]; 2631 for (k=0; k<kmax; k++) { 2632 tmp = jj[nzcountb]/bs ; 2633 block = mask[tmp] - 1; 2634 point = jj[nzcountb] - bs*tmp; 2635 idx = ishift + bs2*block + j + bs*point; 2636 a->a[idx] = (MatScalar)aa[nzcountb++]; 2637 } 2638 } 2639 /* zero out the mask elements we set */ 2640 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2641 } 2642 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2643 2644 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2645 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2646 ierr = PetscFree(aa);CHKERRQ(ierr); 2647 ierr = PetscFree(jj);CHKERRQ(ierr); 2648 ierr = PetscFree(mask);CHKERRQ(ierr); 2649 2650 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2651 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2652 ierr = MatView_Private(B);CHKERRQ(ierr); 2653 2654 *A = B; 2655 PetscFunctionReturn(0); 2656 } 2657 2658 #undef __FUNCT__ 2659 #define __FUNCT__ "MatCreateSeqBAIJ" 2660 /*@C 2661 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2662 compressed row) format. For good matrix assembly performance the 2663 user should preallocate the matrix storage by setting the parameter nz 2664 (or the array nnz). By setting these parameters accurately, performance 2665 during matrix assembly can be increased by more than a factor of 50. 2666 2667 Collective on MPI_Comm 2668 2669 Input Parameters: 2670 + comm - MPI communicator, set to PETSC_COMM_SELF 2671 . bs - size of block 2672 . m - number of rows 2673 . n - number of columns 2674 . nz - number of nonzero blocks per block row (same for all rows) 2675 - nnz - array containing the number of nonzero blocks in the various block rows 2676 (possibly different for each block row) or PETSC_NULL 2677 2678 Output Parameter: 2679 . A - the matrix 2680 2681 Options Database Keys: 2682 . -mat_no_unroll - uses code that does not unroll the loops in the 2683 block calculations (much slower) 2684 . -mat_block_size - size of the blocks to use 2685 2686 Level: intermediate 2687 2688 Notes: 2689 The number of rows and columns must be divisible by blocksize. 2690 2691 If the nnz parameter is given then the nz parameter is ignored 2692 2693 A nonzero block is any block that as 1 or more nonzeros in it 2694 2695 The block AIJ format is fully compatible with standard Fortran 77 2696 storage. That is, the stored row and column indices can begin at 2697 either one (as in Fortran) or zero. See the users' manual for details. 2698 2699 Specify the preallocated storage with either nz or nnz (not both). 2700 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2701 allocation. For additional details, see the users manual chapter on 2702 matrices. 2703 2704 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2705 @*/ 2706 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 2707 { 2708 PetscErrorCode ierr; 2709 2710 PetscFunctionBegin; 2711 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2712 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2713 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2714 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 2715 PetscFunctionReturn(0); 2716 } 2717 2718 #undef __FUNCT__ 2719 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2720 /*@C 2721 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2722 per row in the matrix. For good matrix assembly performance the 2723 user should preallocate the matrix storage by setting the parameter nz 2724 (or the array nnz). By setting these parameters accurately, performance 2725 during matrix assembly can be increased by more than a factor of 50. 2726 2727 Collective on MPI_Comm 2728 2729 Input Parameters: 2730 + A - the matrix 2731 . bs - size of block 2732 . nz - number of block nonzeros per block row (same for all rows) 2733 - nnz - array containing the number of block nonzeros in the various block rows 2734 (possibly different for each block row) or PETSC_NULL 2735 2736 Options Database Keys: 2737 . -mat_no_unroll - uses code that does not unroll the loops in the 2738 block calculations (much slower) 2739 . -mat_block_size - size of the blocks to use 2740 2741 Level: intermediate 2742 2743 Notes: 2744 If the nnz parameter is given then the nz parameter is ignored 2745 2746 The block AIJ format is fully compatible with standard Fortran 77 2747 storage. That is, the stored row and column indices can begin at 2748 either one (as in Fortran) or zero. See the users' manual for details. 2749 2750 Specify the preallocated storage with either nz or nnz (not both). 2751 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2752 allocation. For additional details, see the users manual chapter on 2753 matrices. 2754 2755 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2756 @*/ 2757 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 2758 { 2759 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 2760 2761 PetscFunctionBegin; 2762 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2763 if (f) { 2764 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2765 } 2766 PetscFunctionReturn(0); 2767 } 2768 2769 #undef __FUNCT__ 2770 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 2771 /*@ 2772 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 2773 (upper triangular entries in CSR format) provided by the user. 2774 2775 Collective on MPI_Comm 2776 2777 Input Parameters: 2778 + comm - must be an MPI communicator of size 1 2779 . bs - size of block 2780 . m - number of rows 2781 . n - number of columns 2782 . i - row indices 2783 . j - column indices 2784 - a - matrix values 2785 2786 Output Parameter: 2787 . mat - the matrix 2788 2789 Level: intermediate 2790 2791 Notes: 2792 The i, j, and a arrays are not copied by this routine, the user must free these arrays 2793 once the matrix is destroyed 2794 2795 You cannot set new nonzero locations into this matrix, that will generate an error. 2796 2797 The i and j indices are 0 based 2798 2799 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 2800 2801 @*/ 2802 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 2803 { 2804 PetscErrorCode ierr; 2805 PetscInt ii; 2806 Mat_SeqBAIJ *baij; 2807 2808 PetscFunctionBegin; 2809 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 2810 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2811 2812 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2813 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 2814 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 2815 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 2816 baij = (Mat_SeqBAIJ*)(*mat)->data; 2817 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 2818 2819 baij->i = i; 2820 baij->j = j; 2821 baij->a = a; 2822 baij->singlemalloc = PETSC_FALSE; 2823 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 2824 baij->free_a = PETSC_FALSE; 2825 baij->free_ij = PETSC_FALSE; 2826 2827 for (ii=0; ii<m; ii++) { 2828 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 2829 #if defined(PETSC_USE_DEBUG) 2830 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 2831 #endif 2832 } 2833 #if defined(PETSC_USE_DEBUG) 2834 for (ii=0; ii<baij->i[m]; ii++) { 2835 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 2836 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 2837 } 2838 #endif 2839 2840 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2841 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2842 PetscFunctionReturn(0); 2843 } 2844