1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 15 /*@C 16 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 17 18 Collective on Mat 19 20 Input Parameters: 21 . mat - the matrix 22 23 Level: advanced 24 @*/ 25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 26 { 27 PetscErrorCode ierr,(*f)(Mat); 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 31 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 32 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 33 34 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 35 if (f) { 36 ierr = (*f)(mat);CHKERRQ(ierr); 37 } else { 38 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 39 } 40 PetscFunctionReturn(0); 41 } 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 47 { 48 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 49 PetscErrorCode ierr; 50 PetscInt *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs; 51 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 52 53 PetscFunctionBegin; 54 if (a->idiagvalid) PetscFunctionReturn(0); 55 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56 diag_offset = a->diag; 57 if (!a->idiag) { 58 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59 } 60 diag = a->idiag; 61 mdiag = a->idiag+bs*bs*mbs; 62 /* factor and invert each block */ 63 switch (bs){ 64 case 2: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 4*diag_offset[i]; 67 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 68 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 69 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 70 diag += 4; 71 mdiag += 4; 72 } 73 break; 74 case 3: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 9*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79 diag[8] = odiag[8]; 80 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 81 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 82 mdiag[8] = odiag[8]; 83 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 84 diag += 9; 85 mdiag += 9; 86 } 87 break; 88 case 4: 89 for (i=0; i<mbs; i++) { 90 odiag = v + 16*diag_offset[i]; 91 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 92 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 94 diag += 16; 95 mdiag += 16; 96 } 97 break; 98 case 5: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 25*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 104 diag += 25; 105 mdiag += 25; 106 } 107 break; 108 default: 109 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 110 } 111 a->idiagvalid = PETSC_TRUE; 112 PetscFunctionReturn(0); 113 } 114 EXTERN_C_END 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 119 { 120 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 121 PetscScalar *x,x1,x2,s1,s2; 122 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 123 PetscErrorCode ierr; 124 PetscInt m = a->mbs,i,i2,nz,idx; 125 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 126 127 PetscFunctionBegin; 128 its = its*lits; 129 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 130 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 131 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 132 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 133 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 134 135 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 136 137 diag = a->diag; 138 idiag = a->idiag; 139 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 140 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 141 142 if (flag & SOR_ZERO_INITIAL_GUESS) { 143 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 144 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 145 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 146 i2 = 2; 147 idiag += 4; 148 for (i=1; i<m; i++) { 149 v = aa + 4*ai[i]; 150 vi = aj + ai[i]; 151 nz = diag[i] - ai[i]; 152 s1 = b[i2]; s2 = b[i2+1]; 153 while (nz--) { 154 idx = 2*(*vi++); 155 x1 = x[idx]; x2 = x[1+idx]; 156 s1 -= v[0]*x1 + v[2]*x2; 157 s2 -= v[1]*x1 + v[3]*x2; 158 v += 4; 159 } 160 x[i2] = idiag[0]*s1 + idiag[2]*s2; 161 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 162 idiag += 4; 163 i2 += 2; 164 } 165 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 166 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 167 } 168 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 169 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 170 i2 = 0; 171 mdiag = a->idiag+4*a->mbs; 172 for (i=0; i<m; i++) { 173 x1 = x[i2]; x2 = x[i2+1]; 174 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 175 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 176 mdiag += 4; 177 i2 += 2; 178 } 179 ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 180 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 181 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 182 } 183 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 184 idiag = a->idiag+4*a->mbs - 4; 185 i2 = 2*m - 2; 186 x1 = x[i2]; x2 = x[i2+1]; 187 x[i2] = idiag[0]*x1 + idiag[2]*x2; 188 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 189 idiag -= 4; 190 i2 -= 2; 191 for (i=m-2; i>=0; i--) { 192 v = aa + 4*(diag[i]+1); 193 vi = aj + diag[i] + 1; 194 nz = ai[i+1] - diag[i] - 1; 195 s1 = x[i2]; s2 = x[i2+1]; 196 while (nz--) { 197 idx = 2*(*vi++); 198 x1 = x[idx]; x2 = x[1+idx]; 199 s1 -= v[0]*x1 + v[2]*x2; 200 s2 -= v[1]*x1 + v[3]*x2; 201 v += 4; 202 } 203 x[i2] = idiag[0]*s1 + idiag[2]*s2; 204 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 205 idiag -= 4; 206 i2 -= 2; 207 } 208 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 209 } 210 } else { 211 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 212 } 213 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 214 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 215 PetscFunctionReturn(0); 216 } 217 218 #undef __FUNCT__ 219 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 220 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 221 { 222 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 223 PetscScalar *x,x1,x2,x3,s1,s2,s3; 224 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 225 PetscErrorCode ierr; 226 PetscInt m = a->mbs,i,i2,nz,idx; 227 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 228 229 PetscFunctionBegin; 230 its = its*lits; 231 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 232 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 233 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 234 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 235 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 236 237 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 238 239 diag = a->diag; 240 idiag = a->idiag; 241 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 242 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 243 244 if (flag & SOR_ZERO_INITIAL_GUESS) { 245 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 246 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 247 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 248 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 249 i2 = 3; 250 idiag += 9; 251 for (i=1; i<m; i++) { 252 v = aa + 9*ai[i]; 253 vi = aj + ai[i]; 254 nz = diag[i] - ai[i]; 255 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 256 while (nz--) { 257 idx = 3*(*vi++); 258 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 259 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 260 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 261 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 262 v += 9; 263 } 264 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 265 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 266 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 267 idiag += 9; 268 i2 += 3; 269 } 270 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 271 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 272 } 273 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 274 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 275 i2 = 0; 276 mdiag = a->idiag+9*a->mbs; 277 for (i=0; i<m; i++) { 278 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 279 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 280 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 281 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 282 mdiag += 9; 283 i2 += 3; 284 } 285 ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 286 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 287 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 288 } 289 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 290 idiag = a->idiag+9*a->mbs - 9; 291 i2 = 3*m - 3; 292 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 293 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 294 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 295 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 296 idiag -= 9; 297 i2 -= 3; 298 for (i=m-2; i>=0; i--) { 299 v = aa + 9*(diag[i]+1); 300 vi = aj + diag[i] + 1; 301 nz = ai[i+1] - diag[i] - 1; 302 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 303 while (nz--) { 304 idx = 3*(*vi++); 305 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 306 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 307 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 308 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 309 v += 9; 310 } 311 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 312 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 313 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 314 idiag -= 9; 315 i2 -= 3; 316 } 317 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 318 } 319 } else { 320 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 321 } 322 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 323 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 324 PetscFunctionReturn(0); 325 } 326 327 #undef __FUNCT__ 328 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 329 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 330 { 331 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 332 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 333 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 334 PetscErrorCode ierr; 335 PetscInt m = a->mbs,i,i2,nz,idx; 336 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 337 338 PetscFunctionBegin; 339 its = its*lits; 340 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 341 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 342 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 343 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 344 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 345 346 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 347 348 diag = a->diag; 349 idiag = a->idiag; 350 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 351 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 352 353 if (flag & SOR_ZERO_INITIAL_GUESS) { 354 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 355 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 356 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 357 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 358 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 359 i2 = 4; 360 idiag += 16; 361 for (i=1; i<m; i++) { 362 v = aa + 16*ai[i]; 363 vi = aj + ai[i]; 364 nz = diag[i] - ai[i]; 365 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 366 while (nz--) { 367 idx = 4*(*vi++); 368 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 369 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 370 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 371 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 372 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 373 v += 16; 374 } 375 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 376 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 377 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 378 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 379 idiag += 16; 380 i2 += 4; 381 } 382 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 383 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 384 } 385 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 386 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 387 i2 = 0; 388 mdiag = a->idiag+16*a->mbs; 389 for (i=0; i<m; i++) { 390 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 391 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 392 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 393 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 394 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 395 mdiag += 16; 396 i2 += 4; 397 } 398 ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 399 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 400 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 401 } 402 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 403 idiag = a->idiag+16*a->mbs - 16; 404 i2 = 4*m - 4; 405 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 406 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 407 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 408 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 409 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 410 idiag -= 16; 411 i2 -= 4; 412 for (i=m-2; i>=0; i--) { 413 v = aa + 16*(diag[i]+1); 414 vi = aj + diag[i] + 1; 415 nz = ai[i+1] - diag[i] - 1; 416 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 417 while (nz--) { 418 idx = 4*(*vi++); 419 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 420 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 421 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 422 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 423 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 424 v += 16; 425 } 426 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 427 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 428 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 429 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 430 idiag -= 16; 431 i2 -= 4; 432 } 433 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 434 } 435 } else { 436 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 437 } 438 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 439 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 440 PetscFunctionReturn(0); 441 } 442 443 #undef __FUNCT__ 444 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 445 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 446 { 447 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 448 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 449 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 450 PetscErrorCode ierr; 451 PetscInt m = a->mbs,i,i2,nz,idx; 452 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 453 454 PetscFunctionBegin; 455 its = its*lits; 456 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 457 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 458 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 459 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 460 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 461 462 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 463 464 diag = a->diag; 465 idiag = a->idiag; 466 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 467 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 468 469 if (flag & SOR_ZERO_INITIAL_GUESS) { 470 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 471 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 472 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 473 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 474 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 475 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 476 i2 = 5; 477 idiag += 25; 478 for (i=1; i<m; i++) { 479 v = aa + 25*ai[i]; 480 vi = aj + ai[i]; 481 nz = diag[i] - ai[i]; 482 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 483 while (nz--) { 484 idx = 5*(*vi++); 485 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 486 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 487 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 488 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 489 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 490 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 491 v += 25; 492 } 493 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 494 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 495 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 496 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 497 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 498 idiag += 25; 499 i2 += 5; 500 } 501 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 502 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 503 } 504 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 505 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 506 i2 = 0; 507 mdiag = a->idiag+25*a->mbs; 508 for (i=0; i<m; i++) { 509 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 510 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 511 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 512 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 513 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 514 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 515 mdiag += 25; 516 i2 += 5; 517 } 518 ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 519 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 520 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 521 } 522 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 523 idiag = a->idiag+25*a->mbs - 25; 524 i2 = 5*m - 5; 525 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 526 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 527 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 528 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 529 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 530 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 531 idiag -= 25; 532 i2 -= 5; 533 for (i=m-2; i>=0; i--) { 534 v = aa + 25*(diag[i]+1); 535 vi = aj + diag[i] + 1; 536 nz = ai[i+1] - diag[i] - 1; 537 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 538 while (nz--) { 539 idx = 5*(*vi++); 540 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 541 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 542 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 543 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 544 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 545 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 546 v += 25; 547 } 548 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 549 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 550 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 551 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 552 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 553 idiag -= 25; 554 i2 -= 5; 555 } 556 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 557 } 558 } else { 559 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 560 } 561 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 562 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 563 PetscFunctionReturn(0); 564 } 565 566 /* 567 Special version for direct calls from Fortran (Used in PETSc-fun3d) 568 */ 569 #if defined(PETSC_HAVE_FORTRAN_CAPS) 570 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 571 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 572 #define matsetvaluesblocked4_ matsetvaluesblocked4 573 #endif 574 575 EXTERN_C_BEGIN 576 #undef __FUNCT__ 577 #define __FUNCT__ "matsetvaluesblocked4_" 578 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 579 { 580 Mat A = *AA; 581 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 582 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 583 PetscInt *ai=a->i,*ailen=a->ilen; 584 PetscInt *aj=a->j,stepval,lastcol = -1; 585 const PetscScalar *value = v; 586 MatScalar *ap,*aa = a->a,*bap; 587 588 PetscFunctionBegin; 589 if (A->rmap.bs != 4) SETERRABORT(A->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 590 stepval = (n-1)*4; 591 for (k=0; k<m; k++) { /* loop over added rows */ 592 row = im[k]; 593 rp = aj + ai[row]; 594 ap = aa + 16*ai[row]; 595 nrow = ailen[row]; 596 low = 0; 597 high = nrow; 598 for (l=0; l<n; l++) { /* loop over added columns */ 599 col = in[l]; 600 if (col <= lastcol) low = 0; else high = nrow; 601 lastcol = col; 602 value = v + k*(stepval+4 + l)*4; 603 while (high-low > 7) { 604 t = (low+high)/2; 605 if (rp[t] > col) high = t; 606 else low = t; 607 } 608 for (i=low; i<high; i++) { 609 if (rp[i] > col) break; 610 if (rp[i] == col) { 611 bap = ap + 16*i; 612 for (ii=0; ii<4; ii++,value+=stepval) { 613 for (jj=ii; jj<16; jj+=4) { 614 bap[jj] += *value++; 615 } 616 } 617 goto noinsert2; 618 } 619 } 620 N = nrow++ - 1; 621 high++; /* added new column index thus must search to one higher than before */ 622 /* shift up all the later entries in this row */ 623 for (ii=N; ii>=i; ii--) { 624 rp[ii+1] = rp[ii]; 625 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 626 } 627 if (N >= i) { 628 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 629 } 630 rp[i] = col; 631 bap = ap + 16*i; 632 for (ii=0; ii<4; ii++,value+=stepval) { 633 for (jj=ii; jj<16; jj+=4) { 634 bap[jj] = *value++; 635 } 636 } 637 noinsert2:; 638 low = i; 639 } 640 ailen[row] = nrow; 641 } 642 PetscFunctionReturnVoid(); 643 } 644 EXTERN_C_END 645 646 #if defined(PETSC_HAVE_FORTRAN_CAPS) 647 #define matsetvalues4_ MATSETVALUES4 648 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 649 #define matsetvalues4_ matsetvalues4 650 #endif 651 652 EXTERN_C_BEGIN 653 #undef __FUNCT__ 654 #define __FUNCT__ "MatSetValues4_" 655 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 656 { 657 Mat A = *AA; 658 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 659 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 660 PetscInt *ai=a->i,*ailen=a->ilen; 661 PetscInt *aj=a->j,brow,bcol; 662 PetscInt ridx,cidx,lastcol = -1; 663 MatScalar *ap,value,*aa=a->a,*bap; 664 665 PetscFunctionBegin; 666 for (k=0; k<m; k++) { /* loop over added rows */ 667 row = im[k]; brow = row/4; 668 rp = aj + ai[brow]; 669 ap = aa + 16*ai[brow]; 670 nrow = ailen[brow]; 671 low = 0; 672 high = nrow; 673 for (l=0; l<n; l++) { /* loop over added columns */ 674 col = in[l]; bcol = col/4; 675 ridx = row % 4; cidx = col % 4; 676 value = v[l + k*n]; 677 if (col <= lastcol) low = 0; else high = nrow; 678 lastcol = col; 679 while (high-low > 7) { 680 t = (low+high)/2; 681 if (rp[t] > bcol) high = t; 682 else low = t; 683 } 684 for (i=low; i<high; i++) { 685 if (rp[i] > bcol) break; 686 if (rp[i] == bcol) { 687 bap = ap + 16*i + 4*cidx + ridx; 688 *bap += value; 689 goto noinsert1; 690 } 691 } 692 N = nrow++ - 1; 693 high++; /* added new column thus must search to one higher than before */ 694 /* shift up all the later entries in this row */ 695 for (ii=N; ii>=i; ii--) { 696 rp[ii+1] = rp[ii]; 697 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 698 } 699 if (N>=i) { 700 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 701 } 702 rp[i] = bcol; 703 ap[16*i + 4*cidx + ridx] = value; 704 noinsert1:; 705 low = i; 706 } 707 ailen[brow] = nrow; 708 } 709 PetscFunctionReturnVoid(); 710 } 711 EXTERN_C_END 712 713 /* UGLY, ugly, ugly 714 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 715 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 716 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 717 converts the entries into single precision and then calls ..._MatScalar() to put them 718 into the single precision data structures. 719 */ 720 #if defined(PETSC_USE_MAT_SINGLE) 721 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 722 #else 723 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 724 #endif 725 726 #define CHUNKSIZE 10 727 728 /* 729 Checks for missing diagonals 730 */ 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 733 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A) 734 { 735 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 736 PetscErrorCode ierr; 737 PetscInt *diag,*jj = a->j,i; 738 739 PetscFunctionBegin; 740 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 741 diag = a->diag; 742 for (i=0; i<a->mbs; i++) { 743 if (jj[diag[i]] != i) { 744 SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i); 745 } 746 } 747 PetscFunctionReturn(0); 748 } 749 750 #undef __FUNCT__ 751 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 752 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 753 { 754 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 755 PetscErrorCode ierr; 756 PetscInt i,j,m = a->mbs; 757 758 PetscFunctionBegin; 759 if (!a->diag) { 760 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 761 } 762 for (i=0; i<m; i++) { 763 a->diag[i] = a->i[i+1]; 764 for (j=a->i[i]; j<a->i[i+1]; j++) { 765 if (a->j[j] == i) { 766 a->diag[i] = j; 767 break; 768 } 769 } 770 } 771 PetscFunctionReturn(0); 772 } 773 774 775 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 776 777 #undef __FUNCT__ 778 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 779 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 780 { 781 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 782 PetscErrorCode ierr; 783 PetscInt n = a->mbs,i; 784 785 PetscFunctionBegin; 786 *nn = n; 787 if (!ia) PetscFunctionReturn(0); 788 if (symmetric) { 789 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 790 } else if (oshift == 1) { 791 /* temporarily add 1 to i and j indices */ 792 PetscInt nz = a->i[n]; 793 for (i=0; i<nz; i++) a->j[i]++; 794 for (i=0; i<n+1; i++) a->i[i]++; 795 *ia = a->i; *ja = a->j; 796 } else { 797 *ia = a->i; *ja = a->j; 798 } 799 800 PetscFunctionReturn(0); 801 } 802 803 #undef __FUNCT__ 804 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 805 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 806 { 807 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 808 PetscErrorCode ierr; 809 PetscInt i,n = a->mbs; 810 811 PetscFunctionBegin; 812 if (!ia) PetscFunctionReturn(0); 813 if (symmetric) { 814 ierr = PetscFree(*ia);CHKERRQ(ierr); 815 ierr = PetscFree(*ja);CHKERRQ(ierr); 816 } else if (oshift == 1) { 817 PetscInt nz = a->i[n]-1; 818 for (i=0; i<nz; i++) a->j[i]--; 819 for (i=0; i<n+1; i++) a->i[i]--; 820 } 821 PetscFunctionReturn(0); 822 } 823 824 #undef __FUNCT__ 825 #define __FUNCT__ "MatDestroy_SeqBAIJ" 826 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 827 { 828 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 829 PetscErrorCode ierr; 830 831 PetscFunctionBegin; 832 #if defined(PETSC_USE_LOG) 833 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz); 834 #endif 835 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 836 if (a->row) { 837 ierr = ISDestroy(a->row);CHKERRQ(ierr); 838 } 839 if (a->col) { 840 ierr = ISDestroy(a->col);CHKERRQ(ierr); 841 } 842 ierr = PetscFree(a->diag);CHKERRQ(ierr); 843 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 844 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 845 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 846 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 847 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 848 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 849 #if defined(PETSC_USE_MAT_SINGLE) 850 ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr); 851 #endif 852 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 853 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 854 855 ierr = PetscFree(a);CHKERRQ(ierr); 856 857 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 858 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 859 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 860 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 861 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 862 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 863 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 864 PetscFunctionReturn(0); 865 } 866 867 #undef __FUNCT__ 868 #define __FUNCT__ "MatSetOption_SeqBAIJ" 869 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op) 870 { 871 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 872 PetscErrorCode ierr; 873 874 PetscFunctionBegin; 875 switch (op) { 876 case MAT_ROW_ORIENTED: 877 a->roworiented = PETSC_TRUE; 878 break; 879 case MAT_COLUMN_ORIENTED: 880 a->roworiented = PETSC_FALSE; 881 break; 882 case MAT_COLUMNS_SORTED: 883 a->sorted = PETSC_TRUE; 884 break; 885 case MAT_COLUMNS_UNSORTED: 886 a->sorted = PETSC_FALSE; 887 break; 888 case MAT_KEEP_ZEROED_ROWS: 889 a->keepzeroedrows = PETSC_TRUE; 890 break; 891 case MAT_NO_NEW_NONZERO_LOCATIONS: 892 a->nonew = 1; 893 break; 894 case MAT_NEW_NONZERO_LOCATION_ERR: 895 a->nonew = -1; 896 break; 897 case MAT_NEW_NONZERO_ALLOCATION_ERR: 898 a->nonew = -2; 899 break; 900 case MAT_YES_NEW_NONZERO_LOCATIONS: 901 a->nonew = 0; 902 break; 903 case MAT_ROWS_SORTED: 904 case MAT_ROWS_UNSORTED: 905 case MAT_YES_NEW_DIAGONALS: 906 case MAT_IGNORE_OFF_PROC_ENTRIES: 907 case MAT_USE_HASH_TABLE: 908 ierr = PetscInfo1(A,"Option %d ignored\n",op);CHKERRQ(ierr); 909 break; 910 case MAT_NO_NEW_DIAGONALS: 911 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 912 case MAT_SYMMETRIC: 913 case MAT_STRUCTURALLY_SYMMETRIC: 914 case MAT_NOT_SYMMETRIC: 915 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 916 case MAT_HERMITIAN: 917 case MAT_NOT_HERMITIAN: 918 case MAT_SYMMETRY_ETERNAL: 919 case MAT_NOT_SYMMETRY_ETERNAL: 920 break; 921 default: 922 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 923 } 924 PetscFunctionReturn(0); 925 } 926 927 #undef __FUNCT__ 928 #define __FUNCT__ "MatGetRow_SeqBAIJ" 929 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 930 { 931 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 932 PetscErrorCode ierr; 933 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 934 MatScalar *aa,*aa_i; 935 PetscScalar *v_i; 936 937 PetscFunctionBegin; 938 bs = A->rmap.bs; 939 ai = a->i; 940 aj = a->j; 941 aa = a->a; 942 bs2 = a->bs2; 943 944 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 945 946 bn = row/bs; /* Block number */ 947 bp = row % bs; /* Block Position */ 948 M = ai[bn+1] - ai[bn]; 949 *nz = bs*M; 950 951 if (v) { 952 *v = 0; 953 if (*nz) { 954 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 955 for (i=0; i<M; i++) { /* for each block in the block row */ 956 v_i = *v + i*bs; 957 aa_i = aa + bs2*(ai[bn] + i); 958 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 959 } 960 } 961 } 962 963 if (idx) { 964 *idx = 0; 965 if (*nz) { 966 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 967 for (i=0; i<M; i++) { /* for each block in the block row */ 968 idx_i = *idx + i*bs; 969 itmp = bs*aj[ai[bn] + i]; 970 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 971 } 972 } 973 } 974 PetscFunctionReturn(0); 975 } 976 977 #undef __FUNCT__ 978 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 979 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 980 { 981 PetscErrorCode ierr; 982 983 PetscFunctionBegin; 984 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 985 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 986 PetscFunctionReturn(0); 987 } 988 989 #undef __FUNCT__ 990 #define __FUNCT__ "MatTranspose_SeqBAIJ" 991 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 992 { 993 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 994 Mat C; 995 PetscErrorCode ierr; 996 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col; 997 PetscInt *rows,*cols,bs2=a->bs2; 998 PetscScalar *array; 999 1000 PetscFunctionBegin; 1001 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 1002 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1003 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1004 1005 #if defined(PETSC_USE_MAT_SINGLE) 1006 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 1007 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 1008 #else 1009 array = a->a; 1010 #endif 1011 1012 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1013 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 1014 ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr); 1015 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1016 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1017 ierr = PetscFree(col);CHKERRQ(ierr); 1018 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1019 cols = rows + bs; 1020 for (i=0; i<mbs; i++) { 1021 cols[0] = i*bs; 1022 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1023 len = ai[i+1] - ai[i]; 1024 for (j=0; j<len; j++) { 1025 rows[0] = (*aj++)*bs; 1026 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1027 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1028 array += bs2; 1029 } 1030 } 1031 ierr = PetscFree(rows);CHKERRQ(ierr); 1032 #if defined(PETSC_USE_MAT_SINGLE) 1033 ierr = PetscFree(array); 1034 #endif 1035 1036 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1037 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1038 1039 if (B) { 1040 *B = C; 1041 } else { 1042 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1043 } 1044 PetscFunctionReturn(0); 1045 } 1046 1047 #undef __FUNCT__ 1048 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1049 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1050 { 1051 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1052 PetscErrorCode ierr; 1053 PetscInt i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2; 1054 int fd; 1055 PetscScalar *aa; 1056 FILE *file; 1057 1058 PetscFunctionBegin; 1059 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1060 ierr = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1061 col_lens[0] = MAT_FILE_COOKIE; 1062 1063 col_lens[1] = A->rmap.N; 1064 col_lens[2] = A->cmap.n; 1065 col_lens[3] = a->nz*bs2; 1066 1067 /* store lengths of each row and write (including header) to file */ 1068 count = 0; 1069 for (i=0; i<a->mbs; i++) { 1070 for (j=0; j<bs; j++) { 1071 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1072 } 1073 } 1074 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1075 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1076 1077 /* store column indices (zero start index) */ 1078 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1079 count = 0; 1080 for (i=0; i<a->mbs; i++) { 1081 for (j=0; j<bs; j++) { 1082 for (k=a->i[i]; k<a->i[i+1]; k++) { 1083 for (l=0; l<bs; l++) { 1084 jj[count++] = bs*a->j[k] + l; 1085 } 1086 } 1087 } 1088 } 1089 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1090 ierr = PetscFree(jj);CHKERRQ(ierr); 1091 1092 /* store nonzero values */ 1093 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1094 count = 0; 1095 for (i=0; i<a->mbs; i++) { 1096 for (j=0; j<bs; j++) { 1097 for (k=a->i[i]; k<a->i[i+1]; k++) { 1098 for (l=0; l<bs; l++) { 1099 aa[count++] = a->a[bs2*k + l*bs + j]; 1100 } 1101 } 1102 } 1103 } 1104 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1105 ierr = PetscFree(aa);CHKERRQ(ierr); 1106 1107 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1108 if (file) { 1109 fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs); 1110 } 1111 PetscFunctionReturn(0); 1112 } 1113 1114 #undef __FUNCT__ 1115 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1116 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1117 { 1118 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1119 PetscErrorCode ierr; 1120 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 1121 PetscViewerFormat format; 1122 1123 PetscFunctionBegin; 1124 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1125 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1126 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1127 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1128 Mat aij; 1129 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1130 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1131 ierr = MatDestroy(aij);CHKERRQ(ierr); 1132 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1133 PetscFunctionReturn(0); 1134 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1135 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1136 for (i=0; i<a->mbs; i++) { 1137 for (j=0; j<bs; j++) { 1138 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1139 for (k=a->i[i]; k<a->i[i+1]; k++) { 1140 for (l=0; l<bs; l++) { 1141 #if defined(PETSC_USE_COMPLEX) 1142 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1143 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1144 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1145 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1146 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1147 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1148 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1149 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1150 } 1151 #else 1152 if (a->a[bs2*k + l*bs + j] != 0.0) { 1153 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1154 } 1155 #endif 1156 } 1157 } 1158 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1159 } 1160 } 1161 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1162 } else { 1163 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1164 for (i=0; i<a->mbs; i++) { 1165 for (j=0; j<bs; j++) { 1166 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1167 for (k=a->i[i]; k<a->i[i+1]; k++) { 1168 for (l=0; l<bs; l++) { 1169 #if defined(PETSC_USE_COMPLEX) 1170 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1171 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1172 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1173 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1174 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1175 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1176 } else { 1177 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1178 } 1179 #else 1180 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1181 #endif 1182 } 1183 } 1184 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1185 } 1186 } 1187 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1188 } 1189 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1190 PetscFunctionReturn(0); 1191 } 1192 1193 #undef __FUNCT__ 1194 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1195 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1196 { 1197 Mat A = (Mat) Aa; 1198 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1199 PetscErrorCode ierr; 1200 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 1201 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1202 MatScalar *aa; 1203 PetscViewer viewer; 1204 1205 PetscFunctionBegin; 1206 1207 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1208 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1209 1210 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1211 1212 /* loop over matrix elements drawing boxes */ 1213 color = PETSC_DRAW_BLUE; 1214 for (i=0,row=0; i<mbs; i++,row+=bs) { 1215 for (j=a->i[i]; j<a->i[i+1]; j++) { 1216 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1217 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1218 aa = a->a + j*bs2; 1219 for (k=0; k<bs; k++) { 1220 for (l=0; l<bs; l++) { 1221 if (PetscRealPart(*aa++) >= 0.) continue; 1222 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1223 } 1224 } 1225 } 1226 } 1227 color = PETSC_DRAW_CYAN; 1228 for (i=0,row=0; i<mbs; i++,row+=bs) { 1229 for (j=a->i[i]; j<a->i[i+1]; j++) { 1230 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1231 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1232 aa = a->a + j*bs2; 1233 for (k=0; k<bs; k++) { 1234 for (l=0; l<bs; l++) { 1235 if (PetscRealPart(*aa++) != 0.) continue; 1236 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1237 } 1238 } 1239 } 1240 } 1241 1242 color = PETSC_DRAW_RED; 1243 for (i=0,row=0; i<mbs; i++,row+=bs) { 1244 for (j=a->i[i]; j<a->i[i+1]; j++) { 1245 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1246 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1247 aa = a->a + j*bs2; 1248 for (k=0; k<bs; k++) { 1249 for (l=0; l<bs; l++) { 1250 if (PetscRealPart(*aa++) <= 0.) continue; 1251 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1252 } 1253 } 1254 } 1255 } 1256 PetscFunctionReturn(0); 1257 } 1258 1259 #undef __FUNCT__ 1260 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1261 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1262 { 1263 PetscErrorCode ierr; 1264 PetscReal xl,yl,xr,yr,w,h; 1265 PetscDraw draw; 1266 PetscTruth isnull; 1267 1268 PetscFunctionBegin; 1269 1270 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1271 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1272 1273 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1274 xr = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 1275 xr += w; yr += h; xl = -w; yl = -h; 1276 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1277 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1278 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1279 PetscFunctionReturn(0); 1280 } 1281 1282 #undef __FUNCT__ 1283 #define __FUNCT__ "MatView_SeqBAIJ" 1284 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1285 { 1286 PetscErrorCode ierr; 1287 PetscTruth iascii,isbinary,isdraw; 1288 1289 PetscFunctionBegin; 1290 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1291 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1292 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1293 if (iascii){ 1294 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1295 } else if (isbinary) { 1296 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1297 } else if (isdraw) { 1298 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1299 } else { 1300 Mat B; 1301 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1302 ierr = MatView(B,viewer);CHKERRQ(ierr); 1303 ierr = MatDestroy(B);CHKERRQ(ierr); 1304 } 1305 PetscFunctionReturn(0); 1306 } 1307 1308 1309 #undef __FUNCT__ 1310 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1311 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1312 { 1313 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1314 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1315 PetscInt *ai = a->i,*ailen = a->ilen; 1316 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 1317 MatScalar *ap,*aa = a->a,zero = 0.0; 1318 1319 PetscFunctionBegin; 1320 for (k=0; k<m; k++) { /* loop over rows */ 1321 row = im[k]; brow = row/bs; 1322 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 1323 if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1324 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1325 nrow = ailen[brow]; 1326 for (l=0; l<n; l++) { /* loop over columns */ 1327 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 1328 if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1329 col = in[l] ; 1330 bcol = col/bs; 1331 cidx = col%bs; 1332 ridx = row%bs; 1333 high = nrow; 1334 low = 0; /* assume unsorted */ 1335 while (high-low > 5) { 1336 t = (low+high)/2; 1337 if (rp[t] > bcol) high = t; 1338 else low = t; 1339 } 1340 for (i=low; i<high; i++) { 1341 if (rp[i] > bcol) break; 1342 if (rp[i] == bcol) { 1343 *v++ = ap[bs2*i+bs*cidx+ridx]; 1344 goto finished; 1345 } 1346 } 1347 *v++ = zero; 1348 finished:; 1349 } 1350 } 1351 PetscFunctionReturn(0); 1352 } 1353 1354 #if defined(PETSC_USE_MAT_SINGLE) 1355 #undef __FUNCT__ 1356 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1357 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 1358 { 1359 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1360 PetscErrorCode ierr; 1361 PetscInt i,N = m*n*b->bs2; 1362 MatScalar *vsingle; 1363 1364 PetscFunctionBegin; 1365 if (N > b->setvalueslen) { 1366 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 1367 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1368 b->setvalueslen = N; 1369 } 1370 vsingle = b->setvaluescopy; 1371 for (i=0; i<N; i++) { 1372 vsingle[i] = v[i]; 1373 } 1374 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1375 PetscFunctionReturn(0); 1376 } 1377 #endif 1378 1379 1380 #undef __FUNCT__ 1381 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1382 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is) 1383 { 1384 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1385 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1386 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1387 PetscErrorCode ierr; 1388 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 1389 PetscTruth roworiented=a->roworiented; 1390 const MatScalar *value = v; 1391 MatScalar *ap,*aa = a->a,*bap; 1392 1393 PetscFunctionBegin; 1394 if (roworiented) { 1395 stepval = (n-1)*bs; 1396 } else { 1397 stepval = (m-1)*bs; 1398 } 1399 for (k=0; k<m; k++) { /* loop over added rows */ 1400 row = im[k]; 1401 if (row < 0) continue; 1402 #if defined(PETSC_USE_DEBUG) 1403 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1404 #endif 1405 rp = aj + ai[row]; 1406 ap = aa + bs2*ai[row]; 1407 rmax = imax[row]; 1408 nrow = ailen[row]; 1409 low = 0; 1410 high = nrow; 1411 for (l=0; l<n; l++) { /* loop over added columns */ 1412 if (in[l] < 0) continue; 1413 #if defined(PETSC_USE_DEBUG) 1414 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1415 #endif 1416 col = in[l]; 1417 if (roworiented) { 1418 value = v + k*(stepval+bs)*bs + l*bs; 1419 } else { 1420 value = v + l*(stepval+bs)*bs + k*bs; 1421 } 1422 if (col <= lastcol) low = 0; else high = nrow; 1423 lastcol = col; 1424 while (high-low > 7) { 1425 t = (low+high)/2; 1426 if (rp[t] > col) high = t; 1427 else low = t; 1428 } 1429 for (i=low; i<high; i++) { 1430 if (rp[i] > col) break; 1431 if (rp[i] == col) { 1432 bap = ap + bs2*i; 1433 if (roworiented) { 1434 if (is == ADD_VALUES) { 1435 for (ii=0; ii<bs; ii++,value+=stepval) { 1436 for (jj=ii; jj<bs2; jj+=bs) { 1437 bap[jj] += *value++; 1438 } 1439 } 1440 } else { 1441 for (ii=0; ii<bs; ii++,value+=stepval) { 1442 for (jj=ii; jj<bs2; jj+=bs) { 1443 bap[jj] = *value++; 1444 } 1445 } 1446 } 1447 } else { 1448 if (is == ADD_VALUES) { 1449 for (ii=0; ii<bs; ii++,value+=stepval) { 1450 for (jj=0; jj<bs; jj++) { 1451 *bap++ += *value++; 1452 } 1453 } 1454 } else { 1455 for (ii=0; ii<bs; ii++,value+=stepval) { 1456 for (jj=0; jj<bs; jj++) { 1457 *bap++ = *value++; 1458 } 1459 } 1460 } 1461 } 1462 goto noinsert2; 1463 } 1464 } 1465 if (nonew == 1) goto noinsert2; 1466 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1467 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew); 1468 N = nrow++ - 1; high++; 1469 /* shift up all the later entries in this row */ 1470 for (ii=N; ii>=i; ii--) { 1471 rp[ii+1] = rp[ii]; 1472 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1473 } 1474 if (N >= i) { 1475 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1476 } 1477 rp[i] = col; 1478 bap = ap + bs2*i; 1479 if (roworiented) { 1480 for (ii=0; ii<bs; ii++,value+=stepval) { 1481 for (jj=ii; jj<bs2; jj+=bs) { 1482 bap[jj] = *value++; 1483 } 1484 } 1485 } else { 1486 for (ii=0; ii<bs; ii++,value+=stepval) { 1487 for (jj=0; jj<bs; jj++) { 1488 *bap++ = *value++; 1489 } 1490 } 1491 } 1492 noinsert2:; 1493 low = i; 1494 } 1495 ailen[row] = nrow; 1496 } 1497 PetscFunctionReturn(0); 1498 } 1499 1500 #undef __FUNCT__ 1501 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1502 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1503 { 1504 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1505 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1506 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 1507 PetscErrorCode ierr; 1508 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1509 MatScalar *aa = a->a,*ap; 1510 PetscReal ratio=0.6; 1511 1512 PetscFunctionBegin; 1513 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1514 1515 if (m) rmax = ailen[0]; 1516 for (i=1; i<mbs; i++) { 1517 /* move each row back by the amount of empty slots (fshift) before it*/ 1518 fshift += imax[i-1] - ailen[i-1]; 1519 rmax = PetscMax(rmax,ailen[i]); 1520 if (fshift) { 1521 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1522 N = ailen[i]; 1523 for (j=0; j<N; j++) { 1524 ip[j-fshift] = ip[j]; 1525 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1526 } 1527 } 1528 ai[i] = ai[i-1] + ailen[i-1]; 1529 } 1530 if (mbs) { 1531 fshift += imax[mbs-1] - ailen[mbs-1]; 1532 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1533 } 1534 /* reset ilen and imax for each row */ 1535 for (i=0; i<mbs; i++) { 1536 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1537 } 1538 a->nz = ai[mbs]; 1539 1540 /* diagonals may have moved, so kill the diagonal pointers */ 1541 a->idiagvalid = PETSC_FALSE; 1542 if (fshift && a->diag) { 1543 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1544 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1545 a->diag = 0; 1546 } 1547 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1548 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1549 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1550 a->reallocs = 0; 1551 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1552 1553 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1554 if (a->compressedrow.use){ 1555 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1556 } 1557 1558 A->same_nonzero = PETSC_TRUE; 1559 PetscFunctionReturn(0); 1560 } 1561 1562 /* 1563 This function returns an array of flags which indicate the locations of contiguous 1564 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1565 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1566 Assume: sizes should be long enough to hold all the values. 1567 */ 1568 #undef __FUNCT__ 1569 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1570 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1571 { 1572 PetscInt i,j,k,row; 1573 PetscTruth flg; 1574 1575 PetscFunctionBegin; 1576 for (i=0,j=0; i<n; j++) { 1577 row = idx[i]; 1578 if (row%bs!=0) { /* Not the begining of a block */ 1579 sizes[j] = 1; 1580 i++; 1581 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1582 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1583 i++; 1584 } else { /* Begining of the block, so check if the complete block exists */ 1585 flg = PETSC_TRUE; 1586 for (k=1; k<bs; k++) { 1587 if (row+k != idx[i+k]) { /* break in the block */ 1588 flg = PETSC_FALSE; 1589 break; 1590 } 1591 } 1592 if (flg) { /* No break in the bs */ 1593 sizes[j] = bs; 1594 i+= bs; 1595 } else { 1596 sizes[j] = 1; 1597 i++; 1598 } 1599 } 1600 } 1601 *bs_max = j; 1602 PetscFunctionReturn(0); 1603 } 1604 1605 #undef __FUNCT__ 1606 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1607 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 1608 { 1609 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1610 PetscErrorCode ierr; 1611 PetscInt i,j,k,count,*rows; 1612 PetscInt bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max; 1613 PetscScalar zero = 0.0; 1614 MatScalar *aa; 1615 1616 PetscFunctionBegin; 1617 /* Make a copy of the IS and sort it */ 1618 /* allocate memory for rows,sizes */ 1619 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1620 sizes = rows + is_n; 1621 1622 /* copy IS values to rows, and sort them */ 1623 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1624 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1625 if (baij->keepzeroedrows) { 1626 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1627 bs_max = is_n; 1628 A->same_nonzero = PETSC_TRUE; 1629 } else { 1630 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1631 A->same_nonzero = PETSC_FALSE; 1632 } 1633 1634 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1635 row = rows[j]; 1636 if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 1637 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1638 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1639 if (sizes[i] == bs && !baij->keepzeroedrows) { 1640 if (diag != 0.0) { 1641 if (baij->ilen[row/bs] > 0) { 1642 baij->ilen[row/bs] = 1; 1643 baij->j[baij->i[row/bs]] = row/bs; 1644 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1645 } 1646 /* Now insert all the diagonal values for this bs */ 1647 for (k=0; k<bs; k++) { 1648 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 1649 } 1650 } else { /* (diag == 0.0) */ 1651 baij->ilen[row/bs] = 0; 1652 } /* end (diag == 0.0) */ 1653 } else { /* (sizes[i] != bs) */ 1654 #if defined (PETSC_USE_DEBUG) 1655 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 1656 #endif 1657 for (k=0; k<count; k++) { 1658 aa[0] = zero; 1659 aa += bs; 1660 } 1661 if (diag != 0.0) { 1662 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 1663 } 1664 } 1665 } 1666 1667 ierr = PetscFree(rows);CHKERRQ(ierr); 1668 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1669 PetscFunctionReturn(0); 1670 } 1671 1672 #undef __FUNCT__ 1673 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1674 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1675 { 1676 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1677 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 1678 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1679 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 1680 PetscErrorCode ierr; 1681 PetscInt ridx,cidx,bs2=a->bs2; 1682 PetscTruth roworiented=a->roworiented; 1683 MatScalar *ap,value,*aa=a->a,*bap; 1684 1685 PetscFunctionBegin; 1686 for (k=0; k<m; k++) { /* loop over added rows */ 1687 row = im[k]; 1688 brow = row/bs; 1689 if (row < 0) continue; 1690 #if defined(PETSC_USE_DEBUG) 1691 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 1692 #endif 1693 rp = aj + ai[brow]; 1694 ap = aa + bs2*ai[brow]; 1695 rmax = imax[brow]; 1696 nrow = ailen[brow]; 1697 low = 0; 1698 high = nrow; 1699 for (l=0; l<n; l++) { /* loop over added columns */ 1700 if (in[l] < 0) continue; 1701 #if defined(PETSC_USE_DEBUG) 1702 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 1703 #endif 1704 col = in[l]; bcol = col/bs; 1705 ridx = row % bs; cidx = col % bs; 1706 if (roworiented) { 1707 value = v[l + k*n]; 1708 } else { 1709 value = v[k + l*m]; 1710 } 1711 if (col <= lastcol) low = 0; else high = nrow; 1712 lastcol = col; 1713 while (high-low > 7) { 1714 t = (low+high)/2; 1715 if (rp[t] > bcol) high = t; 1716 else low = t; 1717 } 1718 for (i=low; i<high; i++) { 1719 if (rp[i] > bcol) break; 1720 if (rp[i] == bcol) { 1721 bap = ap + bs2*i + bs*cidx + ridx; 1722 if (is == ADD_VALUES) *bap += value; 1723 else *bap = value; 1724 goto noinsert1; 1725 } 1726 } 1727 if (nonew == 1) goto noinsert1; 1728 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1729 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew); 1730 N = nrow++ - 1; high++; 1731 /* shift up all the later entries in this row */ 1732 for (ii=N; ii>=i; ii--) { 1733 rp[ii+1] = rp[ii]; 1734 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1735 } 1736 if (N>=i) { 1737 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1738 } 1739 rp[i] = bcol; 1740 ap[bs2*i + bs*cidx + ridx] = value; 1741 a->nz++; 1742 noinsert1:; 1743 low = i; 1744 } 1745 ailen[brow] = nrow; 1746 } 1747 A->same_nonzero = PETSC_FALSE; 1748 PetscFunctionReturn(0); 1749 } 1750 1751 1752 #undef __FUNCT__ 1753 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1754 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1755 { 1756 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1757 Mat outA; 1758 PetscErrorCode ierr; 1759 PetscTruth row_identity,col_identity; 1760 1761 PetscFunctionBegin; 1762 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1763 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1764 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1765 if (!row_identity || !col_identity) { 1766 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 1767 } 1768 1769 outA = inA; 1770 inA->factor = FACTOR_LU; 1771 1772 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1773 1774 a->row = row; 1775 a->col = col; 1776 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1777 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1778 1779 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1780 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1781 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1782 1783 /* 1784 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1785 for ILU(0) factorization with natural ordering 1786 */ 1787 if (inA->rmap.bs < 8) { 1788 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1789 } else { 1790 if (!a->solve_work) { 1791 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1792 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1793 } 1794 } 1795 1796 ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1797 1798 PetscFunctionReturn(0); 1799 } 1800 1801 EXTERN_C_BEGIN 1802 #undef __FUNCT__ 1803 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1804 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 1805 { 1806 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1807 PetscInt i,nz,nbs; 1808 1809 PetscFunctionBegin; 1810 nz = baij->maxnz/baij->bs2; 1811 nbs = baij->nbs; 1812 for (i=0; i<nz; i++) { 1813 baij->j[i] = indices[i]; 1814 } 1815 baij->nz = nz; 1816 for (i=0; i<nbs; i++) { 1817 baij->ilen[i] = baij->imax[i]; 1818 } 1819 1820 PetscFunctionReturn(0); 1821 } 1822 EXTERN_C_END 1823 1824 #undef __FUNCT__ 1825 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1826 /*@ 1827 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1828 in the matrix. 1829 1830 Input Parameters: 1831 + mat - the SeqBAIJ matrix 1832 - indices - the column indices 1833 1834 Level: advanced 1835 1836 Notes: 1837 This can be called if you have precomputed the nonzero structure of the 1838 matrix and want to provide it to the matrix object to improve the performance 1839 of the MatSetValues() operation. 1840 1841 You MUST have set the correct numbers of nonzeros per row in the call to 1842 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 1843 1844 MUST be called before any calls to MatSetValues(); 1845 1846 @*/ 1847 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1848 { 1849 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1850 1851 PetscFunctionBegin; 1852 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1853 PetscValidPointer(indices,2); 1854 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1855 if (f) { 1856 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1857 } else { 1858 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 1859 } 1860 PetscFunctionReturn(0); 1861 } 1862 1863 #undef __FUNCT__ 1864 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1865 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1866 { 1867 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1868 PetscErrorCode ierr; 1869 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 1870 PetscReal atmp; 1871 PetscScalar *x,zero = 0.0; 1872 MatScalar *aa; 1873 PetscInt ncols,brow,krow,kcol; 1874 1875 PetscFunctionBegin; 1876 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1877 bs = A->rmap.bs; 1878 aa = a->a; 1879 ai = a->i; 1880 aj = a->j; 1881 mbs = a->mbs; 1882 1883 ierr = VecSet(v,zero);CHKERRQ(ierr); 1884 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1885 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1886 if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1887 for (i=0; i<mbs; i++) { 1888 ncols = ai[1] - ai[0]; ai++; 1889 brow = bs*i; 1890 for (j=0; j<ncols; j++){ 1891 /* bcol = bs*(*aj); */ 1892 for (kcol=0; kcol<bs; kcol++){ 1893 for (krow=0; krow<bs; krow++){ 1894 atmp = PetscAbsScalar(*aa); aa++; 1895 row = brow + krow; /* row index */ 1896 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 1897 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1898 } 1899 } 1900 aj++; 1901 } 1902 } 1903 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1904 PetscFunctionReturn(0); 1905 } 1906 1907 #undef __FUNCT__ 1908 #define __FUNCT__ "MatCopy_SeqBAIJ" 1909 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 1910 { 1911 PetscErrorCode ierr; 1912 1913 PetscFunctionBegin; 1914 /* If the two matrices have the same copy implementation, use fast copy. */ 1915 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1916 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1917 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 1918 1919 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 1920 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1921 } 1922 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 1923 } else { 1924 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1925 } 1926 PetscFunctionReturn(0); 1927 } 1928 1929 #undef __FUNCT__ 1930 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1931 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 1932 { 1933 PetscErrorCode ierr; 1934 1935 PetscFunctionBegin; 1936 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 1937 PetscFunctionReturn(0); 1938 } 1939 1940 #undef __FUNCT__ 1941 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1942 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1943 { 1944 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1945 PetscFunctionBegin; 1946 *array = a->a; 1947 PetscFunctionReturn(0); 1948 } 1949 1950 #undef __FUNCT__ 1951 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1952 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1953 { 1954 PetscFunctionBegin; 1955 PetscFunctionReturn(0); 1956 } 1957 1958 #include "petscblaslapack.h" 1959 #undef __FUNCT__ 1960 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1961 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1962 { 1963 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1964 PetscErrorCode ierr; 1965 PetscInt i,bs=Y->rmap.bs,j,bs2; 1966 PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz; 1967 1968 PetscFunctionBegin; 1969 if (str == SAME_NONZERO_PATTERN) { 1970 PetscScalar alpha = a; 1971 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1972 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1973 if (y->xtoy && y->XtoY != X) { 1974 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1975 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1976 } 1977 if (!y->xtoy) { /* get xtoy */ 1978 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1979 y->XtoY = X; 1980 } 1981 bs2 = bs*bs; 1982 for (i=0; i<x->nz; i++) { 1983 j = 0; 1984 while (j < bs2){ 1985 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 1986 j++; 1987 } 1988 } 1989 ierr = PetscInfo3(0,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 1990 } else { 1991 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1992 } 1993 PetscFunctionReturn(0); 1994 } 1995 1996 #undef __FUNCT__ 1997 #define __FUNCT__ "MatRealPart_SeqBAIJ" 1998 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 1999 { 2000 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2001 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2002 PetscScalar *aa = a->a; 2003 2004 PetscFunctionBegin; 2005 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2006 PetscFunctionReturn(0); 2007 } 2008 2009 #undef __FUNCT__ 2010 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2011 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2012 { 2013 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2014 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2015 PetscScalar *aa = a->a; 2016 2017 PetscFunctionBegin; 2018 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2019 PetscFunctionReturn(0); 2020 } 2021 2022 2023 /* -------------------------------------------------------------------*/ 2024 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2025 MatGetRow_SeqBAIJ, 2026 MatRestoreRow_SeqBAIJ, 2027 MatMult_SeqBAIJ_N, 2028 /* 4*/ MatMultAdd_SeqBAIJ_N, 2029 MatMultTranspose_SeqBAIJ, 2030 MatMultTransposeAdd_SeqBAIJ, 2031 MatSolve_SeqBAIJ_N, 2032 0, 2033 0, 2034 /*10*/ 0, 2035 MatLUFactor_SeqBAIJ, 2036 0, 2037 0, 2038 MatTranspose_SeqBAIJ, 2039 /*15*/ MatGetInfo_SeqBAIJ, 2040 MatEqual_SeqBAIJ, 2041 MatGetDiagonal_SeqBAIJ, 2042 MatDiagonalScale_SeqBAIJ, 2043 MatNorm_SeqBAIJ, 2044 /*20*/ 0, 2045 MatAssemblyEnd_SeqBAIJ, 2046 0, 2047 MatSetOption_SeqBAIJ, 2048 MatZeroEntries_SeqBAIJ, 2049 /*25*/ MatZeroRows_SeqBAIJ, 2050 MatLUFactorSymbolic_SeqBAIJ, 2051 MatLUFactorNumeric_SeqBAIJ_N, 2052 MatCholeskyFactorSymbolic_SeqBAIJ, 2053 MatCholeskyFactorNumeric_SeqBAIJ_N, 2054 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2055 MatILUFactorSymbolic_SeqBAIJ, 2056 MatICCFactorSymbolic_SeqBAIJ, 2057 MatGetArray_SeqBAIJ, 2058 MatRestoreArray_SeqBAIJ, 2059 /*35*/ MatDuplicate_SeqBAIJ, 2060 0, 2061 0, 2062 MatILUFactor_SeqBAIJ, 2063 0, 2064 /*40*/ MatAXPY_SeqBAIJ, 2065 MatGetSubMatrices_SeqBAIJ, 2066 MatIncreaseOverlap_SeqBAIJ, 2067 MatGetValues_SeqBAIJ, 2068 MatCopy_SeqBAIJ, 2069 /*45*/ 0, 2070 MatScale_SeqBAIJ, 2071 0, 2072 0, 2073 0, 2074 /*50*/ 0, 2075 MatGetRowIJ_SeqBAIJ, 2076 MatRestoreRowIJ_SeqBAIJ, 2077 0, 2078 0, 2079 /*55*/ 0, 2080 0, 2081 0, 2082 0, 2083 MatSetValuesBlocked_SeqBAIJ, 2084 /*60*/ MatGetSubMatrix_SeqBAIJ, 2085 MatDestroy_SeqBAIJ, 2086 MatView_SeqBAIJ, 2087 0, 2088 0, 2089 /*65*/ 0, 2090 0, 2091 0, 2092 0, 2093 0, 2094 /*70*/ MatGetRowMax_SeqBAIJ, 2095 MatConvert_Basic, 2096 0, 2097 0, 2098 0, 2099 /*75*/ 0, 2100 0, 2101 0, 2102 0, 2103 0, 2104 /*80*/ 0, 2105 0, 2106 0, 2107 0, 2108 MatLoad_SeqBAIJ, 2109 /*85*/ 0, 2110 0, 2111 0, 2112 0, 2113 0, 2114 /*90*/ 0, 2115 0, 2116 0, 2117 0, 2118 0, 2119 /*95*/ 0, 2120 0, 2121 0, 2122 0, 2123 0, 2124 /*100*/0, 2125 0, 2126 0, 2127 0, 2128 0, 2129 /*105*/0, 2130 MatRealPart_SeqBAIJ, 2131 MatImaginaryPart_SeqBAIJ 2132 }; 2133 2134 EXTERN_C_BEGIN 2135 #undef __FUNCT__ 2136 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2137 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2138 { 2139 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2140 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2141 PetscErrorCode ierr; 2142 2143 PetscFunctionBegin; 2144 if (aij->nonew != 1) { 2145 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2146 } 2147 2148 /* allocate space for values if not already there */ 2149 if (!aij->saved_values) { 2150 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2151 } 2152 2153 /* copy values over */ 2154 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2155 PetscFunctionReturn(0); 2156 } 2157 EXTERN_C_END 2158 2159 EXTERN_C_BEGIN 2160 #undef __FUNCT__ 2161 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2162 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2163 { 2164 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2165 PetscErrorCode ierr; 2166 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2167 2168 PetscFunctionBegin; 2169 if (aij->nonew != 1) { 2170 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2171 } 2172 if (!aij->saved_values) { 2173 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2174 } 2175 2176 /* copy values over */ 2177 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2178 PetscFunctionReturn(0); 2179 } 2180 EXTERN_C_END 2181 2182 EXTERN_C_BEGIN 2183 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2184 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2185 EXTERN_C_END 2186 2187 EXTERN_C_BEGIN 2188 #undef __FUNCT__ 2189 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2190 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2191 { 2192 Mat_SeqBAIJ *b; 2193 PetscErrorCode ierr; 2194 PetscInt i,mbs,nbs,bs2,newbs = bs; 2195 PetscTruth flg,skipallocation = PETSC_FALSE; 2196 2197 PetscFunctionBegin; 2198 2199 if (nz == MAT_SKIP_ALLOCATION) { 2200 skipallocation = PETSC_TRUE; 2201 nz = 0; 2202 } 2203 2204 ierr = PetscOptionsBegin(B->comm,B->prefix,"Options for SEQBAIJ matrix","Mat");CHKERRQ(ierr); 2205 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2206 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2207 2208 if (nnz && newbs != bs) { 2209 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2210 } 2211 bs = newbs; 2212 2213 B->rmap.bs = B->cmap.bs = bs; 2214 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 2215 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 2216 2217 B->preallocated = PETSC_TRUE; 2218 2219 mbs = B->rmap.n/bs; 2220 nbs = B->cmap.n/bs; 2221 bs2 = bs*bs; 2222 2223 if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) { 2224 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs); 2225 } 2226 2227 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2228 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2229 if (nnz) { 2230 for (i=0; i<mbs; i++) { 2231 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2232 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2233 } 2234 } 2235 2236 b = (Mat_SeqBAIJ*)B->data; 2237 ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Options for SEQBAIJ matrix","Mat");CHKERRQ(ierr); 2238 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2239 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2240 2241 B->ops->solve = MatSolve_SeqBAIJ_Update; 2242 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2243 if (!flg) { 2244 switch (bs) { 2245 case 1: 2246 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2247 B->ops->mult = MatMult_SeqBAIJ_1; 2248 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2249 break; 2250 case 2: 2251 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2252 B->ops->mult = MatMult_SeqBAIJ_2; 2253 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2254 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2255 break; 2256 case 3: 2257 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2258 B->ops->mult = MatMult_SeqBAIJ_3; 2259 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2260 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2261 break; 2262 case 4: 2263 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2264 B->ops->mult = MatMult_SeqBAIJ_4; 2265 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2266 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2267 break; 2268 case 5: 2269 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2270 B->ops->mult = MatMult_SeqBAIJ_5; 2271 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2272 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2273 break; 2274 case 6: 2275 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2276 B->ops->mult = MatMult_SeqBAIJ_6; 2277 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2278 break; 2279 case 7: 2280 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2281 B->ops->mult = MatMult_SeqBAIJ_7; 2282 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2283 break; 2284 default: 2285 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2286 B->ops->mult = MatMult_SeqBAIJ_N; 2287 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2288 break; 2289 } 2290 } 2291 B->rmap.bs = bs; 2292 b->mbs = mbs; 2293 b->nbs = nbs; 2294 if (!skipallocation) { 2295 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2296 /* b->ilen will count nonzeros in each block row so far. */ 2297 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2298 if (!nnz) { 2299 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2300 else if (nz <= 0) nz = 1; 2301 for (i=0; i<mbs; i++) b->imax[i] = nz; 2302 nz = nz*mbs; 2303 } else { 2304 nz = 0; 2305 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2306 } 2307 2308 /* allocate the matrix space */ 2309 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 2310 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2311 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2312 b->singlemalloc = PETSC_TRUE; 2313 2314 b->i[0] = 0; 2315 for (i=1; i<mbs+1; i++) { 2316 b->i[i] = b->i[i-1] + b->imax[i-1]; 2317 } 2318 b->free_a = PETSC_TRUE; 2319 b->free_ij = PETSC_TRUE; 2320 } else { 2321 b->free_a = PETSC_FALSE; 2322 b->free_ij = PETSC_FALSE; 2323 } 2324 2325 B->rmap.bs = bs; 2326 b->bs2 = bs2; 2327 b->mbs = mbs; 2328 b->nz = 0; 2329 b->maxnz = nz*bs2; 2330 B->info.nz_unneeded = (PetscReal)b->maxnz; 2331 PetscFunctionReturn(0); 2332 } 2333 EXTERN_C_END 2334 2335 /*MC 2336 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2337 block sparse compressed row format. 2338 2339 Options Database Keys: 2340 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2341 2342 Level: beginner 2343 2344 .seealso: MatCreateSeqBAIJ() 2345 M*/ 2346 2347 EXTERN_C_BEGIN 2348 #undef __FUNCT__ 2349 #define __FUNCT__ "MatCreate_SeqBAIJ" 2350 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2351 { 2352 PetscErrorCode ierr; 2353 PetscMPIInt size; 2354 Mat_SeqBAIJ *b; 2355 2356 PetscFunctionBegin; 2357 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 2358 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2359 2360 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2361 B->data = (void*)b; 2362 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2363 B->factor = 0; 2364 B->mapping = 0; 2365 b->row = 0; 2366 b->col = 0; 2367 b->icol = 0; 2368 b->reallocs = 0; 2369 b->saved_values = 0; 2370 #if defined(PETSC_USE_MAT_SINGLE) 2371 b->setvalueslen = 0; 2372 b->setvaluescopy = PETSC_NULL; 2373 #endif 2374 2375 b->sorted = PETSC_FALSE; 2376 b->roworiented = PETSC_TRUE; 2377 b->nonew = 0; 2378 b->diag = 0; 2379 b->solve_work = 0; 2380 b->mult_work = 0; 2381 B->spptr = 0; 2382 B->info.nz_unneeded = (PetscReal)b->maxnz; 2383 b->keepzeroedrows = PETSC_FALSE; 2384 b->xtoy = 0; 2385 b->XtoY = 0; 2386 b->compressedrow.use = PETSC_FALSE; 2387 b->compressedrow.nrows = 0; 2388 b->compressedrow.i = PETSC_NULL; 2389 b->compressedrow.rindex = PETSC_NULL; 2390 b->compressedrow.checked = PETSC_FALSE; 2391 B->same_nonzero = PETSC_FALSE; 2392 2393 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2394 "MatInvertBlockDiagonal_SeqBAIJ", 2395 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2396 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2397 "MatStoreValues_SeqBAIJ", 2398 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2399 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2400 "MatRetrieveValues_SeqBAIJ", 2401 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2402 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2403 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2404 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2405 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2406 "MatConvert_SeqBAIJ_SeqAIJ", 2407 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2408 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2409 "MatConvert_SeqBAIJ_SeqSBAIJ", 2410 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2411 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2412 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2413 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2414 PetscFunctionReturn(0); 2415 } 2416 EXTERN_C_END 2417 2418 #undef __FUNCT__ 2419 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2420 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2421 { 2422 Mat C; 2423 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2424 PetscErrorCode ierr; 2425 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2426 2427 PetscFunctionBegin; 2428 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2429 2430 *B = 0; 2431 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 2432 ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 2433 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 2434 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2435 c = (Mat_SeqBAIJ*)C->data; 2436 2437 C->rmap.N = A->rmap.N; 2438 C->cmap.N = A->cmap.N; 2439 C->rmap.bs = A->rmap.bs; 2440 c->bs2 = a->bs2; 2441 c->mbs = a->mbs; 2442 c->nbs = a->nbs; 2443 2444 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 2445 for (i=0; i<mbs; i++) { 2446 c->imax[i] = a->imax[i]; 2447 c->ilen[i] = a->ilen[i]; 2448 } 2449 2450 /* allocate the matrix space */ 2451 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2452 c->singlemalloc = PETSC_TRUE; 2453 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2454 if (mbs > 0) { 2455 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2456 if (cpvalues == MAT_COPY_VALUES) { 2457 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2458 } else { 2459 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2460 } 2461 } 2462 c->sorted = a->sorted; 2463 c->roworiented = a->roworiented; 2464 c->nonew = a->nonew; 2465 2466 if (a->diag) { 2467 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2468 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2469 for (i=0; i<mbs; i++) { 2470 c->diag[i] = a->diag[i]; 2471 } 2472 } else c->diag = 0; 2473 c->nz = a->nz; 2474 c->maxnz = a->maxnz; 2475 c->solve_work = 0; 2476 c->mult_work = 0; 2477 c->free_a = PETSC_TRUE; 2478 c->free_ij = PETSC_TRUE; 2479 C->preallocated = PETSC_TRUE; 2480 C->assembled = PETSC_TRUE; 2481 2482 c->compressedrow.use = a->compressedrow.use; 2483 c->compressedrow.nrows = a->compressedrow.nrows; 2484 c->compressedrow.checked = a->compressedrow.checked; 2485 if ( a->compressedrow.checked && a->compressedrow.use){ 2486 i = a->compressedrow.nrows; 2487 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2488 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2489 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2490 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2491 } else { 2492 c->compressedrow.use = PETSC_FALSE; 2493 c->compressedrow.i = PETSC_NULL; 2494 c->compressedrow.rindex = PETSC_NULL; 2495 } 2496 C->same_nonzero = A->same_nonzero; 2497 *B = C; 2498 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 2499 PetscFunctionReturn(0); 2500 } 2501 2502 #undef __FUNCT__ 2503 #define __FUNCT__ "MatLoad_SeqBAIJ" 2504 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A) 2505 { 2506 Mat_SeqBAIJ *a; 2507 Mat B; 2508 PetscErrorCode ierr; 2509 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2510 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2511 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2512 PetscInt *masked,nmask,tmp,bs2,ishift; 2513 PetscMPIInt size; 2514 int fd; 2515 PetscScalar *aa; 2516 MPI_Comm comm = ((PetscObject)viewer)->comm; 2517 2518 PetscFunctionBegin; 2519 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 2520 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2521 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2522 bs2 = bs*bs; 2523 2524 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2525 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2526 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2527 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2528 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2529 M = header[1]; N = header[2]; nz = header[3]; 2530 2531 if (header[3] < 0) { 2532 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2533 } 2534 2535 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2536 2537 /* 2538 This code adds extra rows to make sure the number of rows is 2539 divisible by the blocksize 2540 */ 2541 mbs = M/bs; 2542 extra_rows = bs - M + bs*(mbs); 2543 if (extra_rows == bs) extra_rows = 0; 2544 else mbs++; 2545 if (extra_rows) { 2546 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2547 } 2548 2549 /* read in row lengths */ 2550 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2551 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2552 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2553 2554 /* read in column indices */ 2555 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2556 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2557 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2558 2559 /* loop over row lengths determining block row lengths */ 2560 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2561 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2562 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2563 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2564 masked = mask + mbs; 2565 rowcount = 0; nzcount = 0; 2566 for (i=0; i<mbs; i++) { 2567 nmask = 0; 2568 for (j=0; j<bs; j++) { 2569 kmax = rowlengths[rowcount]; 2570 for (k=0; k<kmax; k++) { 2571 tmp = jj[nzcount++]/bs; 2572 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2573 } 2574 rowcount++; 2575 } 2576 browlengths[i] += nmask; 2577 /* zero out the mask elements we set */ 2578 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2579 } 2580 2581 /* create our matrix */ 2582 ierr = MatCreate(comm,&B); 2583 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 2584 ierr = MatSetType(B,type);CHKERRQ(ierr); 2585 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 2586 a = (Mat_SeqBAIJ*)B->data; 2587 2588 /* set matrix "i" values */ 2589 a->i[0] = 0; 2590 for (i=1; i<= mbs; i++) { 2591 a->i[i] = a->i[i-1] + browlengths[i-1]; 2592 a->ilen[i-1] = browlengths[i-1]; 2593 } 2594 a->nz = 0; 2595 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2596 2597 /* read in nonzero values */ 2598 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2599 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2600 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2601 2602 /* set "a" and "j" values into matrix */ 2603 nzcount = 0; jcount = 0; 2604 for (i=0; i<mbs; i++) { 2605 nzcountb = nzcount; 2606 nmask = 0; 2607 for (j=0; j<bs; j++) { 2608 kmax = rowlengths[i*bs+j]; 2609 for (k=0; k<kmax; k++) { 2610 tmp = jj[nzcount++]/bs; 2611 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2612 } 2613 } 2614 /* sort the masked values */ 2615 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2616 2617 /* set "j" values into matrix */ 2618 maskcount = 1; 2619 for (j=0; j<nmask; j++) { 2620 a->j[jcount++] = masked[j]; 2621 mask[masked[j]] = maskcount++; 2622 } 2623 /* set "a" values into matrix */ 2624 ishift = bs2*a->i[i]; 2625 for (j=0; j<bs; j++) { 2626 kmax = rowlengths[i*bs+j]; 2627 for (k=0; k<kmax; k++) { 2628 tmp = jj[nzcountb]/bs ; 2629 block = mask[tmp] - 1; 2630 point = jj[nzcountb] - bs*tmp; 2631 idx = ishift + bs2*block + j + bs*point; 2632 a->a[idx] = (MatScalar)aa[nzcountb++]; 2633 } 2634 } 2635 /* zero out the mask elements we set */ 2636 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2637 } 2638 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2639 2640 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2641 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2642 ierr = PetscFree(aa);CHKERRQ(ierr); 2643 ierr = PetscFree(jj);CHKERRQ(ierr); 2644 ierr = PetscFree(mask);CHKERRQ(ierr); 2645 2646 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2647 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2648 ierr = MatView_Private(B);CHKERRQ(ierr); 2649 2650 *A = B; 2651 PetscFunctionReturn(0); 2652 } 2653 2654 #undef __FUNCT__ 2655 #define __FUNCT__ "MatCreateSeqBAIJ" 2656 /*@C 2657 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2658 compressed row) format. For good matrix assembly performance the 2659 user should preallocate the matrix storage by setting the parameter nz 2660 (or the array nnz). By setting these parameters accurately, performance 2661 during matrix assembly can be increased by more than a factor of 50. 2662 2663 Collective on MPI_Comm 2664 2665 Input Parameters: 2666 + comm - MPI communicator, set to PETSC_COMM_SELF 2667 . bs - size of block 2668 . m - number of rows 2669 . n - number of columns 2670 . nz - number of nonzero blocks per block row (same for all rows) 2671 - nnz - array containing the number of nonzero blocks in the various block rows 2672 (possibly different for each block row) or PETSC_NULL 2673 2674 Output Parameter: 2675 . A - the matrix 2676 2677 Options Database Keys: 2678 . -mat_no_unroll - uses code that does not unroll the loops in the 2679 block calculations (much slower) 2680 . -mat_block_size - size of the blocks to use 2681 2682 Level: intermediate 2683 2684 Notes: 2685 The number of rows and columns must be divisible by blocksize. 2686 2687 If the nnz parameter is given then the nz parameter is ignored 2688 2689 A nonzero block is any block that as 1 or more nonzeros in it 2690 2691 The block AIJ format is fully compatible with standard Fortran 77 2692 storage. That is, the stored row and column indices can begin at 2693 either one (as in Fortran) or zero. See the users' manual for details. 2694 2695 Specify the preallocated storage with either nz or nnz (not both). 2696 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2697 allocation. For additional details, see the users manual chapter on 2698 matrices. 2699 2700 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2701 @*/ 2702 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 2703 { 2704 PetscErrorCode ierr; 2705 2706 PetscFunctionBegin; 2707 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2708 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2709 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2710 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 2711 PetscFunctionReturn(0); 2712 } 2713 2714 #undef __FUNCT__ 2715 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2716 /*@C 2717 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2718 per row in the matrix. For good matrix assembly performance the 2719 user should preallocate the matrix storage by setting the parameter nz 2720 (or the array nnz). By setting these parameters accurately, performance 2721 during matrix assembly can be increased by more than a factor of 50. 2722 2723 Collective on MPI_Comm 2724 2725 Input Parameters: 2726 + A - the matrix 2727 . bs - size of block 2728 . nz - number of block nonzeros per block row (same for all rows) 2729 - nnz - array containing the number of block nonzeros in the various block rows 2730 (possibly different for each block row) or PETSC_NULL 2731 2732 Options Database Keys: 2733 . -mat_no_unroll - uses code that does not unroll the loops in the 2734 block calculations (much slower) 2735 . -mat_block_size - size of the blocks to use 2736 2737 Level: intermediate 2738 2739 Notes: 2740 If the nnz parameter is given then the nz parameter is ignored 2741 2742 The block AIJ format is fully compatible with standard Fortran 77 2743 storage. That is, the stored row and column indices can begin at 2744 either one (as in Fortran) or zero. See the users' manual for details. 2745 2746 Specify the preallocated storage with either nz or nnz (not both). 2747 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2748 allocation. For additional details, see the users manual chapter on 2749 matrices. 2750 2751 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2752 @*/ 2753 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 2754 { 2755 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 2756 2757 PetscFunctionBegin; 2758 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2759 if (f) { 2760 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2761 } 2762 PetscFunctionReturn(0); 2763 } 2764 2765 #undef __FUNCT__ 2766 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 2767 /*@ 2768 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 2769 (upper triangular entries in CSR format) provided by the user. 2770 2771 Collective on MPI_Comm 2772 2773 Input Parameters: 2774 + comm - must be an MPI communicator of size 1 2775 . bs - size of block 2776 . m - number of rows 2777 . n - number of columns 2778 . i - row indices 2779 . j - column indices 2780 - a - matrix values 2781 2782 Output Parameter: 2783 . mat - the matrix 2784 2785 Level: intermediate 2786 2787 Notes: 2788 The i, j, and a arrays are not copied by this routine, the user must free these arrays 2789 once the matrix is destroyed 2790 2791 You cannot set new nonzero locations into this matrix, that will generate an error. 2792 2793 The i and j indices are 0 based 2794 2795 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 2796 2797 @*/ 2798 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 2799 { 2800 PetscErrorCode ierr; 2801 PetscInt ii; 2802 Mat_SeqBAIJ *baij; 2803 2804 PetscFunctionBegin; 2805 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 2806 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2807 2808 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2809 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 2810 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 2811 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 2812 baij = (Mat_SeqBAIJ*)(*mat)->data; 2813 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 2814 2815 baij->i = i; 2816 baij->j = j; 2817 baij->a = a; 2818 baij->singlemalloc = PETSC_FALSE; 2819 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 2820 baij->free_a = PETSC_FALSE; 2821 baij->free_ij = PETSC_FALSE; 2822 2823 for (ii=0; ii<m; ii++) { 2824 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 2825 #if defined(PETSC_USE_DEBUG) 2826 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 2827 #endif 2828 } 2829 #if defined(PETSC_USE_DEBUG) 2830 for (ii=0; ii<baij->i[m]; ii++) { 2831 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 2832 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 2833 } 2834 #endif 2835 2836 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2837 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2838 PetscFunctionReturn(0); 2839 } 2840