1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/inline/spops.h" 9 #include "petscsys.h" /*I "petscmat.h" I*/ 10 11 #include "src/inline/ilu.h" 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 15 /*@C 16 MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 17 18 Collective on Mat 19 20 Input Parameters: 21 . mat - the matrix 22 23 Level: advanced 24 @*/ 25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 26 { 27 PetscErrorCode ierr,(*f)(Mat); 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 31 if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 32 if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 33 34 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 35 if (f) { 36 ierr = (*f)(mat);CHKERRQ(ierr); 37 } else { 38 SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 39 } 40 PetscFunctionReturn(0); 41 } 42 43 EXTERN_C_BEGIN 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 47 { 48 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 49 PetscErrorCode ierr; 50 PetscInt *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs; 51 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 52 53 PetscFunctionBegin; 54 if (a->idiagvalid) PetscFunctionReturn(0); 55 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56 diag_offset = a->diag; 57 if (!a->idiag) { 58 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59 } 60 diag = a->idiag; 61 mdiag = a->idiag+bs*bs*mbs; 62 /* factor and invert each block */ 63 switch (bs){ 64 case 2: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 4*diag_offset[i]; 67 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 68 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 69 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 70 diag += 4; 71 mdiag += 4; 72 } 73 break; 74 case 3: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 9*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79 diag[8] = odiag[8]; 80 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 81 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 82 mdiag[8] = odiag[8]; 83 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 84 diag += 9; 85 mdiag += 9; 86 } 87 break; 88 case 4: 89 for (i=0; i<mbs; i++) { 90 odiag = v + 16*diag_offset[i]; 91 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 92 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 93 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 94 diag += 16; 95 mdiag += 16; 96 } 97 break; 98 case 5: 99 for (i=0; i<mbs; i++) { 100 odiag = v + 25*diag_offset[i]; 101 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 102 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 103 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 104 diag += 25; 105 mdiag += 25; 106 } 107 break; 108 default: 109 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 110 } 111 a->idiagvalid = PETSC_TRUE; 112 PetscFunctionReturn(0); 113 } 114 EXTERN_C_END 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 119 { 120 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 121 PetscScalar *x,x1,x2,s1,s2; 122 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 123 PetscErrorCode ierr; 124 PetscInt m = a->mbs,i,i2,nz,idx; 125 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 126 127 PetscFunctionBegin; 128 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 129 its = its*lits; 130 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 131 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 132 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 133 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 134 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 135 136 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 137 138 diag = a->diag; 139 idiag = a->idiag; 140 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 141 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 142 143 if (flag & SOR_ZERO_INITIAL_GUESS) { 144 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 145 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 146 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 147 i2 = 2; 148 idiag += 4; 149 for (i=1; i<m; i++) { 150 v = aa + 4*ai[i]; 151 vi = aj + ai[i]; 152 nz = diag[i] - ai[i]; 153 s1 = b[i2]; s2 = b[i2+1]; 154 while (nz--) { 155 idx = 2*(*vi++); 156 x1 = x[idx]; x2 = x[1+idx]; 157 s1 -= v[0]*x1 + v[2]*x2; 158 s2 -= v[1]*x1 + v[3]*x2; 159 v += 4; 160 } 161 x[i2] = idiag[0]*s1 + idiag[2]*s2; 162 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 163 idiag += 4; 164 i2 += 2; 165 } 166 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 167 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 168 } 169 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 170 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 171 i2 = 0; 172 mdiag = a->idiag+4*a->mbs; 173 for (i=0; i<m; i++) { 174 x1 = x[i2]; x2 = x[i2+1]; 175 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 176 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 177 mdiag += 4; 178 i2 += 2; 179 } 180 ierr = PetscLogFlops(6*m);CHKERRQ(ierr); 181 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 182 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 183 } 184 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 185 idiag = a->idiag+4*a->mbs - 4; 186 i2 = 2*m - 2; 187 x1 = x[i2]; x2 = x[i2+1]; 188 x[i2] = idiag[0]*x1 + idiag[2]*x2; 189 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 190 idiag -= 4; 191 i2 -= 2; 192 for (i=m-2; i>=0; i--) { 193 v = aa + 4*(diag[i]+1); 194 vi = aj + diag[i] + 1; 195 nz = ai[i+1] - diag[i] - 1; 196 s1 = x[i2]; s2 = x[i2+1]; 197 while (nz--) { 198 idx = 2*(*vi++); 199 x1 = x[idx]; x2 = x[1+idx]; 200 s1 -= v[0]*x1 + v[2]*x2; 201 s2 -= v[1]*x1 + v[3]*x2; 202 v += 4; 203 } 204 x[i2] = idiag[0]*s1 + idiag[2]*s2; 205 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 206 idiag -= 4; 207 i2 -= 2; 208 } 209 ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr); 210 } 211 } else { 212 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 213 } 214 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 215 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 216 PetscFunctionReturn(0); 217 } 218 219 #undef __FUNCT__ 220 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 221 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 222 { 223 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 224 PetscScalar *x,x1,x2,x3,s1,s2,s3; 225 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 226 PetscErrorCode ierr; 227 PetscInt m = a->mbs,i,i2,nz,idx; 228 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 229 230 PetscFunctionBegin; 231 its = its*lits; 232 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 233 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 234 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 235 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 236 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 237 238 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 239 240 diag = a->diag; 241 idiag = a->idiag; 242 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 243 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 244 245 if (flag & SOR_ZERO_INITIAL_GUESS) { 246 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 247 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 248 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 249 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 250 i2 = 3; 251 idiag += 9; 252 for (i=1; i<m; i++) { 253 v = aa + 9*ai[i]; 254 vi = aj + ai[i]; 255 nz = diag[i] - ai[i]; 256 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 257 while (nz--) { 258 idx = 3*(*vi++); 259 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 260 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 261 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 262 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 263 v += 9; 264 } 265 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 266 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 267 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 268 idiag += 9; 269 i2 += 3; 270 } 271 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 272 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 273 } 274 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 275 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 276 i2 = 0; 277 mdiag = a->idiag+9*a->mbs; 278 for (i=0; i<m; i++) { 279 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 280 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 281 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 282 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 283 mdiag += 9; 284 i2 += 3; 285 } 286 ierr = PetscLogFlops(15*m);CHKERRQ(ierr); 287 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 288 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 289 } 290 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 291 idiag = a->idiag+9*a->mbs - 9; 292 i2 = 3*m - 3; 293 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 294 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 295 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 296 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 297 idiag -= 9; 298 i2 -= 3; 299 for (i=m-2; i>=0; i--) { 300 v = aa + 9*(diag[i]+1); 301 vi = aj + diag[i] + 1; 302 nz = ai[i+1] - diag[i] - 1; 303 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 304 while (nz--) { 305 idx = 3*(*vi++); 306 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 307 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 308 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 309 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 310 v += 9; 311 } 312 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 313 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 314 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 315 idiag -= 9; 316 i2 -= 3; 317 } 318 ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr); 319 } 320 } else { 321 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 322 } 323 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 324 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 325 PetscFunctionReturn(0); 326 } 327 328 #undef __FUNCT__ 329 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 330 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 331 { 332 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 333 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 334 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 335 PetscErrorCode ierr; 336 PetscInt m = a->mbs,i,i2,nz,idx; 337 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 338 339 PetscFunctionBegin; 340 its = its*lits; 341 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 342 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 343 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 344 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 345 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 346 347 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 348 349 diag = a->diag; 350 idiag = a->idiag; 351 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 352 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 353 354 if (flag & SOR_ZERO_INITIAL_GUESS) { 355 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 356 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 357 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 358 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 359 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 360 i2 = 4; 361 idiag += 16; 362 for (i=1; i<m; i++) { 363 v = aa + 16*ai[i]; 364 vi = aj + ai[i]; 365 nz = diag[i] - ai[i]; 366 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 367 while (nz--) { 368 idx = 4*(*vi++); 369 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 370 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 371 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 372 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 373 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 374 v += 16; 375 } 376 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 377 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 378 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 379 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 380 idiag += 16; 381 i2 += 4; 382 } 383 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 384 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 385 } 386 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 387 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 388 i2 = 0; 389 mdiag = a->idiag+16*a->mbs; 390 for (i=0; i<m; i++) { 391 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 392 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 393 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 394 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 395 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 396 mdiag += 16; 397 i2 += 4; 398 } 399 ierr = PetscLogFlops(28*m);CHKERRQ(ierr); 400 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 401 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 402 } 403 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 404 idiag = a->idiag+16*a->mbs - 16; 405 i2 = 4*m - 4; 406 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 407 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 408 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 409 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 410 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 411 idiag -= 16; 412 i2 -= 4; 413 for (i=m-2; i>=0; i--) { 414 v = aa + 16*(diag[i]+1); 415 vi = aj + diag[i] + 1; 416 nz = ai[i+1] - diag[i] - 1; 417 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 418 while (nz--) { 419 idx = 4*(*vi++); 420 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 421 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 422 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 423 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 424 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 425 v += 16; 426 } 427 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 428 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 429 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 430 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 431 idiag -= 16; 432 i2 -= 4; 433 } 434 ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr); 435 } 436 } else { 437 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 438 } 439 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 440 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 441 PetscFunctionReturn(0); 442 } 443 444 #undef __FUNCT__ 445 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 446 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 447 { 448 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 449 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 450 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 451 PetscErrorCode ierr; 452 PetscInt m = a->mbs,i,i2,nz,idx; 453 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 454 455 PetscFunctionBegin; 456 its = its*lits; 457 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 458 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 459 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 460 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 461 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 462 463 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 464 465 diag = a->diag; 466 idiag = a->idiag; 467 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 468 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 469 470 if (flag & SOR_ZERO_INITIAL_GUESS) { 471 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 472 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 473 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 474 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 475 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 476 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 477 i2 = 5; 478 idiag += 25; 479 for (i=1; i<m; i++) { 480 v = aa + 25*ai[i]; 481 vi = aj + ai[i]; 482 nz = diag[i] - ai[i]; 483 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 484 while (nz--) { 485 idx = 5*(*vi++); 486 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 487 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 488 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 489 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 490 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 491 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 492 v += 25; 493 } 494 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 495 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 496 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 497 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 498 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 499 idiag += 25; 500 i2 += 5; 501 } 502 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 503 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 504 } 505 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 506 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 507 i2 = 0; 508 mdiag = a->idiag+25*a->mbs; 509 for (i=0; i<m; i++) { 510 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 511 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 512 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 513 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 514 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 515 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 516 mdiag += 25; 517 i2 += 5; 518 } 519 ierr = PetscLogFlops(45*m);CHKERRQ(ierr); 520 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 521 ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr); 522 } 523 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 524 idiag = a->idiag+25*a->mbs - 25; 525 i2 = 5*m - 5; 526 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 527 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 528 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 529 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 530 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 531 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 532 idiag -= 25; 533 i2 -= 5; 534 for (i=m-2; i>=0; i--) { 535 v = aa + 25*(diag[i]+1); 536 vi = aj + diag[i] + 1; 537 nz = ai[i+1] - diag[i] - 1; 538 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 539 while (nz--) { 540 idx = 5*(*vi++); 541 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 542 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 543 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 544 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 545 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 546 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 547 v += 25; 548 } 549 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 550 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 551 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 552 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 553 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 554 idiag -= 25; 555 i2 -= 5; 556 } 557 ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr); 558 } 559 } else { 560 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 561 } 562 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 563 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 564 PetscFunctionReturn(0); 565 } 566 567 /* 568 Special version for direct calls from Fortran (Used in PETSc-fun3d) 569 */ 570 #if defined(PETSC_HAVE_FORTRAN_CAPS) 571 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 572 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 573 #define matsetvaluesblocked4_ matsetvaluesblocked4 574 #endif 575 576 EXTERN_C_BEGIN 577 #undef __FUNCT__ 578 #define __FUNCT__ "matsetvaluesblocked4_" 579 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 580 { 581 Mat A = *AA; 582 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 583 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 584 PetscInt *ai=a->i,*ailen=a->ilen; 585 PetscInt *aj=a->j,stepval,lastcol = -1; 586 const PetscScalar *value = v; 587 MatScalar *ap,*aa = a->a,*bap; 588 589 PetscFunctionBegin; 590 if (A->rmap.bs != 4) SETERRABORT(A->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 591 stepval = (n-1)*4; 592 for (k=0; k<m; k++) { /* loop over added rows */ 593 row = im[k]; 594 rp = aj + ai[row]; 595 ap = aa + 16*ai[row]; 596 nrow = ailen[row]; 597 low = 0; 598 high = nrow; 599 for (l=0; l<n; l++) { /* loop over added columns */ 600 col = in[l]; 601 if (col <= lastcol) low = 0; else high = nrow; 602 lastcol = col; 603 value = v + k*(stepval+4 + l)*4; 604 while (high-low > 7) { 605 t = (low+high)/2; 606 if (rp[t] > col) high = t; 607 else low = t; 608 } 609 for (i=low; i<high; i++) { 610 if (rp[i] > col) break; 611 if (rp[i] == col) { 612 bap = ap + 16*i; 613 for (ii=0; ii<4; ii++,value+=stepval) { 614 for (jj=ii; jj<16; jj+=4) { 615 bap[jj] += *value++; 616 } 617 } 618 goto noinsert2; 619 } 620 } 621 N = nrow++ - 1; 622 high++; /* added new column index thus must search to one higher than before */ 623 /* shift up all the later entries in this row */ 624 for (ii=N; ii>=i; ii--) { 625 rp[ii+1] = rp[ii]; 626 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 627 } 628 if (N >= i) { 629 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 630 } 631 rp[i] = col; 632 bap = ap + 16*i; 633 for (ii=0; ii<4; ii++,value+=stepval) { 634 for (jj=ii; jj<16; jj+=4) { 635 bap[jj] = *value++; 636 } 637 } 638 noinsert2:; 639 low = i; 640 } 641 ailen[row] = nrow; 642 } 643 PetscFunctionReturnVoid(); 644 } 645 EXTERN_C_END 646 647 #if defined(PETSC_HAVE_FORTRAN_CAPS) 648 #define matsetvalues4_ MATSETVALUES4 649 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 650 #define matsetvalues4_ matsetvalues4 651 #endif 652 653 EXTERN_C_BEGIN 654 #undef __FUNCT__ 655 #define __FUNCT__ "MatSetValues4_" 656 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 657 { 658 Mat A = *AA; 659 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 660 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 661 PetscInt *ai=a->i,*ailen=a->ilen; 662 PetscInt *aj=a->j,brow,bcol; 663 PetscInt ridx,cidx,lastcol = -1; 664 MatScalar *ap,value,*aa=a->a,*bap; 665 666 PetscFunctionBegin; 667 for (k=0; k<m; k++) { /* loop over added rows */ 668 row = im[k]; brow = row/4; 669 rp = aj + ai[brow]; 670 ap = aa + 16*ai[brow]; 671 nrow = ailen[brow]; 672 low = 0; 673 high = nrow; 674 for (l=0; l<n; l++) { /* loop over added columns */ 675 col = in[l]; bcol = col/4; 676 ridx = row % 4; cidx = col % 4; 677 value = v[l + k*n]; 678 if (col <= lastcol) low = 0; else high = nrow; 679 lastcol = col; 680 while (high-low > 7) { 681 t = (low+high)/2; 682 if (rp[t] > bcol) high = t; 683 else low = t; 684 } 685 for (i=low; i<high; i++) { 686 if (rp[i] > bcol) break; 687 if (rp[i] == bcol) { 688 bap = ap + 16*i + 4*cidx + ridx; 689 *bap += value; 690 goto noinsert1; 691 } 692 } 693 N = nrow++ - 1; 694 high++; /* added new column thus must search to one higher than before */ 695 /* shift up all the later entries in this row */ 696 for (ii=N; ii>=i; ii--) { 697 rp[ii+1] = rp[ii]; 698 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 699 } 700 if (N>=i) { 701 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 702 } 703 rp[i] = bcol; 704 ap[16*i + 4*cidx + ridx] = value; 705 noinsert1:; 706 low = i; 707 } 708 ailen[brow] = nrow; 709 } 710 PetscFunctionReturnVoid(); 711 } 712 EXTERN_C_END 713 714 /* UGLY, ugly, ugly 715 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 716 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 717 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 718 converts the entries into single precision and then calls ..._MatScalar() to put them 719 into the single precision data structures. 720 */ 721 #if defined(PETSC_USE_MAT_SINGLE) 722 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 723 #else 724 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 725 #endif 726 727 #define CHUNKSIZE 10 728 729 /* 730 Checks for missing diagonals 731 */ 732 #undef __FUNCT__ 733 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 734 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A) 735 { 736 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 737 PetscErrorCode ierr; 738 PetscInt *diag,*jj = a->j,i; 739 740 PetscFunctionBegin; 741 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 742 diag = a->diag; 743 for (i=0; i<a->mbs; i++) { 744 if (jj[diag[i]] != i) { 745 SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i); 746 } 747 } 748 PetscFunctionReturn(0); 749 } 750 751 #undef __FUNCT__ 752 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 753 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 754 { 755 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 756 PetscErrorCode ierr; 757 PetscInt i,j,m = a->mbs; 758 759 PetscFunctionBegin; 760 if (!a->diag) { 761 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 762 } 763 for (i=0; i<m; i++) { 764 a->diag[i] = a->i[i+1]; 765 for (j=a->i[i]; j<a->i[i+1]; j++) { 766 if (a->j[j] == i) { 767 a->diag[i] = j; 768 break; 769 } 770 } 771 } 772 PetscFunctionReturn(0); 773 } 774 775 776 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 777 778 #undef __FUNCT__ 779 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 780 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 781 { 782 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 783 PetscErrorCode ierr; 784 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs; 785 PetscInt *tia, *tja; 786 787 PetscFunctionBegin; 788 *nn = n; 789 if (!ia) PetscFunctionReturn(0); 790 if (symmetric) { 791 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 792 } else { 793 tia = a->i; tja = a->j; 794 } 795 796 if (!blockcompressed) { 797 /* malloc & create the natural set of indices */ 798 ierr = PetscMalloc2((n+1)*bs,PetscInt,ia,nz*bs,PetscInt,ja);CHKERRQ(ierr); 799 for (i=0; i<n+1; i++) { 800 for (j=0; j<bs; j++) { 801 *ia[i*bs+j] = tia[i]*bs+j; 802 } 803 } 804 for (i=0; i<nz; i++) { 805 for (j=0; j<bs; j++) { 806 *ja[i*bs+j] = tia[i]*bs+j; 807 } 808 } 809 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 810 ierr = PetscFree(tia);CHKERRQ(ierr); 811 ierr = PetscFree(tja);CHKERRQ(ierr); 812 } 813 } else { 814 *ia = tia; 815 *ja = tja; 816 } 817 if (oshift == 1) { 818 for (i=0; i<nz; i++) (*ja)[i]++; 819 for (i=0; i<n+1; i++) (*ia)[i]++; 820 } 821 PetscFunctionReturn(0); 822 } 823 824 #undef __FUNCT__ 825 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 826 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 827 { 828 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 829 PetscErrorCode ierr; 830 PetscInt i,n = a->mbs,nz = a->i[n]; 831 832 PetscFunctionBegin; 833 if (!ia) PetscFunctionReturn(0); 834 if (!blockcompressed) { 835 ierr = PetscFree2(*ia,*ja);CHKERRQ(ierr); 836 }else if (symmetric) { 837 ierr = PetscFree(*ia);CHKERRQ(ierr); 838 ierr = PetscFree(*ja);CHKERRQ(ierr); 839 } else if (oshift == 1) { /* blockcompressed */ 840 for (i=0; i<nz; i++) a->j[i]--; 841 for (i=0; i<n+1; i++) a->i[i]--; 842 } 843 PetscFunctionReturn(0); 844 } 845 846 #undef __FUNCT__ 847 #define __FUNCT__ "MatDestroy_SeqBAIJ" 848 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 849 { 850 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 851 PetscErrorCode ierr; 852 853 PetscFunctionBegin; 854 #if defined(PETSC_USE_LOG) 855 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz); 856 #endif 857 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 858 if (a->row) { 859 ierr = ISDestroy(a->row);CHKERRQ(ierr); 860 } 861 if (a->col) { 862 ierr = ISDestroy(a->col);CHKERRQ(ierr); 863 } 864 ierr = PetscFree(a->diag);CHKERRQ(ierr); 865 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 866 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 867 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 868 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 869 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 870 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 871 #if defined(PETSC_USE_MAT_SINGLE) 872 ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr); 873 #endif 874 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 875 if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 876 877 ierr = PetscFree(a);CHKERRQ(ierr); 878 879 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 880 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 881 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 882 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 883 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 884 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 885 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 886 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 887 PetscFunctionReturn(0); 888 } 889 890 #undef __FUNCT__ 891 #define __FUNCT__ "MatSetOption_SeqBAIJ" 892 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op) 893 { 894 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 895 PetscErrorCode ierr; 896 897 PetscFunctionBegin; 898 switch (op) { 899 case MAT_ROW_ORIENTED: 900 a->roworiented = PETSC_TRUE; 901 break; 902 case MAT_COLUMN_ORIENTED: 903 a->roworiented = PETSC_FALSE; 904 break; 905 case MAT_COLUMNS_SORTED: 906 a->sorted = PETSC_TRUE; 907 break; 908 case MAT_COLUMNS_UNSORTED: 909 a->sorted = PETSC_FALSE; 910 break; 911 case MAT_KEEP_ZEROED_ROWS: 912 a->keepzeroedrows = PETSC_TRUE; 913 break; 914 case MAT_NO_NEW_NONZERO_LOCATIONS: 915 a->nonew = 1; 916 break; 917 case MAT_NEW_NONZERO_LOCATION_ERR: 918 a->nonew = -1; 919 break; 920 case MAT_NEW_NONZERO_ALLOCATION_ERR: 921 a->nonew = -2; 922 break; 923 case MAT_YES_NEW_NONZERO_LOCATIONS: 924 a->nonew = 0; 925 break; 926 case MAT_ROWS_SORTED: 927 case MAT_ROWS_UNSORTED: 928 case MAT_YES_NEW_DIAGONALS: 929 case MAT_IGNORE_OFF_PROC_ENTRIES: 930 case MAT_USE_HASH_TABLE: 931 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 932 break; 933 case MAT_NO_NEW_DIAGONALS: 934 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 935 case MAT_SYMMETRIC: 936 case MAT_STRUCTURALLY_SYMMETRIC: 937 case MAT_NOT_SYMMETRIC: 938 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 939 case MAT_HERMITIAN: 940 case MAT_NOT_HERMITIAN: 941 case MAT_SYMMETRY_ETERNAL: 942 case MAT_NOT_SYMMETRY_ETERNAL: 943 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 944 break; 945 default: 946 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 947 } 948 PetscFunctionReturn(0); 949 } 950 951 #undef __FUNCT__ 952 #define __FUNCT__ "MatGetRow_SeqBAIJ" 953 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 954 { 955 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 956 PetscErrorCode ierr; 957 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 958 MatScalar *aa,*aa_i; 959 PetscScalar *v_i; 960 961 PetscFunctionBegin; 962 bs = A->rmap.bs; 963 ai = a->i; 964 aj = a->j; 965 aa = a->a; 966 bs2 = a->bs2; 967 968 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 969 970 bn = row/bs; /* Block number */ 971 bp = row % bs; /* Block Position */ 972 M = ai[bn+1] - ai[bn]; 973 *nz = bs*M; 974 975 if (v) { 976 *v = 0; 977 if (*nz) { 978 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 979 for (i=0; i<M; i++) { /* for each block in the block row */ 980 v_i = *v + i*bs; 981 aa_i = aa + bs2*(ai[bn] + i); 982 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 983 } 984 } 985 } 986 987 if (idx) { 988 *idx = 0; 989 if (*nz) { 990 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 991 for (i=0; i<M; i++) { /* for each block in the block row */ 992 idx_i = *idx + i*bs; 993 itmp = bs*aj[ai[bn] + i]; 994 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 995 } 996 } 997 } 998 PetscFunctionReturn(0); 999 } 1000 1001 #undef __FUNCT__ 1002 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1003 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1004 { 1005 PetscErrorCode ierr; 1006 1007 PetscFunctionBegin; 1008 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1009 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1010 PetscFunctionReturn(0); 1011 } 1012 1013 #undef __FUNCT__ 1014 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1015 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 1016 { 1017 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 1018 Mat C; 1019 PetscErrorCode ierr; 1020 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1021 PetscInt *rows,*cols,bs2=a->bs2; 1022 PetscScalar *array; 1023 1024 PetscFunctionBegin; 1025 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 1026 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1027 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1028 1029 #if defined(PETSC_USE_MAT_SINGLE) 1030 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 1031 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 1032 #else 1033 array = a->a; 1034 #endif 1035 1036 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1037 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 1038 ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr); 1039 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1040 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1041 ierr = PetscFree(col);CHKERRQ(ierr); 1042 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1043 cols = rows + bs; 1044 for (i=0; i<mbs; i++) { 1045 cols[0] = i*bs; 1046 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1047 len = ai[i+1] - ai[i]; 1048 for (j=0; j<len; j++) { 1049 rows[0] = (*aj++)*bs; 1050 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1051 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1052 array += bs2; 1053 } 1054 } 1055 ierr = PetscFree(rows);CHKERRQ(ierr); 1056 #if defined(PETSC_USE_MAT_SINGLE) 1057 ierr = PetscFree(array); 1058 #endif 1059 1060 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1061 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1062 1063 if (B) { 1064 *B = C; 1065 } else { 1066 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1067 } 1068 PetscFunctionReturn(0); 1069 } 1070 1071 #undef __FUNCT__ 1072 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1073 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1074 { 1075 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1076 PetscErrorCode ierr; 1077 PetscInt i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2; 1078 int fd; 1079 PetscScalar *aa; 1080 FILE *file; 1081 1082 PetscFunctionBegin; 1083 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1084 ierr = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1085 col_lens[0] = MAT_FILE_COOKIE; 1086 1087 col_lens[1] = A->rmap.N; 1088 col_lens[2] = A->cmap.n; 1089 col_lens[3] = a->nz*bs2; 1090 1091 /* store lengths of each row and write (including header) to file */ 1092 count = 0; 1093 for (i=0; i<a->mbs; i++) { 1094 for (j=0; j<bs; j++) { 1095 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1096 } 1097 } 1098 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1099 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1100 1101 /* store column indices (zero start index) */ 1102 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1103 count = 0; 1104 for (i=0; i<a->mbs; i++) { 1105 for (j=0; j<bs; j++) { 1106 for (k=a->i[i]; k<a->i[i+1]; k++) { 1107 for (l=0; l<bs; l++) { 1108 jj[count++] = bs*a->j[k] + l; 1109 } 1110 } 1111 } 1112 } 1113 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1114 ierr = PetscFree(jj);CHKERRQ(ierr); 1115 1116 /* store nonzero values */ 1117 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1118 count = 0; 1119 for (i=0; i<a->mbs; i++) { 1120 for (j=0; j<bs; j++) { 1121 for (k=a->i[i]; k<a->i[i+1]; k++) { 1122 for (l=0; l<bs; l++) { 1123 aa[count++] = a->a[bs2*k + l*bs + j]; 1124 } 1125 } 1126 } 1127 } 1128 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1129 ierr = PetscFree(aa);CHKERRQ(ierr); 1130 1131 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1132 if (file) { 1133 fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs); 1134 } 1135 PetscFunctionReturn(0); 1136 } 1137 1138 #undef __FUNCT__ 1139 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1140 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1141 { 1142 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1143 PetscErrorCode ierr; 1144 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 1145 PetscViewerFormat format; 1146 1147 PetscFunctionBegin; 1148 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1149 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1150 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1151 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1152 Mat aij; 1153 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1154 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1155 ierr = MatDestroy(aij);CHKERRQ(ierr); 1156 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1157 PetscFunctionReturn(0); 1158 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1159 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1160 for (i=0; i<a->mbs; i++) { 1161 for (j=0; j<bs; j++) { 1162 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1163 for (k=a->i[i]; k<a->i[i+1]; k++) { 1164 for (l=0; l<bs; l++) { 1165 #if defined(PETSC_USE_COMPLEX) 1166 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1167 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1168 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1169 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1170 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1171 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1172 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1173 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1174 } 1175 #else 1176 if (a->a[bs2*k + l*bs + j] != 0.0) { 1177 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1178 } 1179 #endif 1180 } 1181 } 1182 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1183 } 1184 } 1185 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1186 } else { 1187 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1188 for (i=0; i<a->mbs; i++) { 1189 for (j=0; j<bs; j++) { 1190 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1191 for (k=a->i[i]; k<a->i[i+1]; k++) { 1192 for (l=0; l<bs; l++) { 1193 #if defined(PETSC_USE_COMPLEX) 1194 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1195 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1196 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1197 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1198 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1199 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1200 } else { 1201 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1202 } 1203 #else 1204 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1205 #endif 1206 } 1207 } 1208 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1209 } 1210 } 1211 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1212 } 1213 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1214 PetscFunctionReturn(0); 1215 } 1216 1217 #undef __FUNCT__ 1218 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1219 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1220 { 1221 Mat A = (Mat) Aa; 1222 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1223 PetscErrorCode ierr; 1224 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 1225 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1226 MatScalar *aa; 1227 PetscViewer viewer; 1228 1229 PetscFunctionBegin; 1230 1231 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1232 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1233 1234 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1235 1236 /* loop over matrix elements drawing boxes */ 1237 color = PETSC_DRAW_BLUE; 1238 for (i=0,row=0; i<mbs; i++,row+=bs) { 1239 for (j=a->i[i]; j<a->i[i+1]; j++) { 1240 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1241 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1242 aa = a->a + j*bs2; 1243 for (k=0; k<bs; k++) { 1244 for (l=0; l<bs; l++) { 1245 if (PetscRealPart(*aa++) >= 0.) continue; 1246 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1247 } 1248 } 1249 } 1250 } 1251 color = PETSC_DRAW_CYAN; 1252 for (i=0,row=0; i<mbs; i++,row+=bs) { 1253 for (j=a->i[i]; j<a->i[i+1]; j++) { 1254 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1255 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1256 aa = a->a + j*bs2; 1257 for (k=0; k<bs; k++) { 1258 for (l=0; l<bs; l++) { 1259 if (PetscRealPart(*aa++) != 0.) continue; 1260 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1261 } 1262 } 1263 } 1264 } 1265 1266 color = PETSC_DRAW_RED; 1267 for (i=0,row=0; i<mbs; i++,row+=bs) { 1268 for (j=a->i[i]; j<a->i[i+1]; j++) { 1269 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 1270 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1271 aa = a->a + j*bs2; 1272 for (k=0; k<bs; k++) { 1273 for (l=0; l<bs; l++) { 1274 if (PetscRealPart(*aa++) <= 0.) continue; 1275 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1276 } 1277 } 1278 } 1279 } 1280 PetscFunctionReturn(0); 1281 } 1282 1283 #undef __FUNCT__ 1284 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1285 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1286 { 1287 PetscErrorCode ierr; 1288 PetscReal xl,yl,xr,yr,w,h; 1289 PetscDraw draw; 1290 PetscTruth isnull; 1291 1292 PetscFunctionBegin; 1293 1294 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1295 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1296 1297 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1298 xr = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 1299 xr += w; yr += h; xl = -w; yl = -h; 1300 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1301 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1302 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1303 PetscFunctionReturn(0); 1304 } 1305 1306 #undef __FUNCT__ 1307 #define __FUNCT__ "MatView_SeqBAIJ" 1308 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1309 { 1310 PetscErrorCode ierr; 1311 PetscTruth iascii,isbinary,isdraw; 1312 1313 PetscFunctionBegin; 1314 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1315 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1316 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1317 if (iascii){ 1318 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1319 } else if (isbinary) { 1320 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1321 } else if (isdraw) { 1322 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1323 } else { 1324 Mat B; 1325 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1326 ierr = MatView(B,viewer);CHKERRQ(ierr); 1327 ierr = MatDestroy(B);CHKERRQ(ierr); 1328 } 1329 PetscFunctionReturn(0); 1330 } 1331 1332 1333 #undef __FUNCT__ 1334 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1335 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1336 { 1337 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1338 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1339 PetscInt *ai = a->i,*ailen = a->ilen; 1340 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 1341 MatScalar *ap,*aa = a->a,zero = 0.0; 1342 1343 PetscFunctionBegin; 1344 for (k=0; k<m; k++) { /* loop over rows */ 1345 row = im[k]; brow = row/bs; 1346 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 1347 if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1348 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1349 nrow = ailen[brow]; 1350 for (l=0; l<n; l++) { /* loop over columns */ 1351 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 1352 if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1353 col = in[l] ; 1354 bcol = col/bs; 1355 cidx = col%bs; 1356 ridx = row%bs; 1357 high = nrow; 1358 low = 0; /* assume unsorted */ 1359 while (high-low > 5) { 1360 t = (low+high)/2; 1361 if (rp[t] > bcol) high = t; 1362 else low = t; 1363 } 1364 for (i=low; i<high; i++) { 1365 if (rp[i] > bcol) break; 1366 if (rp[i] == bcol) { 1367 *v++ = ap[bs2*i+bs*cidx+ridx]; 1368 goto finished; 1369 } 1370 } 1371 *v++ = zero; 1372 finished:; 1373 } 1374 } 1375 PetscFunctionReturn(0); 1376 } 1377 1378 #if defined(PETSC_USE_MAT_SINGLE) 1379 #undef __FUNCT__ 1380 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1381 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 1382 { 1383 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1384 PetscErrorCode ierr; 1385 PetscInt i,N = m*n*b->bs2; 1386 MatScalar *vsingle; 1387 1388 PetscFunctionBegin; 1389 if (N > b->setvalueslen) { 1390 ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr); 1391 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1392 b->setvalueslen = N; 1393 } 1394 vsingle = b->setvaluescopy; 1395 for (i=0; i<N; i++) { 1396 vsingle[i] = v[i]; 1397 } 1398 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1399 PetscFunctionReturn(0); 1400 } 1401 #endif 1402 1403 1404 #undef __FUNCT__ 1405 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1406 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is) 1407 { 1408 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1409 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1410 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1411 PetscErrorCode ierr; 1412 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 1413 PetscTruth roworiented=a->roworiented; 1414 const MatScalar *value = v; 1415 MatScalar *ap,*aa = a->a,*bap; 1416 1417 PetscFunctionBegin; 1418 if (roworiented) { 1419 stepval = (n-1)*bs; 1420 } else { 1421 stepval = (m-1)*bs; 1422 } 1423 for (k=0; k<m; k++) { /* loop over added rows */ 1424 row = im[k]; 1425 if (row < 0) continue; 1426 #if defined(PETSC_USE_DEBUG) 1427 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1428 #endif 1429 rp = aj + ai[row]; 1430 ap = aa + bs2*ai[row]; 1431 rmax = imax[row]; 1432 nrow = ailen[row]; 1433 low = 0; 1434 high = nrow; 1435 for (l=0; l<n; l++) { /* loop over added columns */ 1436 if (in[l] < 0) continue; 1437 #if defined(PETSC_USE_DEBUG) 1438 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1439 #endif 1440 col = in[l]; 1441 if (roworiented) { 1442 value = v + k*(stepval+bs)*bs + l*bs; 1443 } else { 1444 value = v + l*(stepval+bs)*bs + k*bs; 1445 } 1446 if (col <= lastcol) low = 0; else high = nrow; 1447 lastcol = col; 1448 while (high-low > 7) { 1449 t = (low+high)/2; 1450 if (rp[t] > col) high = t; 1451 else low = t; 1452 } 1453 for (i=low; i<high; i++) { 1454 if (rp[i] > col) break; 1455 if (rp[i] == col) { 1456 bap = ap + bs2*i; 1457 if (roworiented) { 1458 if (is == ADD_VALUES) { 1459 for (ii=0; ii<bs; ii++,value+=stepval) { 1460 for (jj=ii; jj<bs2; jj+=bs) { 1461 bap[jj] += *value++; 1462 } 1463 } 1464 } else { 1465 for (ii=0; ii<bs; ii++,value+=stepval) { 1466 for (jj=ii; jj<bs2; jj+=bs) { 1467 bap[jj] = *value++; 1468 } 1469 } 1470 } 1471 } else { 1472 if (is == ADD_VALUES) { 1473 for (ii=0; ii<bs; ii++,value+=stepval) { 1474 for (jj=0; jj<bs; jj++) { 1475 *bap++ += *value++; 1476 } 1477 } 1478 } else { 1479 for (ii=0; ii<bs; ii++,value+=stepval) { 1480 for (jj=0; jj<bs; jj++) { 1481 *bap++ = *value++; 1482 } 1483 } 1484 } 1485 } 1486 goto noinsert2; 1487 } 1488 } 1489 if (nonew == 1) goto noinsert2; 1490 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1491 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1492 N = nrow++ - 1; high++; 1493 /* shift up all the later entries in this row */ 1494 for (ii=N; ii>=i; ii--) { 1495 rp[ii+1] = rp[ii]; 1496 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1497 } 1498 if (N >= i) { 1499 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1500 } 1501 rp[i] = col; 1502 bap = ap + bs2*i; 1503 if (roworiented) { 1504 for (ii=0; ii<bs; ii++,value+=stepval) { 1505 for (jj=ii; jj<bs2; jj+=bs) { 1506 bap[jj] = *value++; 1507 } 1508 } 1509 } else { 1510 for (ii=0; ii<bs; ii++,value+=stepval) { 1511 for (jj=0; jj<bs; jj++) { 1512 *bap++ = *value++; 1513 } 1514 } 1515 } 1516 noinsert2:; 1517 low = i; 1518 } 1519 ailen[row] = nrow; 1520 } 1521 PetscFunctionReturn(0); 1522 } 1523 1524 #undef __FUNCT__ 1525 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1526 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1527 { 1528 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1529 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1530 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 1531 PetscErrorCode ierr; 1532 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1533 MatScalar *aa = a->a,*ap; 1534 PetscReal ratio=0.6; 1535 1536 PetscFunctionBegin; 1537 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1538 1539 if (m) rmax = ailen[0]; 1540 for (i=1; i<mbs; i++) { 1541 /* move each row back by the amount of empty slots (fshift) before it*/ 1542 fshift += imax[i-1] - ailen[i-1]; 1543 rmax = PetscMax(rmax,ailen[i]); 1544 if (fshift) { 1545 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1546 N = ailen[i]; 1547 for (j=0; j<N; j++) { 1548 ip[j-fshift] = ip[j]; 1549 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1550 } 1551 } 1552 ai[i] = ai[i-1] + ailen[i-1]; 1553 } 1554 if (mbs) { 1555 fshift += imax[mbs-1] - ailen[mbs-1]; 1556 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1557 } 1558 /* reset ilen and imax for each row */ 1559 for (i=0; i<mbs; i++) { 1560 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1561 } 1562 a->nz = ai[mbs]; 1563 1564 /* diagonals may have moved, so kill the diagonal pointers */ 1565 a->idiagvalid = PETSC_FALSE; 1566 if (fshift && a->diag) { 1567 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1568 ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1569 a->diag = 0; 1570 } 1571 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1572 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1573 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1574 a->reallocs = 0; 1575 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1576 1577 /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 1578 if (a->compressedrow.use){ 1579 ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1580 } 1581 1582 A->same_nonzero = PETSC_TRUE; 1583 PetscFunctionReturn(0); 1584 } 1585 1586 /* 1587 This function returns an array of flags which indicate the locations of contiguous 1588 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1589 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1590 Assume: sizes should be long enough to hold all the values. 1591 */ 1592 #undef __FUNCT__ 1593 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1594 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1595 { 1596 PetscInt i,j,k,row; 1597 PetscTruth flg; 1598 1599 PetscFunctionBegin; 1600 for (i=0,j=0; i<n; j++) { 1601 row = idx[i]; 1602 if (row%bs!=0) { /* Not the begining of a block */ 1603 sizes[j] = 1; 1604 i++; 1605 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1606 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1607 i++; 1608 } else { /* Begining of the block, so check if the complete block exists */ 1609 flg = PETSC_TRUE; 1610 for (k=1; k<bs; k++) { 1611 if (row+k != idx[i+k]) { /* break in the block */ 1612 flg = PETSC_FALSE; 1613 break; 1614 } 1615 } 1616 if (flg) { /* No break in the bs */ 1617 sizes[j] = bs; 1618 i+= bs; 1619 } else { 1620 sizes[j] = 1; 1621 i++; 1622 } 1623 } 1624 } 1625 *bs_max = j; 1626 PetscFunctionReturn(0); 1627 } 1628 1629 #undef __FUNCT__ 1630 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1631 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 1632 { 1633 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1634 PetscErrorCode ierr; 1635 PetscInt i,j,k,count,*rows; 1636 PetscInt bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max; 1637 PetscScalar zero = 0.0; 1638 MatScalar *aa; 1639 1640 PetscFunctionBegin; 1641 /* Make a copy of the IS and sort it */ 1642 /* allocate memory for rows,sizes */ 1643 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1644 sizes = rows + is_n; 1645 1646 /* copy IS values to rows, and sort them */ 1647 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1648 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1649 if (baij->keepzeroedrows) { 1650 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1651 bs_max = is_n; 1652 A->same_nonzero = PETSC_TRUE; 1653 } else { 1654 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1655 A->same_nonzero = PETSC_FALSE; 1656 } 1657 1658 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1659 row = rows[j]; 1660 if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 1661 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1662 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1663 if (sizes[i] == bs && !baij->keepzeroedrows) { 1664 if (diag != 0.0) { 1665 if (baij->ilen[row/bs] > 0) { 1666 baij->ilen[row/bs] = 1; 1667 baij->j[baij->i[row/bs]] = row/bs; 1668 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1669 } 1670 /* Now insert all the diagonal values for this bs */ 1671 for (k=0; k<bs; k++) { 1672 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 1673 } 1674 } else { /* (diag == 0.0) */ 1675 baij->ilen[row/bs] = 0; 1676 } /* end (diag == 0.0) */ 1677 } else { /* (sizes[i] != bs) */ 1678 #if defined (PETSC_USE_DEBUG) 1679 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 1680 #endif 1681 for (k=0; k<count; k++) { 1682 aa[0] = zero; 1683 aa += bs; 1684 } 1685 if (diag != 0.0) { 1686 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 1687 } 1688 } 1689 } 1690 1691 ierr = PetscFree(rows);CHKERRQ(ierr); 1692 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1693 PetscFunctionReturn(0); 1694 } 1695 1696 #undef __FUNCT__ 1697 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1698 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1699 { 1700 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1701 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 1702 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1703 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 1704 PetscErrorCode ierr; 1705 PetscInt ridx,cidx,bs2=a->bs2; 1706 PetscTruth roworiented=a->roworiented; 1707 MatScalar *ap,value,*aa=a->a,*bap; 1708 1709 PetscFunctionBegin; 1710 for (k=0; k<m; k++) { /* loop over added rows */ 1711 row = im[k]; 1712 brow = row/bs; 1713 if (row < 0) continue; 1714 #if defined(PETSC_USE_DEBUG) 1715 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 1716 #endif 1717 rp = aj + ai[brow]; 1718 ap = aa + bs2*ai[brow]; 1719 rmax = imax[brow]; 1720 nrow = ailen[brow]; 1721 low = 0; 1722 high = nrow; 1723 for (l=0; l<n; l++) { /* loop over added columns */ 1724 if (in[l] < 0) continue; 1725 #if defined(PETSC_USE_DEBUG) 1726 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 1727 #endif 1728 col = in[l]; bcol = col/bs; 1729 ridx = row % bs; cidx = col % bs; 1730 if (roworiented) { 1731 value = v[l + k*n]; 1732 } else { 1733 value = v[k + l*m]; 1734 } 1735 if (col <= lastcol) low = 0; else high = nrow; 1736 lastcol = col; 1737 while (high-low > 7) { 1738 t = (low+high)/2; 1739 if (rp[t] > bcol) high = t; 1740 else low = t; 1741 } 1742 for (i=low; i<high; i++) { 1743 if (rp[i] > bcol) break; 1744 if (rp[i] == bcol) { 1745 bap = ap + bs2*i + bs*cidx + ridx; 1746 if (is == ADD_VALUES) *bap += value; 1747 else *bap = value; 1748 goto noinsert1; 1749 } 1750 } 1751 if (nonew == 1) goto noinsert1; 1752 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1753 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1754 N = nrow++ - 1; high++; 1755 /* shift up all the later entries in this row */ 1756 for (ii=N; ii>=i; ii--) { 1757 rp[ii+1] = rp[ii]; 1758 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1759 } 1760 if (N>=i) { 1761 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1762 } 1763 rp[i] = bcol; 1764 ap[bs2*i + bs*cidx + ridx] = value; 1765 a->nz++; 1766 noinsert1:; 1767 low = i; 1768 } 1769 ailen[brow] = nrow; 1770 } 1771 A->same_nonzero = PETSC_FALSE; 1772 PetscFunctionReturn(0); 1773 } 1774 1775 1776 #undef __FUNCT__ 1777 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1778 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1779 { 1780 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1781 Mat outA; 1782 PetscErrorCode ierr; 1783 PetscTruth row_identity,col_identity; 1784 1785 PetscFunctionBegin; 1786 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1787 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1788 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1789 if (!row_identity || !col_identity) { 1790 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 1791 } 1792 1793 outA = inA; 1794 inA->factor = FACTOR_LU; 1795 1796 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1797 1798 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1799 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 1800 a->row = row; 1801 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1802 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 1803 a->col = col; 1804 1805 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1806 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1807 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1808 1809 /* 1810 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1811 for ILU(0) factorization with natural ordering 1812 */ 1813 if (inA->rmap.bs < 8) { 1814 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1815 } else { 1816 if (!a->solve_work) { 1817 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1818 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1819 } 1820 } 1821 1822 ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1823 1824 PetscFunctionReturn(0); 1825 } 1826 1827 EXTERN_C_BEGIN 1828 #undef __FUNCT__ 1829 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1830 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 1831 { 1832 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1833 PetscInt i,nz,nbs; 1834 1835 PetscFunctionBegin; 1836 nz = baij->maxnz/baij->bs2; 1837 nbs = baij->nbs; 1838 for (i=0; i<nz; i++) { 1839 baij->j[i] = indices[i]; 1840 } 1841 baij->nz = nz; 1842 for (i=0; i<nbs; i++) { 1843 baij->ilen[i] = baij->imax[i]; 1844 } 1845 1846 PetscFunctionReturn(0); 1847 } 1848 EXTERN_C_END 1849 1850 #undef __FUNCT__ 1851 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1852 /*@ 1853 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1854 in the matrix. 1855 1856 Input Parameters: 1857 + mat - the SeqBAIJ matrix 1858 - indices - the column indices 1859 1860 Level: advanced 1861 1862 Notes: 1863 This can be called if you have precomputed the nonzero structure of the 1864 matrix and want to provide it to the matrix object to improve the performance 1865 of the MatSetValues() operation. 1866 1867 You MUST have set the correct numbers of nonzeros per row in the call to 1868 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 1869 1870 MUST be called before any calls to MatSetValues(); 1871 1872 @*/ 1873 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1874 { 1875 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1876 1877 PetscFunctionBegin; 1878 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1879 PetscValidPointer(indices,2); 1880 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1881 if (f) { 1882 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1883 } else { 1884 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 1885 } 1886 PetscFunctionReturn(0); 1887 } 1888 1889 #undef __FUNCT__ 1890 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 1891 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 1892 { 1893 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1894 PetscErrorCode ierr; 1895 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 1896 PetscReal atmp; 1897 PetscScalar *x,zero = 0.0; 1898 MatScalar *aa; 1899 PetscInt ncols,brow,krow,kcol; 1900 1901 PetscFunctionBegin; 1902 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1903 bs = A->rmap.bs; 1904 aa = a->a; 1905 ai = a->i; 1906 aj = a->j; 1907 mbs = a->mbs; 1908 1909 ierr = VecSet(v,zero);CHKERRQ(ierr); 1910 if (idx) { 1911 for (i=0; i<A->rmap.n;i++) idx[i] = 0; 1912 } 1913 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1914 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1915 if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1916 for (i=0; i<mbs; i++) { 1917 ncols = ai[1] - ai[0]; ai++; 1918 brow = bs*i; 1919 for (j=0; j<ncols; j++){ 1920 for (kcol=0; kcol<bs; kcol++){ 1921 for (krow=0; krow<bs; krow++){ 1922 atmp = PetscAbsScalar(*aa);aa++; 1923 row = brow + krow; /* row index */ 1924 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 1925 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 1926 } 1927 } 1928 aj++; 1929 } 1930 } 1931 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1932 PetscFunctionReturn(0); 1933 } 1934 1935 #undef __FUNCT__ 1936 #define __FUNCT__ "MatCopy_SeqBAIJ" 1937 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 1938 { 1939 PetscErrorCode ierr; 1940 1941 PetscFunctionBegin; 1942 /* If the two matrices have the same copy implementation, use fast copy. */ 1943 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1944 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1945 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 1946 1947 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 1948 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1949 } 1950 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 1951 } else { 1952 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1953 } 1954 PetscFunctionReturn(0); 1955 } 1956 1957 #undef __FUNCT__ 1958 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1959 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 1960 { 1961 PetscErrorCode ierr; 1962 1963 PetscFunctionBegin; 1964 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 1965 PetscFunctionReturn(0); 1966 } 1967 1968 #undef __FUNCT__ 1969 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1970 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1971 { 1972 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1973 PetscFunctionBegin; 1974 *array = a->a; 1975 PetscFunctionReturn(0); 1976 } 1977 1978 #undef __FUNCT__ 1979 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1980 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1981 { 1982 PetscFunctionBegin; 1983 PetscFunctionReturn(0); 1984 } 1985 1986 #include "petscblaslapack.h" 1987 #undef __FUNCT__ 1988 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1989 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1990 { 1991 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1992 PetscErrorCode ierr; 1993 PetscInt i,bs=Y->rmap.bs,j,bs2; 1994 PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz; 1995 1996 PetscFunctionBegin; 1997 if (str == SAME_NONZERO_PATTERN) { 1998 PetscScalar alpha = a; 1999 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2000 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2001 if (y->xtoy && y->XtoY != X) { 2002 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2003 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2004 } 2005 if (!y->xtoy) { /* get xtoy */ 2006 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2007 y->XtoY = X; 2008 } 2009 bs2 = bs*bs; 2010 for (i=0; i<x->nz; i++) { 2011 j = 0; 2012 while (j < bs2){ 2013 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2014 j++; 2015 } 2016 } 2017 ierr = PetscInfo3(0,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2018 } else { 2019 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2020 } 2021 PetscFunctionReturn(0); 2022 } 2023 2024 #undef __FUNCT__ 2025 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2026 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2027 { 2028 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2029 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2030 PetscScalar *aa = a->a; 2031 2032 PetscFunctionBegin; 2033 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2034 PetscFunctionReturn(0); 2035 } 2036 2037 #undef __FUNCT__ 2038 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2039 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2040 { 2041 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2042 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2043 PetscScalar *aa = a->a; 2044 2045 PetscFunctionBegin; 2046 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2047 PetscFunctionReturn(0); 2048 } 2049 2050 2051 /* -------------------------------------------------------------------*/ 2052 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2053 MatGetRow_SeqBAIJ, 2054 MatRestoreRow_SeqBAIJ, 2055 MatMult_SeqBAIJ_N, 2056 /* 4*/ MatMultAdd_SeqBAIJ_N, 2057 MatMultTranspose_SeqBAIJ, 2058 MatMultTransposeAdd_SeqBAIJ, 2059 MatSolve_SeqBAIJ_N, 2060 0, 2061 0, 2062 /*10*/ 0, 2063 MatLUFactor_SeqBAIJ, 2064 0, 2065 0, 2066 MatTranspose_SeqBAIJ, 2067 /*15*/ MatGetInfo_SeqBAIJ, 2068 MatEqual_SeqBAIJ, 2069 MatGetDiagonal_SeqBAIJ, 2070 MatDiagonalScale_SeqBAIJ, 2071 MatNorm_SeqBAIJ, 2072 /*20*/ 0, 2073 MatAssemblyEnd_SeqBAIJ, 2074 0, 2075 MatSetOption_SeqBAIJ, 2076 MatZeroEntries_SeqBAIJ, 2077 /*25*/ MatZeroRows_SeqBAIJ, 2078 MatLUFactorSymbolic_SeqBAIJ, 2079 MatLUFactorNumeric_SeqBAIJ_N, 2080 MatCholeskyFactorSymbolic_SeqBAIJ, 2081 MatCholeskyFactorNumeric_SeqBAIJ_N, 2082 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2083 MatILUFactorSymbolic_SeqBAIJ, 2084 MatICCFactorSymbolic_SeqBAIJ, 2085 MatGetArray_SeqBAIJ, 2086 MatRestoreArray_SeqBAIJ, 2087 /*35*/ MatDuplicate_SeqBAIJ, 2088 0, 2089 0, 2090 MatILUFactor_SeqBAIJ, 2091 0, 2092 /*40*/ MatAXPY_SeqBAIJ, 2093 MatGetSubMatrices_SeqBAIJ, 2094 MatIncreaseOverlap_SeqBAIJ, 2095 MatGetValues_SeqBAIJ, 2096 MatCopy_SeqBAIJ, 2097 /*45*/ 0, 2098 MatScale_SeqBAIJ, 2099 0, 2100 0, 2101 0, 2102 /*50*/ 0, 2103 MatGetRowIJ_SeqBAIJ, 2104 MatRestoreRowIJ_SeqBAIJ, 2105 0, 2106 0, 2107 /*55*/ 0, 2108 0, 2109 0, 2110 0, 2111 MatSetValuesBlocked_SeqBAIJ, 2112 /*60*/ MatGetSubMatrix_SeqBAIJ, 2113 MatDestroy_SeqBAIJ, 2114 MatView_SeqBAIJ, 2115 0, 2116 0, 2117 /*65*/ 0, 2118 0, 2119 0, 2120 0, 2121 0, 2122 /*70*/ MatGetRowMaxAbs_SeqBAIJ, 2123 MatConvert_Basic, 2124 0, 2125 0, 2126 0, 2127 /*75*/ 0, 2128 0, 2129 0, 2130 0, 2131 0, 2132 /*80*/ 0, 2133 0, 2134 0, 2135 0, 2136 MatLoad_SeqBAIJ, 2137 /*85*/ 0, 2138 0, 2139 0, 2140 0, 2141 0, 2142 /*90*/ 0, 2143 0, 2144 0, 2145 0, 2146 0, 2147 /*95*/ 0, 2148 0, 2149 0, 2150 0, 2151 0, 2152 /*100*/0, 2153 0, 2154 0, 2155 0, 2156 0, 2157 /*105*/0, 2158 MatRealPart_SeqBAIJ, 2159 MatImaginaryPart_SeqBAIJ 2160 }; 2161 2162 EXTERN_C_BEGIN 2163 #undef __FUNCT__ 2164 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2165 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 2166 { 2167 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2168 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2169 PetscErrorCode ierr; 2170 2171 PetscFunctionBegin; 2172 if (aij->nonew != 1) { 2173 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2174 } 2175 2176 /* allocate space for values if not already there */ 2177 if (!aij->saved_values) { 2178 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2179 } 2180 2181 /* copy values over */ 2182 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2183 PetscFunctionReturn(0); 2184 } 2185 EXTERN_C_END 2186 2187 EXTERN_C_BEGIN 2188 #undef __FUNCT__ 2189 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2190 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 2191 { 2192 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2193 PetscErrorCode ierr; 2194 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 2195 2196 PetscFunctionBegin; 2197 if (aij->nonew != 1) { 2198 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2199 } 2200 if (!aij->saved_values) { 2201 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2202 } 2203 2204 /* copy values over */ 2205 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2206 PetscFunctionReturn(0); 2207 } 2208 EXTERN_C_END 2209 2210 EXTERN_C_BEGIN 2211 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2212 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2213 EXTERN_C_END 2214 2215 EXTERN_C_BEGIN 2216 #undef __FUNCT__ 2217 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2218 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2219 { 2220 Mat_SeqBAIJ *b; 2221 PetscErrorCode ierr; 2222 PetscInt i,mbs,nbs,bs2,newbs = bs; 2223 PetscTruth flg,skipallocation = PETSC_FALSE; 2224 2225 PetscFunctionBegin; 2226 2227 if (nz == MAT_SKIP_ALLOCATION) { 2228 skipallocation = PETSC_TRUE; 2229 nz = 0; 2230 } 2231 2232 ierr = PetscOptionsBegin(B->comm,B->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2233 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr); 2234 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2235 2236 if (nnz && newbs != bs) { 2237 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2238 } 2239 bs = newbs; 2240 2241 B->rmap.bs = B->cmap.bs = bs; 2242 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 2243 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 2244 2245 B->preallocated = PETSC_TRUE; 2246 2247 mbs = B->rmap.n/bs; 2248 nbs = B->cmap.n/bs; 2249 bs2 = bs*bs; 2250 2251 if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) { 2252 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs); 2253 } 2254 2255 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2256 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2257 if (nnz) { 2258 for (i=0; i<mbs; i++) { 2259 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2260 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2261 } 2262 } 2263 2264 b = (Mat_SeqBAIJ*)B->data; 2265 ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2266 ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 2267 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2268 2269 B->ops->solve = MatSolve_SeqBAIJ_Update; 2270 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2271 if (!flg) { 2272 switch (bs) { 2273 case 1: 2274 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2275 B->ops->mult = MatMult_SeqBAIJ_1; 2276 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2277 break; 2278 case 2: 2279 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2280 B->ops->mult = MatMult_SeqBAIJ_2; 2281 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2282 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2283 break; 2284 case 3: 2285 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2286 B->ops->mult = MatMult_SeqBAIJ_3; 2287 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2288 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2289 break; 2290 case 4: 2291 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2292 B->ops->mult = MatMult_SeqBAIJ_4; 2293 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2294 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2295 break; 2296 case 5: 2297 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2298 B->ops->mult = MatMult_SeqBAIJ_5; 2299 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2300 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2301 break; 2302 case 6: 2303 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2304 B->ops->mult = MatMult_SeqBAIJ_6; 2305 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2306 break; 2307 case 7: 2308 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2309 B->ops->mult = MatMult_SeqBAIJ_7; 2310 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2311 break; 2312 default: 2313 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2314 B->ops->mult = MatMult_SeqBAIJ_N; 2315 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2316 break; 2317 } 2318 } 2319 B->rmap.bs = bs; 2320 b->mbs = mbs; 2321 b->nbs = nbs; 2322 if (!skipallocation) { 2323 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2324 /* b->ilen will count nonzeros in each block row so far. */ 2325 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2326 if (!nnz) { 2327 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2328 else if (nz <= 0) nz = 1; 2329 for (i=0; i<mbs; i++) b->imax[i] = nz; 2330 nz = nz*mbs; 2331 } else { 2332 nz = 0; 2333 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2334 } 2335 2336 /* allocate the matrix space */ 2337 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 2338 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2339 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2340 b->singlemalloc = PETSC_TRUE; 2341 2342 b->i[0] = 0; 2343 for (i=1; i<mbs+1; i++) { 2344 b->i[i] = b->i[i-1] + b->imax[i-1]; 2345 } 2346 b->free_a = PETSC_TRUE; 2347 b->free_ij = PETSC_TRUE; 2348 } else { 2349 b->free_a = PETSC_FALSE; 2350 b->free_ij = PETSC_FALSE; 2351 } 2352 2353 B->rmap.bs = bs; 2354 b->bs2 = bs2; 2355 b->mbs = mbs; 2356 b->nz = 0; 2357 b->maxnz = nz*bs2; 2358 B->info.nz_unneeded = (PetscReal)b->maxnz; 2359 PetscFunctionReturn(0); 2360 } 2361 EXTERN_C_END 2362 2363 /*MC 2364 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2365 block sparse compressed row format. 2366 2367 Options Database Keys: 2368 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2369 2370 Level: beginner 2371 2372 .seealso: MatCreateSeqBAIJ() 2373 M*/ 2374 2375 EXTERN_C_BEGIN 2376 #undef __FUNCT__ 2377 #define __FUNCT__ "MatCreate_SeqBAIJ" 2378 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 2379 { 2380 PetscErrorCode ierr; 2381 PetscMPIInt size; 2382 Mat_SeqBAIJ *b; 2383 2384 PetscFunctionBegin; 2385 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 2386 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2387 2388 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2389 B->data = (void*)b; 2390 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2391 B->factor = 0; 2392 B->mapping = 0; 2393 b->row = 0; 2394 b->col = 0; 2395 b->icol = 0; 2396 b->reallocs = 0; 2397 b->saved_values = 0; 2398 #if defined(PETSC_USE_MAT_SINGLE) 2399 b->setvalueslen = 0; 2400 b->setvaluescopy = PETSC_NULL; 2401 #endif 2402 2403 b->sorted = PETSC_FALSE; 2404 b->roworiented = PETSC_TRUE; 2405 b->nonew = 0; 2406 b->diag = 0; 2407 b->solve_work = 0; 2408 b->mult_work = 0; 2409 B->spptr = 0; 2410 B->info.nz_unneeded = (PetscReal)b->maxnz; 2411 b->keepzeroedrows = PETSC_FALSE; 2412 b->xtoy = 0; 2413 b->XtoY = 0; 2414 b->compressedrow.use = PETSC_FALSE; 2415 b->compressedrow.nrows = 0; 2416 b->compressedrow.i = PETSC_NULL; 2417 b->compressedrow.rindex = PETSC_NULL; 2418 b->compressedrow.checked = PETSC_FALSE; 2419 B->same_nonzero = PETSC_FALSE; 2420 2421 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 2422 "MatInvertBlockDiagonal_SeqBAIJ", 2423 MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 2424 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2425 "MatStoreValues_SeqBAIJ", 2426 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2427 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2428 "MatRetrieveValues_SeqBAIJ", 2429 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2430 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2431 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2432 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2433 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2434 "MatConvert_SeqBAIJ_SeqAIJ", 2435 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2436 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2437 "MatConvert_SeqBAIJ_SeqSBAIJ", 2438 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2439 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2440 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2441 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2442 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 2443 PetscFunctionReturn(0); 2444 } 2445 EXTERN_C_END 2446 2447 #undef __FUNCT__ 2448 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2449 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2450 { 2451 Mat C; 2452 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2453 PetscErrorCode ierr; 2454 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2455 2456 PetscFunctionBegin; 2457 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2458 2459 *B = 0; 2460 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 2461 ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 2462 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 2463 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2464 c = (Mat_SeqBAIJ*)C->data; 2465 2466 C->rmap.N = A->rmap.N; 2467 C->cmap.N = A->cmap.N; 2468 C->rmap.bs = A->rmap.bs; 2469 c->bs2 = a->bs2; 2470 c->mbs = a->mbs; 2471 c->nbs = a->nbs; 2472 2473 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 2474 for (i=0; i<mbs; i++) { 2475 c->imax[i] = a->imax[i]; 2476 c->ilen[i] = a->ilen[i]; 2477 } 2478 2479 /* allocate the matrix space */ 2480 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 2481 c->singlemalloc = PETSC_TRUE; 2482 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2483 if (mbs > 0) { 2484 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2485 if (cpvalues == MAT_COPY_VALUES) { 2486 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2487 } else { 2488 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2489 } 2490 } 2491 c->sorted = a->sorted; 2492 c->roworiented = a->roworiented; 2493 c->nonew = a->nonew; 2494 2495 if (a->diag) { 2496 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2497 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2498 for (i=0; i<mbs; i++) { 2499 c->diag[i] = a->diag[i]; 2500 } 2501 } else c->diag = 0; 2502 c->nz = a->nz; 2503 c->maxnz = a->maxnz; 2504 c->solve_work = 0; 2505 c->mult_work = 0; 2506 c->free_a = PETSC_TRUE; 2507 c->free_ij = PETSC_TRUE; 2508 C->preallocated = PETSC_TRUE; 2509 C->assembled = PETSC_TRUE; 2510 2511 c->compressedrow.use = a->compressedrow.use; 2512 c->compressedrow.nrows = a->compressedrow.nrows; 2513 c->compressedrow.checked = a->compressedrow.checked; 2514 if ( a->compressedrow.checked && a->compressedrow.use){ 2515 i = a->compressedrow.nrows; 2516 ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 2517 c->compressedrow.rindex = c->compressedrow.i + i + 1; 2518 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 2519 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 2520 } else { 2521 c->compressedrow.use = PETSC_FALSE; 2522 c->compressedrow.i = PETSC_NULL; 2523 c->compressedrow.rindex = PETSC_NULL; 2524 } 2525 C->same_nonzero = A->same_nonzero; 2526 *B = C; 2527 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 2528 PetscFunctionReturn(0); 2529 } 2530 2531 #undef __FUNCT__ 2532 #define __FUNCT__ "MatLoad_SeqBAIJ" 2533 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A) 2534 { 2535 Mat_SeqBAIJ *a; 2536 Mat B; 2537 PetscErrorCode ierr; 2538 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2539 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2540 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2541 PetscInt *masked,nmask,tmp,bs2,ishift; 2542 PetscMPIInt size; 2543 int fd; 2544 PetscScalar *aa; 2545 MPI_Comm comm = ((PetscObject)viewer)->comm; 2546 2547 PetscFunctionBegin; 2548 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 2549 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 2550 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2551 bs2 = bs*bs; 2552 2553 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2554 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2555 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2556 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2557 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2558 M = header[1]; N = header[2]; nz = header[3]; 2559 2560 if (header[3] < 0) { 2561 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2562 } 2563 2564 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2565 2566 /* 2567 This code adds extra rows to make sure the number of rows is 2568 divisible by the blocksize 2569 */ 2570 mbs = M/bs; 2571 extra_rows = bs - M + bs*(mbs); 2572 if (extra_rows == bs) extra_rows = 0; 2573 else mbs++; 2574 if (extra_rows) { 2575 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2576 } 2577 2578 /* read in row lengths */ 2579 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2580 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2581 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2582 2583 /* read in column indices */ 2584 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2585 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2586 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2587 2588 /* loop over row lengths determining block row lengths */ 2589 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2590 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2591 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2592 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2593 masked = mask + mbs; 2594 rowcount = 0; nzcount = 0; 2595 for (i=0; i<mbs; i++) { 2596 nmask = 0; 2597 for (j=0; j<bs; j++) { 2598 kmax = rowlengths[rowcount]; 2599 for (k=0; k<kmax; k++) { 2600 tmp = jj[nzcount++]/bs; 2601 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2602 } 2603 rowcount++; 2604 } 2605 browlengths[i] += nmask; 2606 /* zero out the mask elements we set */ 2607 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2608 } 2609 2610 /* create our matrix */ 2611 ierr = MatCreate(comm,&B); 2612 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 2613 ierr = MatSetType(B,type);CHKERRQ(ierr); 2614 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 2615 a = (Mat_SeqBAIJ*)B->data; 2616 2617 /* set matrix "i" values */ 2618 a->i[0] = 0; 2619 for (i=1; i<= mbs; i++) { 2620 a->i[i] = a->i[i-1] + browlengths[i-1]; 2621 a->ilen[i-1] = browlengths[i-1]; 2622 } 2623 a->nz = 0; 2624 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2625 2626 /* read in nonzero values */ 2627 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2628 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2629 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2630 2631 /* set "a" and "j" values into matrix */ 2632 nzcount = 0; jcount = 0; 2633 for (i=0; i<mbs; i++) { 2634 nzcountb = nzcount; 2635 nmask = 0; 2636 for (j=0; j<bs; j++) { 2637 kmax = rowlengths[i*bs+j]; 2638 for (k=0; k<kmax; k++) { 2639 tmp = jj[nzcount++]/bs; 2640 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2641 } 2642 } 2643 /* sort the masked values */ 2644 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2645 2646 /* set "j" values into matrix */ 2647 maskcount = 1; 2648 for (j=0; j<nmask; j++) { 2649 a->j[jcount++] = masked[j]; 2650 mask[masked[j]] = maskcount++; 2651 } 2652 /* set "a" values into matrix */ 2653 ishift = bs2*a->i[i]; 2654 for (j=0; j<bs; j++) { 2655 kmax = rowlengths[i*bs+j]; 2656 for (k=0; k<kmax; k++) { 2657 tmp = jj[nzcountb]/bs ; 2658 block = mask[tmp] - 1; 2659 point = jj[nzcountb] - bs*tmp; 2660 idx = ishift + bs2*block + j + bs*point; 2661 a->a[idx] = (MatScalar)aa[nzcountb++]; 2662 } 2663 } 2664 /* zero out the mask elements we set */ 2665 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2666 } 2667 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2668 2669 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2670 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2671 ierr = PetscFree(aa);CHKERRQ(ierr); 2672 ierr = PetscFree(jj);CHKERRQ(ierr); 2673 ierr = PetscFree(mask);CHKERRQ(ierr); 2674 2675 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2676 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2677 ierr = MatView_Private(B);CHKERRQ(ierr); 2678 2679 *A = B; 2680 PetscFunctionReturn(0); 2681 } 2682 2683 #undef __FUNCT__ 2684 #define __FUNCT__ "MatCreateSeqBAIJ" 2685 /*@C 2686 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2687 compressed row) format. For good matrix assembly performance the 2688 user should preallocate the matrix storage by setting the parameter nz 2689 (or the array nnz). By setting these parameters accurately, performance 2690 during matrix assembly can be increased by more than a factor of 50. 2691 2692 Collective on MPI_Comm 2693 2694 Input Parameters: 2695 + comm - MPI communicator, set to PETSC_COMM_SELF 2696 . bs - size of block 2697 . m - number of rows 2698 . n - number of columns 2699 . nz - number of nonzero blocks per block row (same for all rows) 2700 - nnz - array containing the number of nonzero blocks in the various block rows 2701 (possibly different for each block row) or PETSC_NULL 2702 2703 Output Parameter: 2704 . A - the matrix 2705 2706 Options Database Keys: 2707 . -mat_no_unroll - uses code that does not unroll the loops in the 2708 block calculations (much slower) 2709 . -mat_block_size - size of the blocks to use 2710 2711 Level: intermediate 2712 2713 Notes: 2714 The number of rows and columns must be divisible by blocksize. 2715 2716 If the nnz parameter is given then the nz parameter is ignored 2717 2718 A nonzero block is any block that as 1 or more nonzeros in it 2719 2720 The block AIJ format is fully compatible with standard Fortran 77 2721 storage. That is, the stored row and column indices can begin at 2722 either one (as in Fortran) or zero. See the users' manual for details. 2723 2724 Specify the preallocated storage with either nz or nnz (not both). 2725 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2726 allocation. For additional details, see the users manual chapter on 2727 matrices. 2728 2729 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2730 @*/ 2731 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 2732 { 2733 PetscErrorCode ierr; 2734 2735 PetscFunctionBegin; 2736 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2737 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 2738 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2739 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 2740 PetscFunctionReturn(0); 2741 } 2742 2743 #undef __FUNCT__ 2744 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2745 /*@C 2746 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2747 per row in the matrix. For good matrix assembly performance the 2748 user should preallocate the matrix storage by setting the parameter nz 2749 (or the array nnz). By setting these parameters accurately, performance 2750 during matrix assembly can be increased by more than a factor of 50. 2751 2752 Collective on MPI_Comm 2753 2754 Input Parameters: 2755 + A - the matrix 2756 . bs - size of block 2757 . nz - number of block nonzeros per block row (same for all rows) 2758 - nnz - array containing the number of block nonzeros in the various block rows 2759 (possibly different for each block row) or PETSC_NULL 2760 2761 Options Database Keys: 2762 . -mat_no_unroll - uses code that does not unroll the loops in the 2763 block calculations (much slower) 2764 . -mat_block_size - size of the blocks to use 2765 2766 Level: intermediate 2767 2768 Notes: 2769 If the nnz parameter is given then the nz parameter is ignored 2770 2771 The block AIJ format is fully compatible with standard Fortran 77 2772 storage. That is, the stored row and column indices can begin at 2773 either one (as in Fortran) or zero. See the users' manual for details. 2774 2775 Specify the preallocated storage with either nz or nnz (not both). 2776 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2777 allocation. For additional details, see the users manual chapter on 2778 matrices. 2779 2780 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2781 @*/ 2782 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 2783 { 2784 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 2785 2786 PetscFunctionBegin; 2787 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2788 if (f) { 2789 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2790 } 2791 PetscFunctionReturn(0); 2792 } 2793 2794 #undef __FUNCT__ 2795 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 2796 /*@ 2797 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 2798 (upper triangular entries in CSR format) provided by the user. 2799 2800 Collective on MPI_Comm 2801 2802 Input Parameters: 2803 + comm - must be an MPI communicator of size 1 2804 . bs - size of block 2805 . m - number of rows 2806 . n - number of columns 2807 . i - row indices 2808 . j - column indices 2809 - a - matrix values 2810 2811 Output Parameter: 2812 . mat - the matrix 2813 2814 Level: intermediate 2815 2816 Notes: 2817 The i, j, and a arrays are not copied by this routine, the user must free these arrays 2818 once the matrix is destroyed 2819 2820 You cannot set new nonzero locations into this matrix, that will generate an error. 2821 2822 The i and j indices are 0 based 2823 2824 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 2825 2826 @*/ 2827 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 2828 { 2829 PetscErrorCode ierr; 2830 PetscInt ii; 2831 Mat_SeqBAIJ *baij; 2832 2833 PetscFunctionBegin; 2834 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 2835 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2836 2837 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2838 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 2839 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 2840 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 2841 baij = (Mat_SeqBAIJ*)(*mat)->data; 2842 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 2843 2844 baij->i = i; 2845 baij->j = j; 2846 baij->a = a; 2847 baij->singlemalloc = PETSC_FALSE; 2848 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 2849 baij->free_a = PETSC_FALSE; 2850 baij->free_ij = PETSC_FALSE; 2851 2852 for (ii=0; ii<m; ii++) { 2853 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 2854 #if defined(PETSC_USE_DEBUG) 2855 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 2856 #endif 2857 } 2858 #if defined(PETSC_USE_DEBUG) 2859 for (ii=0; ii<baij->i[m]; ii++) { 2860 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 2861 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 2862 } 2863 #endif 2864 2865 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2866 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2867 PetscFunctionReturn(0); 2868 } 2869