1 /* 2 Defines the basic matrix operations for the BAIJ (compressed row) 3 matrix storage format. 4 */ 5 #include "src/mat/impls/baij/seq/baij.h" 6 #include "src/inline/spops.h" 7 #include "petscsys.h" /*I "petscmat.h" I*/ 8 9 #include "src/inline/ilu.h" 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->bs,mbs = a->mbs; 18 PetscScalar *v = a->a,*odiag,*diag,*mdiag; 19 20 PetscFunctionBegin; 21 if (a->idiagvalid) PetscFunctionReturn(0); 22 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 23 diag_offset = a->diag; 24 if (!a->idiag) { 25 ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 26 } 27 diag = a->idiag; 28 mdiag = a->idiag+bs*bs*mbs; 29 /* factor and invert each block */ 30 switch (bs){ 31 case 2: 32 for (i=0; i<mbs; i++) { 33 odiag = v + 4*diag_offset[i]; 34 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 35 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 36 ierr = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr); 37 diag += 4; 38 mdiag += 4; 39 } 40 break; 41 case 3: 42 for (i=0; i<mbs; i++) { 43 odiag = v + 9*diag_offset[i]; 44 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 45 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 46 diag[8] = odiag[8]; 47 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 48 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 49 mdiag[8] = odiag[8]; 50 ierr = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr); 51 diag += 9; 52 mdiag += 9; 53 } 54 break; 55 case 4: 56 for (i=0; i<mbs; i++) { 57 odiag = v + 16*diag_offset[i]; 58 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 59 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 60 ierr = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr); 61 diag += 16; 62 mdiag += 16; 63 } 64 break; 65 case 5: 66 for (i=0; i<mbs; i++) { 67 odiag = v + 25*diag_offset[i]; 68 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 69 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 70 ierr = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr); 71 diag += 25; 72 mdiag += 25; 73 } 74 break; 75 default: 76 SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 77 } 78 a->idiagvalid = PETSC_TRUE; 79 PetscFunctionReturn(0); 80 } 81 82 #undef __FUNCT__ 83 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 84 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 85 { 86 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 87 PetscScalar *x,x1,x2,s1,s2; 88 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 89 PetscErrorCode ierr; 90 PetscInt m = a->mbs,i,i2,nz,idx; 91 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 92 93 PetscFunctionBegin; 94 its = its*lits; 95 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 96 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 97 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 98 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 99 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 100 101 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 102 103 diag = a->diag; 104 idiag = a->idiag; 105 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 106 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 107 108 if (flag & SOR_ZERO_INITIAL_GUESS) { 109 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 110 x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 111 x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 112 i2 = 2; 113 idiag += 4; 114 for (i=1; i<m; i++) { 115 v = aa + 4*ai[i]; 116 vi = aj + ai[i]; 117 nz = diag[i] - ai[i]; 118 s1 = b[i2]; s2 = b[i2+1]; 119 while (nz--) { 120 idx = 2*(*vi++); 121 x1 = x[idx]; x2 = x[1+idx]; 122 s1 -= v[0]*x1 + v[2]*x2; 123 s2 -= v[1]*x1 + v[3]*x2; 124 v += 4; 125 } 126 x[i2] = idiag[0]*s1 + idiag[2]*s2; 127 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 128 idiag += 4; 129 i2 += 2; 130 } 131 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 132 PetscLogFlops(4*(a->nz)); 133 } 134 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 135 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 136 i2 = 0; 137 mdiag = a->idiag+4*a->mbs; 138 for (i=0; i<m; i++) { 139 x1 = x[i2]; x2 = x[i2+1]; 140 x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 141 x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 142 mdiag += 4; 143 i2 += 2; 144 } 145 PetscLogFlops(6*m); 146 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 147 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 148 } 149 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 150 idiag = a->idiag+4*a->mbs - 4; 151 i2 = 2*m - 2; 152 x1 = x[i2]; x2 = x[i2+1]; 153 x[i2] = idiag[0]*x1 + idiag[2]*x2; 154 x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 155 idiag -= 4; 156 i2 -= 2; 157 for (i=m-2; i>=0; i--) { 158 v = aa + 4*(diag[i]+1); 159 vi = aj + diag[i] + 1; 160 nz = ai[i+1] - diag[i] - 1; 161 s1 = x[i2]; s2 = x[i2+1]; 162 while (nz--) { 163 idx = 2*(*vi++); 164 x1 = x[idx]; x2 = x[1+idx]; 165 s1 -= v[0]*x1 + v[2]*x2; 166 s2 -= v[1]*x1 + v[3]*x2; 167 v += 4; 168 } 169 x[i2] = idiag[0]*s1 + idiag[2]*s2; 170 x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 171 idiag -= 4; 172 i2 -= 2; 173 } 174 PetscLogFlops(4*(a->nz)); 175 } 176 } else { 177 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 178 } 179 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 180 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 181 PetscFunctionReturn(0); 182 } 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 186 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 187 { 188 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 189 PetscScalar *x,x1,x2,x3,s1,s2,s3; 190 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 191 PetscErrorCode ierr; 192 PetscInt m = a->mbs,i,i2,nz,idx; 193 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 194 195 PetscFunctionBegin; 196 its = its*lits; 197 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 198 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 199 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 200 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 201 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 202 203 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 204 205 diag = a->diag; 206 idiag = a->idiag; 207 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 208 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 209 210 if (flag & SOR_ZERO_INITIAL_GUESS) { 211 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 212 x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 213 x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 214 x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 215 i2 = 3; 216 idiag += 9; 217 for (i=1; i<m; i++) { 218 v = aa + 9*ai[i]; 219 vi = aj + ai[i]; 220 nz = diag[i] - ai[i]; 221 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 222 while (nz--) { 223 idx = 3*(*vi++); 224 x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 225 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 226 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 227 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 228 v += 9; 229 } 230 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 231 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 232 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 233 idiag += 9; 234 i2 += 3; 235 } 236 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 237 PetscLogFlops(9*(a->nz)); 238 } 239 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 240 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 241 i2 = 0; 242 mdiag = a->idiag+9*a->mbs; 243 for (i=0; i<m; i++) { 244 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 245 x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 246 x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 247 x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 248 mdiag += 9; 249 i2 += 3; 250 } 251 PetscLogFlops(15*m); 252 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 253 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 254 } 255 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 256 idiag = a->idiag+9*a->mbs - 9; 257 i2 = 3*m - 3; 258 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 259 x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 260 x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 261 x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 262 idiag -= 9; 263 i2 -= 3; 264 for (i=m-2; i>=0; i--) { 265 v = aa + 9*(diag[i]+1); 266 vi = aj + diag[i] + 1; 267 nz = ai[i+1] - diag[i] - 1; 268 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 269 while (nz--) { 270 idx = 3*(*vi++); 271 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 272 s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 273 s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 274 s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 275 v += 9; 276 } 277 x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 278 x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 279 x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 280 idiag -= 9; 281 i2 -= 3; 282 } 283 PetscLogFlops(9*(a->nz)); 284 } 285 } else { 286 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 287 } 288 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 289 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 290 PetscFunctionReturn(0); 291 } 292 293 #undef __FUNCT__ 294 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 295 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 296 { 297 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 298 PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 299 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 300 PetscErrorCode ierr; 301 PetscInt m = a->mbs,i,i2,nz,idx; 302 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 303 304 PetscFunctionBegin; 305 its = its*lits; 306 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 307 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 308 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 309 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 310 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 311 312 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 313 314 diag = a->diag; 315 idiag = a->idiag; 316 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 317 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 318 319 if (flag & SOR_ZERO_INITIAL_GUESS) { 320 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 321 x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 322 x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 323 x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 324 x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 325 i2 = 4; 326 idiag += 16; 327 for (i=1; i<m; i++) { 328 v = aa + 16*ai[i]; 329 vi = aj + ai[i]; 330 nz = diag[i] - ai[i]; 331 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 332 while (nz--) { 333 idx = 4*(*vi++); 334 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 335 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 336 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 337 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 338 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 339 v += 16; 340 } 341 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 342 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 343 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 344 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 345 idiag += 16; 346 i2 += 4; 347 } 348 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 349 PetscLogFlops(16*(a->nz)); 350 } 351 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 352 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 353 i2 = 0; 354 mdiag = a->idiag+16*a->mbs; 355 for (i=0; i<m; i++) { 356 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 357 x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 358 x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 359 x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 360 x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 361 mdiag += 16; 362 i2 += 4; 363 } 364 PetscLogFlops(28*m); 365 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 366 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 367 } 368 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 369 idiag = a->idiag+16*a->mbs - 16; 370 i2 = 4*m - 4; 371 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 372 x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 373 x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 374 x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 375 x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 376 idiag -= 16; 377 i2 -= 4; 378 for (i=m-2; i>=0; i--) { 379 v = aa + 16*(diag[i]+1); 380 vi = aj + diag[i] + 1; 381 nz = ai[i+1] - diag[i] - 1; 382 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 383 while (nz--) { 384 idx = 4*(*vi++); 385 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 386 s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 387 s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 388 s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 389 s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 390 v += 16; 391 } 392 x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 393 x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 394 x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 395 x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 396 idiag -= 16; 397 i2 -= 4; 398 } 399 PetscLogFlops(16*(a->nz)); 400 } 401 } else { 402 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 403 } 404 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 405 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 406 PetscFunctionReturn(0); 407 } 408 409 #undef __FUNCT__ 410 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 411 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 412 { 413 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 414 PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 415 const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag; 416 PetscErrorCode ierr; 417 PetscInt m = a->mbs,i,i2,nz,idx; 418 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 419 420 PetscFunctionBegin; 421 its = its*lits; 422 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 423 if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 424 if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 425 if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick"); 426 if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 427 428 if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 429 430 diag = a->diag; 431 idiag = a->idiag; 432 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 433 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 434 435 if (flag & SOR_ZERO_INITIAL_GUESS) { 436 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 437 x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 438 x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 439 x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 440 x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 441 x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 442 i2 = 5; 443 idiag += 25; 444 for (i=1; i<m; i++) { 445 v = aa + 25*ai[i]; 446 vi = aj + ai[i]; 447 nz = diag[i] - ai[i]; 448 s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 449 while (nz--) { 450 idx = 5*(*vi++); 451 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 452 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 453 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 454 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 455 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 456 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 457 v += 25; 458 } 459 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 460 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 461 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 462 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 463 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 464 idiag += 25; 465 i2 += 5; 466 } 467 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 468 PetscLogFlops(25*(a->nz)); 469 } 470 if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 471 (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 472 i2 = 0; 473 mdiag = a->idiag+25*a->mbs; 474 for (i=0; i<m; i++) { 475 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 476 x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 477 x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 478 x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 479 x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 480 x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 481 mdiag += 25; 482 i2 += 5; 483 } 484 PetscLogFlops(45*m); 485 } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 486 ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr); 487 } 488 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 489 idiag = a->idiag+25*a->mbs - 25; 490 i2 = 5*m - 5; 491 x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 492 x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 493 x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 494 x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 495 x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 496 x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 497 idiag -= 25; 498 i2 -= 5; 499 for (i=m-2; i>=0; i--) { 500 v = aa + 25*(diag[i]+1); 501 vi = aj + diag[i] + 1; 502 nz = ai[i+1] - diag[i] - 1; 503 s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 504 while (nz--) { 505 idx = 5*(*vi++); 506 x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 507 s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 508 s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 509 s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 510 s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 511 s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 512 v += 25; 513 } 514 x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 515 x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 516 x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 517 x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 518 x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 519 idiag -= 25; 520 i2 -= 5; 521 } 522 PetscLogFlops(25*(a->nz)); 523 } 524 } else { 525 SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 526 } 527 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 528 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 529 PetscFunctionReturn(0); 530 } 531 532 /* 533 Special version for Fun3d sequential benchmark 534 */ 535 #if defined(PETSC_HAVE_FORTRAN_CAPS) 536 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 537 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 538 #define matsetvaluesblocked4_ matsetvaluesblocked4 539 #endif 540 541 EXTERN_C_BEGIN 542 #undef __FUNCT__ 543 #define __FUNCT__ "matsetvaluesblocked4_" 544 void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 545 { 546 Mat A = *AA; 547 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 548 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 549 PetscInt *ai=a->i,*ailen=a->ilen; 550 PetscInt *aj=a->j,stepval; 551 const PetscScalar *value = v; 552 MatScalar *ap,*aa = a->a,*bap; 553 554 PetscFunctionBegin; 555 stepval = (n-1)*4; 556 for (k=0; k<m; k++) { /* loop over added rows */ 557 row = im[k]; 558 rp = aj + ai[row]; 559 ap = aa + 16*ai[row]; 560 nrow = ailen[row]; 561 low = 0; 562 for (l=0; l<n; l++) { /* loop over added columns */ 563 col = in[l]; 564 value = v + k*(stepval+4)*4 + l*4; 565 low = 0; high = nrow; 566 while (high-low > 7) { 567 t = (low+high)/2; 568 if (rp[t] > col) high = t; 569 else low = t; 570 } 571 for (i=low; i<high; i++) { 572 if (rp[i] > col) break; 573 if (rp[i] == col) { 574 bap = ap + 16*i; 575 for (ii=0; ii<4; ii++,value+=stepval) { 576 for (jj=ii; jj<16; jj+=4) { 577 bap[jj] += *value++; 578 } 579 } 580 goto noinsert2; 581 } 582 } 583 N = nrow++ - 1; 584 /* shift up all the later entries in this row */ 585 for (ii=N; ii>=i; ii--) { 586 rp[ii+1] = rp[ii]; 587 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 588 } 589 if (N >= i) { 590 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 591 } 592 rp[i] = col; 593 bap = ap + 16*i; 594 for (ii=0; ii<4; ii++,value+=stepval) { 595 for (jj=ii; jj<16; jj+=4) { 596 bap[jj] = *value++; 597 } 598 } 599 noinsert2:; 600 low = i; 601 } 602 ailen[row] = nrow; 603 } 604 } 605 EXTERN_C_END 606 607 #if defined(PETSC_HAVE_FORTRAN_CAPS) 608 #define matsetvalues4_ MATSETVALUES4 609 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 610 #define matsetvalues4_ matsetvalues4 611 #endif 612 613 EXTERN_C_BEGIN 614 #undef __FUNCT__ 615 #define __FUNCT__ "MatSetValues4_" 616 void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 617 { 618 Mat A = *AA; 619 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 620 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 621 PetscInt *ai=a->i,*ailen=a->ilen; 622 PetscInt *aj=a->j,brow,bcol; 623 PetscInt ridx,cidx; 624 MatScalar *ap,value,*aa=a->a,*bap; 625 626 PetscFunctionBegin; 627 for (k=0; k<m; k++) { /* loop over added rows */ 628 row = im[k]; brow = row/4; 629 rp = aj + ai[brow]; 630 ap = aa + 16*ai[brow]; 631 nrow = ailen[brow]; 632 low = 0; 633 for (l=0; l<n; l++) { /* loop over added columns */ 634 col = in[l]; bcol = col/4; 635 ridx = row % 4; cidx = col % 4; 636 value = v[l + k*n]; 637 low = 0; high = nrow; 638 while (high-low > 7) { 639 t = (low+high)/2; 640 if (rp[t] > bcol) high = t; 641 else low = t; 642 } 643 for (i=low; i<high; i++) { 644 if (rp[i] > bcol) break; 645 if (rp[i] == bcol) { 646 bap = ap + 16*i + 4*cidx + ridx; 647 *bap += value; 648 goto noinsert1; 649 } 650 } 651 N = nrow++ - 1; 652 /* shift up all the later entries in this row */ 653 for (ii=N; ii>=i; ii--) { 654 rp[ii+1] = rp[ii]; 655 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 656 } 657 if (N>=i) { 658 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 659 } 660 rp[i] = bcol; 661 ap[16*i + 4*cidx + ridx] = value; 662 noinsert1:; 663 low = i; 664 } 665 ailen[brow] = nrow; 666 } 667 } 668 EXTERN_C_END 669 670 /* UGLY, ugly, ugly 671 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 672 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 673 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 674 converts the entries into single precision and then calls ..._MatScalar() to put them 675 into the single precision data structures. 676 */ 677 #if defined(PETSC_USE_MAT_SINGLE) 678 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode); 679 #else 680 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 681 #endif 682 683 #define CHUNKSIZE 10 684 685 /* 686 Checks for missing diagonals 687 */ 688 #undef __FUNCT__ 689 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 690 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A) 691 { 692 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 693 PetscErrorCode ierr; 694 PetscInt *diag,*jj = a->j,i; 695 696 PetscFunctionBegin; 697 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 698 diag = a->diag; 699 for (i=0; i<a->mbs; i++) { 700 if (jj[diag[i]] != i) { 701 SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i); 702 } 703 } 704 PetscFunctionReturn(0); 705 } 706 707 #undef __FUNCT__ 708 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 709 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 710 { 711 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 712 PetscErrorCode ierr; 713 PetscInt i,j,*diag,m = a->mbs; 714 715 PetscFunctionBegin; 716 if (a->diag) PetscFunctionReturn(0); 717 718 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr); 719 PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt)); 720 for (i=0; i<m; i++) { 721 diag[i] = a->i[i+1]; 722 for (j=a->i[i]; j<a->i[i+1]; j++) { 723 if (a->j[j] == i) { 724 diag[i] = j; 725 break; 726 } 727 } 728 } 729 a->diag = diag; 730 PetscFunctionReturn(0); 731 } 732 733 734 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 735 736 #undef __FUNCT__ 737 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 738 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 739 { 740 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 741 PetscErrorCode ierr; 742 PetscInt n = a->mbs,i; 743 744 PetscFunctionBegin; 745 *nn = n; 746 if (!ia) PetscFunctionReturn(0); 747 if (symmetric) { 748 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 749 } else if (oshift == 1) { 750 /* temporarily add 1 to i and j indices */ 751 PetscInt nz = a->i[n]; 752 for (i=0; i<nz; i++) a->j[i]++; 753 for (i=0; i<n+1; i++) a->i[i]++; 754 *ia = a->i; *ja = a->j; 755 } else { 756 *ia = a->i; *ja = a->j; 757 } 758 759 PetscFunctionReturn(0); 760 } 761 762 #undef __FUNCT__ 763 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 764 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 765 { 766 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 767 PetscErrorCode ierr; 768 PetscInt i,n = a->mbs; 769 770 PetscFunctionBegin; 771 if (!ia) PetscFunctionReturn(0); 772 if (symmetric) { 773 ierr = PetscFree(*ia);CHKERRQ(ierr); 774 ierr = PetscFree(*ja);CHKERRQ(ierr); 775 } else if (oshift == 1) { 776 PetscInt nz = a->i[n]-1; 777 for (i=0; i<nz; i++) a->j[i]--; 778 for (i=0; i<n+1; i++) a->i[i]--; 779 } 780 PetscFunctionReturn(0); 781 } 782 783 #undef __FUNCT__ 784 #define __FUNCT__ "MatDestroy_SeqBAIJ" 785 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 786 { 787 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 788 PetscErrorCode ierr; 789 790 PetscFunctionBegin; 791 #if defined(PETSC_USE_LOG) 792 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->m,A->n,a->nz); 793 #endif 794 ierr = PetscFree(a->a);CHKERRQ(ierr); 795 if (!a->singlemalloc) { 796 ierr = PetscFree(a->i);CHKERRQ(ierr); 797 ierr = PetscFree(a->j);CHKERRQ(ierr); 798 } 799 if (a->row) { 800 ierr = ISDestroy(a->row);CHKERRQ(ierr); 801 } 802 if (a->col) { 803 ierr = ISDestroy(a->col);CHKERRQ(ierr); 804 } 805 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 806 if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);} 807 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 808 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 809 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 810 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 811 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 812 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 813 #if defined(PETSC_USE_MAT_SINGLE) 814 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 815 #endif 816 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 817 818 ierr = PetscFree(a);CHKERRQ(ierr); 819 820 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 821 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 822 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 823 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 824 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 825 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 826 PetscFunctionReturn(0); 827 } 828 829 #undef __FUNCT__ 830 #define __FUNCT__ "MatSetOption_SeqBAIJ" 831 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op) 832 { 833 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 834 835 PetscFunctionBegin; 836 switch (op) { 837 case MAT_ROW_ORIENTED: 838 a->roworiented = PETSC_TRUE; 839 break; 840 case MAT_COLUMN_ORIENTED: 841 a->roworiented = PETSC_FALSE; 842 break; 843 case MAT_COLUMNS_SORTED: 844 a->sorted = PETSC_TRUE; 845 break; 846 case MAT_COLUMNS_UNSORTED: 847 a->sorted = PETSC_FALSE; 848 break; 849 case MAT_KEEP_ZEROED_ROWS: 850 a->keepzeroedrows = PETSC_TRUE; 851 break; 852 case MAT_NO_NEW_NONZERO_LOCATIONS: 853 a->nonew = 1; 854 break; 855 case MAT_NEW_NONZERO_LOCATION_ERR: 856 a->nonew = -1; 857 break; 858 case MAT_NEW_NONZERO_ALLOCATION_ERR: 859 a->nonew = -2; 860 break; 861 case MAT_YES_NEW_NONZERO_LOCATIONS: 862 a->nonew = 0; 863 break; 864 case MAT_ROWS_SORTED: 865 case MAT_ROWS_UNSORTED: 866 case MAT_YES_NEW_DIAGONALS: 867 case MAT_IGNORE_OFF_PROC_ENTRIES: 868 case MAT_USE_HASH_TABLE: 869 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 870 break; 871 case MAT_NO_NEW_DIAGONALS: 872 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 873 case MAT_SYMMETRIC: 874 case MAT_STRUCTURALLY_SYMMETRIC: 875 case MAT_NOT_SYMMETRIC: 876 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 877 case MAT_HERMITIAN: 878 case MAT_NOT_HERMITIAN: 879 case MAT_SYMMETRY_ETERNAL: 880 case MAT_NOT_SYMMETRY_ETERNAL: 881 break; 882 default: 883 SETERRQ(PETSC_ERR_SUP,"unknown option"); 884 } 885 PetscFunctionReturn(0); 886 } 887 888 #undef __FUNCT__ 889 #define __FUNCT__ "MatGetRow_SeqBAIJ" 890 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 891 { 892 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 893 PetscErrorCode ierr; 894 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 895 MatScalar *aa,*aa_i; 896 PetscScalar *v_i; 897 898 PetscFunctionBegin; 899 bs = A->bs; 900 ai = a->i; 901 aj = a->j; 902 aa = a->a; 903 bs2 = a->bs2; 904 905 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 906 907 bn = row/bs; /* Block number */ 908 bp = row % bs; /* Block Position */ 909 M = ai[bn+1] - ai[bn]; 910 *nz = bs*M; 911 912 if (v) { 913 *v = 0; 914 if (*nz) { 915 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 916 for (i=0; i<M; i++) { /* for each block in the block row */ 917 v_i = *v + i*bs; 918 aa_i = aa + bs2*(ai[bn] + i); 919 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 920 } 921 } 922 } 923 924 if (idx) { 925 *idx = 0; 926 if (*nz) { 927 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 928 for (i=0; i<M; i++) { /* for each block in the block row */ 929 idx_i = *idx + i*bs; 930 itmp = bs*aj[ai[bn] + i]; 931 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 932 } 933 } 934 } 935 PetscFunctionReturn(0); 936 } 937 938 #undef __FUNCT__ 939 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 940 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 941 { 942 PetscErrorCode ierr; 943 944 PetscFunctionBegin; 945 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 946 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 947 PetscFunctionReturn(0); 948 } 949 950 #undef __FUNCT__ 951 #define __FUNCT__ "MatTranspose_SeqBAIJ" 952 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B) 953 { 954 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 955 Mat C; 956 PetscErrorCode ierr; 957 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 958 PetscInt *rows,*cols,bs2=a->bs2; 959 PetscScalar *array; 960 961 PetscFunctionBegin; 962 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 963 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 964 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 965 966 #if defined(PETSC_USE_MAT_SINGLE) 967 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 968 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 969 #else 970 array = a->a; 971 #endif 972 973 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 974 ierr = MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);CHKERRQ(ierr); 975 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 976 ierr = MatSeqBAIJSetPreallocation(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 977 ierr = PetscFree(col);CHKERRQ(ierr); 978 ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 979 cols = rows + bs; 980 for (i=0; i<mbs; i++) { 981 cols[0] = i*bs; 982 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 983 len = ai[i+1] - ai[i]; 984 for (j=0; j<len; j++) { 985 rows[0] = (*aj++)*bs; 986 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 987 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 988 array += bs2; 989 } 990 } 991 ierr = PetscFree(rows);CHKERRQ(ierr); 992 #if defined(PETSC_USE_MAT_SINGLE) 993 ierr = PetscFree(array); 994 #endif 995 996 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 997 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 998 999 if (B) { 1000 *B = C; 1001 } else { 1002 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1003 } 1004 PetscFunctionReturn(0); 1005 } 1006 1007 #undef __FUNCT__ 1008 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1009 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1010 { 1011 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1012 PetscErrorCode ierr; 1013 PetscInt i,*col_lens,bs = A->bs,count,*jj,j,k,l,bs2=a->bs2; 1014 int fd; 1015 PetscScalar *aa; 1016 FILE *file; 1017 1018 PetscFunctionBegin; 1019 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1020 ierr = PetscMalloc((4+A->m)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1021 col_lens[0] = MAT_FILE_COOKIE; 1022 1023 col_lens[1] = A->m; 1024 col_lens[2] = A->n; 1025 col_lens[3] = a->nz*bs2; 1026 1027 /* store lengths of each row and write (including header) to file */ 1028 count = 0; 1029 for (i=0; i<a->mbs; i++) { 1030 for (j=0; j<bs; j++) { 1031 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1032 } 1033 } 1034 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1035 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1036 1037 /* store column indices (zero start index) */ 1038 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1039 count = 0; 1040 for (i=0; i<a->mbs; i++) { 1041 for (j=0; j<bs; j++) { 1042 for (k=a->i[i]; k<a->i[i+1]; k++) { 1043 for (l=0; l<bs; l++) { 1044 jj[count++] = bs*a->j[k] + l; 1045 } 1046 } 1047 } 1048 } 1049 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1050 ierr = PetscFree(jj);CHKERRQ(ierr); 1051 1052 /* store nonzero values */ 1053 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1054 count = 0; 1055 for (i=0; i<a->mbs; i++) { 1056 for (j=0; j<bs; j++) { 1057 for (k=a->i[i]; k<a->i[i+1]; k++) { 1058 for (l=0; l<bs; l++) { 1059 aa[count++] = a->a[bs2*k + l*bs + j]; 1060 } 1061 } 1062 } 1063 } 1064 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1065 ierr = PetscFree(aa);CHKERRQ(ierr); 1066 1067 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1068 if (file) { 1069 fprintf(file,"-matload_block_size %d\n",(int)A->bs); 1070 } 1071 PetscFunctionReturn(0); 1072 } 1073 1074 #undef __FUNCT__ 1075 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1076 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1077 { 1078 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1079 PetscErrorCode ierr; 1080 PetscInt i,j,bs = A->bs,k,l,bs2=a->bs2; 1081 PetscViewerFormat format; 1082 1083 PetscFunctionBegin; 1084 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1085 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1086 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1087 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1088 Mat aij; 1089 ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr); 1090 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1091 ierr = MatDestroy(aij);CHKERRQ(ierr); 1092 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1093 PetscFunctionReturn(0); 1094 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1095 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1096 for (i=0; i<a->mbs; i++) { 1097 for (j=0; j<bs; j++) { 1098 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1099 for (k=a->i[i]; k<a->i[i+1]; k++) { 1100 for (l=0; l<bs; l++) { 1101 #if defined(PETSC_USE_COMPLEX) 1102 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1103 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1104 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1105 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1106 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1107 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1108 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1109 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1110 } 1111 #else 1112 if (a->a[bs2*k + l*bs + j] != 0.0) { 1113 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1114 } 1115 #endif 1116 } 1117 } 1118 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1119 } 1120 } 1121 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1122 } else { 1123 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1124 for (i=0; i<a->mbs; i++) { 1125 for (j=0; j<bs; j++) { 1126 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1127 for (k=a->i[i]; k<a->i[i+1]; k++) { 1128 for (l=0; l<bs; l++) { 1129 #if defined(PETSC_USE_COMPLEX) 1130 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1131 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1132 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1133 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1134 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1135 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1136 } else { 1137 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1138 } 1139 #else 1140 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1141 #endif 1142 } 1143 } 1144 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1145 } 1146 } 1147 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1148 } 1149 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1150 PetscFunctionReturn(0); 1151 } 1152 1153 #undef __FUNCT__ 1154 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1155 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1156 { 1157 Mat A = (Mat) Aa; 1158 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1159 PetscErrorCode ierr; 1160 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2; 1161 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1162 MatScalar *aa; 1163 PetscViewer viewer; 1164 1165 PetscFunctionBegin; 1166 1167 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 1168 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1169 1170 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1171 1172 /* loop over matrix elements drawing boxes */ 1173 color = PETSC_DRAW_BLUE; 1174 for (i=0,row=0; i<mbs; i++,row+=bs) { 1175 for (j=a->i[i]; j<a->i[i+1]; j++) { 1176 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1177 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1178 aa = a->a + j*bs2; 1179 for (k=0; k<bs; k++) { 1180 for (l=0; l<bs; l++) { 1181 if (PetscRealPart(*aa++) >= 0.) continue; 1182 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1183 } 1184 } 1185 } 1186 } 1187 color = PETSC_DRAW_CYAN; 1188 for (i=0,row=0; i<mbs; i++,row+=bs) { 1189 for (j=a->i[i]; j<a->i[i+1]; j++) { 1190 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1191 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1192 aa = a->a + j*bs2; 1193 for (k=0; k<bs; k++) { 1194 for (l=0; l<bs; l++) { 1195 if (PetscRealPart(*aa++) != 0.) continue; 1196 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1197 } 1198 } 1199 } 1200 } 1201 1202 color = PETSC_DRAW_RED; 1203 for (i=0,row=0; i<mbs; i++,row+=bs) { 1204 for (j=a->i[i]; j<a->i[i+1]; j++) { 1205 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 1206 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1207 aa = a->a + j*bs2; 1208 for (k=0; k<bs; k++) { 1209 for (l=0; l<bs; l++) { 1210 if (PetscRealPart(*aa++) <= 0.) continue; 1211 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1212 } 1213 } 1214 } 1215 } 1216 PetscFunctionReturn(0); 1217 } 1218 1219 #undef __FUNCT__ 1220 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1221 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1222 { 1223 PetscErrorCode ierr; 1224 PetscReal xl,yl,xr,yr,w,h; 1225 PetscDraw draw; 1226 PetscTruth isnull; 1227 1228 PetscFunctionBegin; 1229 1230 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1231 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1232 1233 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1234 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 1235 xr += w; yr += h; xl = -w; yl = -h; 1236 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1237 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1238 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 1239 PetscFunctionReturn(0); 1240 } 1241 1242 #undef __FUNCT__ 1243 #define __FUNCT__ "MatView_SeqBAIJ" 1244 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1245 { 1246 PetscErrorCode ierr; 1247 PetscTruth iascii,isbinary,isdraw; 1248 1249 PetscFunctionBegin; 1250 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1251 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1252 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1253 if (iascii){ 1254 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1255 } else if (isbinary) { 1256 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1257 } else if (isdraw) { 1258 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1259 } else { 1260 Mat B; 1261 ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr); 1262 ierr = MatView(B,viewer);CHKERRQ(ierr); 1263 ierr = MatDestroy(B);CHKERRQ(ierr); 1264 } 1265 PetscFunctionReturn(0); 1266 } 1267 1268 1269 #undef __FUNCT__ 1270 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1271 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1272 { 1273 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1274 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1275 PetscInt *ai = a->i,*ailen = a->ilen; 1276 PetscInt brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2; 1277 MatScalar *ap,*aa = a->a,zero = 0.0; 1278 1279 PetscFunctionBegin; 1280 for (k=0; k<m; k++) { /* loop over rows */ 1281 row = im[k]; brow = row/bs; 1282 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 1283 if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1284 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1285 nrow = ailen[brow]; 1286 for (l=0; l<n; l++) { /* loop over columns */ 1287 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 1288 if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1289 col = in[l] ; 1290 bcol = col/bs; 1291 cidx = col%bs; 1292 ridx = row%bs; 1293 high = nrow; 1294 low = 0; /* assume unsorted */ 1295 while (high-low > 5) { 1296 t = (low+high)/2; 1297 if (rp[t] > bcol) high = t; 1298 else low = t; 1299 } 1300 for (i=low; i<high; i++) { 1301 if (rp[i] > bcol) break; 1302 if (rp[i] == bcol) { 1303 *v++ = ap[bs2*i+bs*cidx+ridx]; 1304 goto finished; 1305 } 1306 } 1307 *v++ = zero; 1308 finished:; 1309 } 1310 } 1311 PetscFunctionReturn(0); 1312 } 1313 1314 #if defined(PETSC_USE_MAT_SINGLE) 1315 #undef __FUNCT__ 1316 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1317 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 1318 { 1319 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 1320 PetscErrorCode ierr; 1321 PetscInt i,N = m*n*b->bs2; 1322 MatScalar *vsingle; 1323 1324 PetscFunctionBegin; 1325 if (N > b->setvalueslen) { 1326 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 1327 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 1328 b->setvalueslen = N; 1329 } 1330 vsingle = b->setvaluescopy; 1331 for (i=0; i<N; i++) { 1332 vsingle[i] = v[i]; 1333 } 1334 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 1335 PetscFunctionReturn(0); 1336 } 1337 #endif 1338 1339 1340 #undef __FUNCT__ 1341 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1342 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is) 1343 { 1344 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1345 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1346 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1347 PetscErrorCode ierr; 1348 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval; 1349 PetscTruth roworiented=a->roworiented; 1350 const MatScalar *value = v; 1351 MatScalar *ap,*aa = a->a,*bap; 1352 1353 PetscFunctionBegin; 1354 if (roworiented) { 1355 stepval = (n-1)*bs; 1356 } else { 1357 stepval = (m-1)*bs; 1358 } 1359 for (k=0; k<m; k++) { /* loop over added rows */ 1360 row = im[k]; 1361 if (row < 0) continue; 1362 #if defined(PETSC_USE_BOPT_g) 1363 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1364 #endif 1365 rp = aj + ai[row]; 1366 ap = aa + bs2*ai[row]; 1367 rmax = imax[row]; 1368 nrow = ailen[row]; 1369 low = 0; 1370 for (l=0; l<n; l++) { /* loop over added columns */ 1371 if (in[l] < 0) continue; 1372 #if defined(PETSC_USE_BOPT_g) 1373 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1374 #endif 1375 col = in[l]; 1376 if (roworiented) { 1377 value = v + k*(stepval+bs)*bs + l*bs; 1378 } else { 1379 value = v + l*(stepval+bs)*bs + k*bs; 1380 } 1381 if (!sorted) low = 0; high = nrow; 1382 while (high-low > 7) { 1383 t = (low+high)/2; 1384 if (rp[t] > col) high = t; 1385 else low = t; 1386 } 1387 for (i=low; i<high; i++) { 1388 if (rp[i] > col) break; 1389 if (rp[i] == col) { 1390 bap = ap + bs2*i; 1391 if (roworiented) { 1392 if (is == ADD_VALUES) { 1393 for (ii=0; ii<bs; ii++,value+=stepval) { 1394 for (jj=ii; jj<bs2; jj+=bs) { 1395 bap[jj] += *value++; 1396 } 1397 } 1398 } else { 1399 for (ii=0; ii<bs; ii++,value+=stepval) { 1400 for (jj=ii; jj<bs2; jj+=bs) { 1401 bap[jj] = *value++; 1402 } 1403 } 1404 } 1405 } else { 1406 if (is == ADD_VALUES) { 1407 for (ii=0; ii<bs; ii++,value+=stepval) { 1408 for (jj=0; jj<bs; jj++) { 1409 *bap++ += *value++; 1410 } 1411 } 1412 } else { 1413 for (ii=0; ii<bs; ii++,value+=stepval) { 1414 for (jj=0; jj<bs; jj++) { 1415 *bap++ = *value++; 1416 } 1417 } 1418 } 1419 } 1420 goto noinsert2; 1421 } 1422 } 1423 if (nonew == 1) goto noinsert2; 1424 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1425 if (nrow >= rmax) { 1426 /* there is no extra room in row, therefore enlarge */ 1427 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1428 MatScalar *new_a; 1429 1430 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1431 1432 /* malloc new storage space */ 1433 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 1434 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1435 new_j = (PetscInt*)(new_a + bs2*new_nz); 1436 new_i = new_j + new_nz; 1437 1438 /* copy over old data into new slots */ 1439 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 1440 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1441 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 1442 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 1443 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 1444 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1445 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1446 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1447 /* free up old matrix storage */ 1448 ierr = PetscFree(a->a);CHKERRQ(ierr); 1449 if (!a->singlemalloc) { 1450 ierr = PetscFree(a->i);CHKERRQ(ierr); 1451 ierr = PetscFree(a->j);CHKERRQ(ierr); 1452 } 1453 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1454 a->singlemalloc = PETSC_TRUE; 1455 1456 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 1457 rmax = imax[row] = imax[row] + CHUNKSIZE; 1458 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); 1459 a->maxnz += bs2*CHUNKSIZE; 1460 a->reallocs++; 1461 a->nz++; 1462 } 1463 N = nrow++ - 1; 1464 /* shift up all the later entries in this row */ 1465 for (ii=N; ii>=i; ii--) { 1466 rp[ii+1] = rp[ii]; 1467 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1468 } 1469 if (N >= i) { 1470 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1471 } 1472 rp[i] = col; 1473 bap = ap + bs2*i; 1474 if (roworiented) { 1475 for (ii=0; ii<bs; ii++,value+=stepval) { 1476 for (jj=ii; jj<bs2; jj+=bs) { 1477 bap[jj] = *value++; 1478 } 1479 } 1480 } else { 1481 for (ii=0; ii<bs; ii++,value+=stepval) { 1482 for (jj=0; jj<bs; jj++) { 1483 *bap++ = *value++; 1484 } 1485 } 1486 } 1487 noinsert2:; 1488 low = i; 1489 } 1490 ailen[row] = nrow; 1491 } 1492 PetscFunctionReturn(0); 1493 } 1494 1495 #undef __FUNCT__ 1496 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1497 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1498 { 1499 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1500 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1501 PetscInt m = A->m,*ip,N,*ailen = a->ilen; 1502 PetscErrorCode ierr; 1503 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1504 MatScalar *aa = a->a,*ap; 1505 1506 PetscFunctionBegin; 1507 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1508 1509 if (m) rmax = ailen[0]; 1510 for (i=1; i<mbs; i++) { 1511 /* move each row back by the amount of empty slots (fshift) before it*/ 1512 fshift += imax[i-1] - ailen[i-1]; 1513 rmax = PetscMax(rmax,ailen[i]); 1514 if (fshift) { 1515 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1516 N = ailen[i]; 1517 for (j=0; j<N; j++) { 1518 ip[j-fshift] = ip[j]; 1519 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1520 } 1521 } 1522 ai[i] = ai[i-1] + ailen[i-1]; 1523 } 1524 if (mbs) { 1525 fshift += imax[mbs-1] - ailen[mbs-1]; 1526 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1527 } 1528 /* reset ilen and imax for each row */ 1529 for (i=0; i<mbs; i++) { 1530 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1531 } 1532 a->nz = ai[mbs]; 1533 1534 /* diagonals may have moved, so kill the diagonal pointers */ 1535 a->idiagvalid = PETSC_FALSE; 1536 if (fshift && a->diag) { 1537 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1538 PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt)); 1539 a->diag = 0; 1540 } 1541 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->n,A->bs,fshift*bs2,a->nz*bs2); 1542 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs); 1543 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %D\n",rmax); 1544 a->reallocs = 0; 1545 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1546 1547 PetscFunctionReturn(0); 1548 } 1549 1550 1551 1552 /* 1553 This function returns an array of flags which indicate the locations of contiguous 1554 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1555 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1556 Assume: sizes should be long enough to hold all the values. 1557 */ 1558 #undef __FUNCT__ 1559 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1560 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1561 { 1562 PetscInt i,j,k,row; 1563 PetscTruth flg; 1564 1565 PetscFunctionBegin; 1566 for (i=0,j=0; i<n; j++) { 1567 row = idx[i]; 1568 if (row%bs!=0) { /* Not the begining of a block */ 1569 sizes[j] = 1; 1570 i++; 1571 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1572 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1573 i++; 1574 } else { /* Begining of the block, so check if the complete block exists */ 1575 flg = PETSC_TRUE; 1576 for (k=1; k<bs; k++) { 1577 if (row+k != idx[i+k]) { /* break in the block */ 1578 flg = PETSC_FALSE; 1579 break; 1580 } 1581 } 1582 if (flg == PETSC_TRUE) { /* No break in the bs */ 1583 sizes[j] = bs; 1584 i+= bs; 1585 } else { 1586 sizes[j] = 1; 1587 i++; 1588 } 1589 } 1590 } 1591 *bs_max = j; 1592 PetscFunctionReturn(0); 1593 } 1594 1595 #undef __FUNCT__ 1596 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1597 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag) 1598 { 1599 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1600 PetscErrorCode ierr; 1601 PetscInt i,j,k,count,is_n,*is_idx,*rows; 1602 PetscInt bs=A->bs,bs2=baij->bs2,*sizes,row,bs_max; 1603 PetscScalar zero = 0.0; 1604 MatScalar *aa; 1605 1606 PetscFunctionBegin; 1607 /* Make a copy of the IS and sort it */ 1608 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1609 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1610 1611 /* allocate memory for rows,sizes */ 1612 ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 1613 sizes = rows + is_n; 1614 1615 /* copy IS values to rows, and sort them */ 1616 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1617 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1618 if (baij->keepzeroedrows) { 1619 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1620 bs_max = is_n; 1621 } else { 1622 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1623 } 1624 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1625 1626 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1627 row = rows[j]; 1628 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 1629 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1630 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1631 if (sizes[i] == bs && !baij->keepzeroedrows) { 1632 if (diag) { 1633 if (baij->ilen[row/bs] > 0) { 1634 baij->ilen[row/bs] = 1; 1635 baij->j[baij->i[row/bs]] = row/bs; 1636 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1637 } 1638 /* Now insert all the diagonal values for this bs */ 1639 for (k=0; k<bs; k++) { 1640 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1641 } 1642 } else { /* (!diag) */ 1643 baij->ilen[row/bs] = 0; 1644 } /* end (!diag) */ 1645 } else { /* (sizes[i] != bs) */ 1646 #if defined (PETSC_USE_DEBUG) 1647 if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 1648 #endif 1649 for (k=0; k<count; k++) { 1650 aa[0] = zero; 1651 aa += bs; 1652 } 1653 if (diag) { 1654 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1655 } 1656 } 1657 } 1658 1659 ierr = PetscFree(rows);CHKERRQ(ierr); 1660 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1661 PetscFunctionReturn(0); 1662 } 1663 1664 #undef __FUNCT__ 1665 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1666 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1667 { 1668 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1669 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1670 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1671 PetscInt *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol; 1672 PetscErrorCode ierr; 1673 PetscInt ridx,cidx,bs2=a->bs2; 1674 PetscTruth roworiented=a->roworiented; 1675 MatScalar *ap,value,*aa=a->a,*bap; 1676 1677 PetscFunctionBegin; 1678 for (k=0; k<m; k++) { /* loop over added rows */ 1679 row = im[k]; brow = row/bs; 1680 if (row < 0) continue; 1681 #if defined(PETSC_USE_BOPT_g) 1682 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 1683 #endif 1684 rp = aj + ai[brow]; 1685 ap = aa + bs2*ai[brow]; 1686 rmax = imax[brow]; 1687 nrow = ailen[brow]; 1688 low = 0; 1689 for (l=0; l<n; l++) { /* loop over added columns */ 1690 if (in[l] < 0) continue; 1691 #if defined(PETSC_USE_BOPT_g) 1692 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1); 1693 #endif 1694 col = in[l]; bcol = col/bs; 1695 ridx = row % bs; cidx = col % bs; 1696 if (roworiented) { 1697 value = v[l + k*n]; 1698 } else { 1699 value = v[k + l*m]; 1700 } 1701 if (!sorted) low = 0; high = nrow; 1702 while (high-low > 7) { 1703 t = (low+high)/2; 1704 if (rp[t] > bcol) high = t; 1705 else low = t; 1706 } 1707 for (i=low; i<high; i++) { 1708 if (rp[i] > bcol) break; 1709 if (rp[i] == bcol) { 1710 bap = ap + bs2*i + bs*cidx + ridx; 1711 if (is == ADD_VALUES) *bap += value; 1712 else *bap = value; 1713 goto noinsert1; 1714 } 1715 } 1716 if (nonew == 1) goto noinsert1; 1717 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1718 if (nrow >= rmax) { 1719 /* there is no extra room in row, therefore enlarge */ 1720 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1721 MatScalar *new_a; 1722 1723 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1724 1725 /* Malloc new storage space */ 1726 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 1727 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1728 new_j = (PetscInt*)(new_a + bs2*new_nz); 1729 new_i = new_j + new_nz; 1730 1731 /* copy over old data into new slots */ 1732 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1733 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1734 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 1735 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1736 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 1737 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1738 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1739 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1740 /* free up old matrix storage */ 1741 ierr = PetscFree(a->a);CHKERRQ(ierr); 1742 if (!a->singlemalloc) { 1743 ierr = PetscFree(a->i);CHKERRQ(ierr); 1744 ierr = PetscFree(a->j);CHKERRQ(ierr); 1745 } 1746 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1747 a->singlemalloc = PETSC_TRUE; 1748 1749 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1750 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1751 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); 1752 a->maxnz += bs2*CHUNKSIZE; 1753 a->reallocs++; 1754 a->nz++; 1755 } 1756 N = nrow++ - 1; 1757 /* shift up all the later entries in this row */ 1758 for (ii=N; ii>=i; ii--) { 1759 rp[ii+1] = rp[ii]; 1760 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1761 } 1762 if (N>=i) { 1763 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1764 } 1765 rp[i] = bcol; 1766 ap[bs2*i + bs*cidx + ridx] = value; 1767 noinsert1:; 1768 low = i; 1769 } 1770 ailen[brow] = nrow; 1771 } 1772 PetscFunctionReturn(0); 1773 } 1774 1775 1776 #undef __FUNCT__ 1777 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1778 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1779 { 1780 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1781 Mat outA; 1782 PetscErrorCode ierr; 1783 PetscTruth row_identity,col_identity; 1784 1785 PetscFunctionBegin; 1786 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1787 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1788 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1789 if (!row_identity || !col_identity) { 1790 SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 1791 } 1792 1793 outA = inA; 1794 inA->factor = FACTOR_LU; 1795 1796 if (!a->diag) { 1797 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1798 } 1799 1800 a->row = row; 1801 a->col = col; 1802 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1803 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1804 1805 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1806 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1807 PetscLogObjectParent(inA,a->icol); 1808 1809 /* 1810 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1811 for ILU(0) factorization with natural ordering 1812 */ 1813 if (inA->bs < 8) { 1814 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1815 } else { 1816 if (!a->solve_work) { 1817 ierr = PetscMalloc((inA->m+inA->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1818 PetscLogObjectMemory(inA,(inA->m+inA->bs)*sizeof(PetscScalar)); 1819 } 1820 } 1821 1822 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1823 1824 PetscFunctionReturn(0); 1825 } 1826 #undef __FUNCT__ 1827 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1828 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A) 1829 { 1830 static PetscTruth called = PETSC_FALSE; 1831 MPI_Comm comm = A->comm; 1832 PetscErrorCode ierr; 1833 1834 PetscFunctionBegin; 1835 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1836 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1837 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1838 PetscFunctionReturn(0); 1839 } 1840 1841 EXTERN_C_BEGIN 1842 #undef __FUNCT__ 1843 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1844 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 1845 { 1846 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1847 PetscInt i,nz,nbs; 1848 1849 PetscFunctionBegin; 1850 nz = baij->maxnz/baij->bs2; 1851 nbs = baij->nbs; 1852 for (i=0; i<nz; i++) { 1853 baij->j[i] = indices[i]; 1854 } 1855 baij->nz = nz; 1856 for (i=0; i<nbs; i++) { 1857 baij->ilen[i] = baij->imax[i]; 1858 } 1859 1860 PetscFunctionReturn(0); 1861 } 1862 EXTERN_C_END 1863 1864 #undef __FUNCT__ 1865 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1866 /*@ 1867 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1868 in the matrix. 1869 1870 Input Parameters: 1871 + mat - the SeqBAIJ matrix 1872 - indices - the column indices 1873 1874 Level: advanced 1875 1876 Notes: 1877 This can be called if you have precomputed the nonzero structure of the 1878 matrix and want to provide it to the matrix object to improve the performance 1879 of the MatSetValues() operation. 1880 1881 You MUST have set the correct numbers of nonzeros per row in the call to 1882 MatCreateSeqBAIJ(). 1883 1884 MUST be called before any calls to MatSetValues(); 1885 1886 @*/ 1887 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1888 { 1889 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1890 1891 PetscFunctionBegin; 1892 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1893 PetscValidPointer(indices,2); 1894 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1895 if (f) { 1896 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1897 } else { 1898 SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 1899 } 1900 PetscFunctionReturn(0); 1901 } 1902 1903 #undef __FUNCT__ 1904 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1905 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1906 { 1907 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1908 PetscErrorCode ierr; 1909 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 1910 PetscReal atmp; 1911 PetscScalar *x,zero = 0.0; 1912 MatScalar *aa; 1913 PetscInt ncols,brow,krow,kcol; 1914 1915 PetscFunctionBegin; 1916 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1917 bs = A->bs; 1918 aa = a->a; 1919 ai = a->i; 1920 aj = a->j; 1921 mbs = a->mbs; 1922 1923 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1924 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1925 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1926 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1927 for (i=0; i<mbs; i++) { 1928 ncols = ai[1] - ai[0]; ai++; 1929 brow = bs*i; 1930 for (j=0; j<ncols; j++){ 1931 /* bcol = bs*(*aj); */ 1932 for (kcol=0; kcol<bs; kcol++){ 1933 for (krow=0; krow<bs; krow++){ 1934 atmp = PetscAbsScalar(*aa); aa++; 1935 row = brow + krow; /* row index */ 1936 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1937 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1938 } 1939 } 1940 aj++; 1941 } 1942 } 1943 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1944 PetscFunctionReturn(0); 1945 } 1946 1947 #undef __FUNCT__ 1948 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1949 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 1950 { 1951 PetscErrorCode ierr; 1952 1953 PetscFunctionBegin; 1954 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1955 PetscFunctionReturn(0); 1956 } 1957 1958 #undef __FUNCT__ 1959 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1960 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1961 { 1962 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1963 PetscFunctionBegin; 1964 *array = a->a; 1965 PetscFunctionReturn(0); 1966 } 1967 1968 #undef __FUNCT__ 1969 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1970 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1971 { 1972 PetscFunctionBegin; 1973 PetscFunctionReturn(0); 1974 } 1975 1976 #include "petscblaslapack.h" 1977 #undef __FUNCT__ 1978 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1979 PetscErrorCode MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1980 { 1981 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1982 PetscErrorCode ierr; 1983 PetscInt i,bs=Y->bs,j,bs2; 1984 PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz; 1985 1986 PetscFunctionBegin; 1987 if (str == SAME_NONZERO_PATTERN) { 1988 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1989 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1990 if (y->xtoy && y->XtoY != X) { 1991 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1992 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1993 } 1994 if (!y->xtoy) { /* get xtoy */ 1995 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1996 y->XtoY = X; 1997 } 1998 bs2 = bs*bs; 1999 for (i=0; i<x->nz; i++) { 2000 j = 0; 2001 while (j < bs2){ 2002 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 2003 j++; 2004 } 2005 } 2006 PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 2007 } else { 2008 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 2009 } 2010 PetscFunctionReturn(0); 2011 } 2012 2013 /* -------------------------------------------------------------------*/ 2014 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2015 MatGetRow_SeqBAIJ, 2016 MatRestoreRow_SeqBAIJ, 2017 MatMult_SeqBAIJ_N, 2018 /* 4*/ MatMultAdd_SeqBAIJ_N, 2019 MatMultTranspose_SeqBAIJ, 2020 MatMultTransposeAdd_SeqBAIJ, 2021 MatSolve_SeqBAIJ_N, 2022 0, 2023 0, 2024 /*10*/ 0, 2025 MatLUFactor_SeqBAIJ, 2026 0, 2027 0, 2028 MatTranspose_SeqBAIJ, 2029 /*15*/ MatGetInfo_SeqBAIJ, 2030 MatEqual_SeqBAIJ, 2031 MatGetDiagonal_SeqBAIJ, 2032 MatDiagonalScale_SeqBAIJ, 2033 MatNorm_SeqBAIJ, 2034 /*20*/ 0, 2035 MatAssemblyEnd_SeqBAIJ, 2036 0, 2037 MatSetOption_SeqBAIJ, 2038 MatZeroEntries_SeqBAIJ, 2039 /*25*/ MatZeroRows_SeqBAIJ, 2040 MatLUFactorSymbolic_SeqBAIJ, 2041 MatLUFactorNumeric_SeqBAIJ_N, 2042 0, 2043 0, 2044 /*30*/ MatSetUpPreallocation_SeqBAIJ, 2045 MatILUFactorSymbolic_SeqBAIJ, 2046 0, 2047 MatGetArray_SeqBAIJ, 2048 MatRestoreArray_SeqBAIJ, 2049 /*35*/ MatDuplicate_SeqBAIJ, 2050 0, 2051 0, 2052 MatILUFactor_SeqBAIJ, 2053 0, 2054 /*40*/ MatAXPY_SeqBAIJ, 2055 MatGetSubMatrices_SeqBAIJ, 2056 MatIncreaseOverlap_SeqBAIJ, 2057 MatGetValues_SeqBAIJ, 2058 0, 2059 /*45*/ MatPrintHelp_SeqBAIJ, 2060 MatScale_SeqBAIJ, 2061 0, 2062 0, 2063 0, 2064 /*50*/ 0, 2065 MatGetRowIJ_SeqBAIJ, 2066 MatRestoreRowIJ_SeqBAIJ, 2067 0, 2068 0, 2069 /*55*/ 0, 2070 0, 2071 0, 2072 0, 2073 MatSetValuesBlocked_SeqBAIJ, 2074 /*60*/ MatGetSubMatrix_SeqBAIJ, 2075 MatDestroy_SeqBAIJ, 2076 MatView_SeqBAIJ, 2077 MatGetPetscMaps_Petsc, 2078 0, 2079 /*65*/ 0, 2080 0, 2081 0, 2082 0, 2083 0, 2084 /*70*/ MatGetRowMax_SeqBAIJ, 2085 MatConvert_Basic, 2086 0, 2087 0, 2088 0, 2089 /*75*/ 0, 2090 0, 2091 0, 2092 0, 2093 0, 2094 /*80*/ 0, 2095 0, 2096 0, 2097 0, 2098 MatLoad_SeqBAIJ, 2099 /*85*/ 0, 2100 0, 2101 0, 2102 0, 2103 0, 2104 /*90*/ 0, 2105 0, 2106 0, 2107 0, 2108 0, 2109 /*95*/ 0, 2110 0, 2111 0, 2112 0}; 2113 2114 EXTERN_C_BEGIN 2115 #undef __FUNCT__ 2116 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2117 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2118 { 2119 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2120 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 2121 PetscErrorCode ierr; 2122 2123 PetscFunctionBegin; 2124 if (aij->nonew != 1) { 2125 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2126 } 2127 2128 /* allocate space for values if not already there */ 2129 if (!aij->saved_values) { 2130 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2131 } 2132 2133 /* copy values over */ 2134 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2135 PetscFunctionReturn(0); 2136 } 2137 EXTERN_C_END 2138 2139 EXTERN_C_BEGIN 2140 #undef __FUNCT__ 2141 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2142 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2143 { 2144 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2145 PetscErrorCode ierr; 2146 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 2147 2148 PetscFunctionBegin; 2149 if (aij->nonew != 1) { 2150 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 2151 } 2152 if (!aij->saved_values) { 2153 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2154 } 2155 2156 /* copy values over */ 2157 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2158 PetscFunctionReturn(0); 2159 } 2160 EXTERN_C_END 2161 2162 EXTERN_C_BEGIN 2163 extern PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,Mat*); 2164 extern PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,Mat*); 2165 EXTERN_C_END 2166 2167 EXTERN_C_BEGIN 2168 #undef __FUNCT__ 2169 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2170 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2171 { 2172 Mat_SeqBAIJ *b; 2173 PetscErrorCode ierr; 2174 PetscInt i,len,mbs,nbs,bs2,newbs = bs; 2175 PetscTruth flg; 2176 2177 PetscFunctionBegin; 2178 2179 B->preallocated = PETSC_TRUE; 2180 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 2181 if (nnz && newbs != bs) { 2182 SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2183 } 2184 bs = newbs; 2185 2186 mbs = B->m/bs; 2187 nbs = B->n/bs; 2188 bs2 = bs*bs; 2189 2190 if (mbs*bs!=B->m || nbs*bs!=B->n) { 2191 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->m,B->n,bs); 2192 } 2193 2194 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2195 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2196 if (nnz) { 2197 for (i=0; i<mbs; i++) { 2198 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2199 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2200 } 2201 } 2202 2203 b = (Mat_SeqBAIJ*)B->data; 2204 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 2205 B->ops->solve = MatSolve_SeqBAIJ_Update; 2206 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2207 if (!flg) { 2208 switch (bs) { 2209 case 1: 2210 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2211 B->ops->mult = MatMult_SeqBAIJ_1; 2212 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2213 break; 2214 case 2: 2215 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2216 B->ops->mult = MatMult_SeqBAIJ_2; 2217 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2218 B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2219 break; 2220 case 3: 2221 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2222 B->ops->mult = MatMult_SeqBAIJ_3; 2223 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2224 B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2225 break; 2226 case 4: 2227 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2228 B->ops->mult = MatMult_SeqBAIJ_4; 2229 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2230 B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2231 break; 2232 case 5: 2233 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2234 B->ops->mult = MatMult_SeqBAIJ_5; 2235 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2236 B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2237 break; 2238 case 6: 2239 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2240 B->ops->mult = MatMult_SeqBAIJ_6; 2241 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2242 break; 2243 case 7: 2244 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2245 B->ops->mult = MatMult_SeqBAIJ_7; 2246 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2247 break; 2248 default: 2249 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2250 B->ops->mult = MatMult_SeqBAIJ_N; 2251 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2252 break; 2253 } 2254 } 2255 B->bs = bs; 2256 b->mbs = mbs; 2257 b->nbs = nbs; 2258 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);CHKERRQ(ierr); 2259 if (!nnz) { 2260 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2261 else if (nz <= 0) nz = 1; 2262 for (i=0; i<mbs; i++) b->imax[i] = nz; 2263 nz = nz*mbs; 2264 } else { 2265 nz = 0; 2266 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2267 } 2268 2269 /* allocate the matrix space */ 2270 len = nz*sizeof(PetscInt) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(PetscInt); 2271 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 2272 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2273 b->j = (PetscInt*)(b->a + nz*bs2); 2274 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2275 b->i = b->j + nz; 2276 b->singlemalloc = PETSC_TRUE; 2277 2278 b->i[0] = 0; 2279 for (i=1; i<mbs+1; i++) { 2280 b->i[i] = b->i[i-1] + b->imax[i-1]; 2281 } 2282 2283 /* b->ilen will count nonzeros in each block row so far. */ 2284 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);CHKERRQ(ierr); 2285 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2286 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2287 2288 B->bs = bs; 2289 b->bs2 = bs2; 2290 b->mbs = mbs; 2291 b->nz = 0; 2292 b->maxnz = nz*bs2; 2293 B->info.nz_unneeded = (PetscReal)b->maxnz; 2294 PetscFunctionReturn(0); 2295 } 2296 EXTERN_C_END 2297 2298 /*MC 2299 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2300 block sparse compressed row format. 2301 2302 Options Database Keys: 2303 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2304 2305 Level: beginner 2306 2307 .seealso: MatCreateSeqBAIJ 2308 M*/ 2309 2310 EXTERN_C_BEGIN 2311 #undef __FUNCT__ 2312 #define __FUNCT__ "MatCreate_SeqBAIJ" 2313 PetscErrorCode MatCreate_SeqBAIJ(Mat B) 2314 { 2315 PetscErrorCode ierr; 2316 PetscMPIInt size; 2317 Mat_SeqBAIJ *b; 2318 2319 PetscFunctionBegin; 2320 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 2321 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2322 2323 B->m = B->M = PetscMax(B->m,B->M); 2324 B->n = B->N = PetscMax(B->n,B->N); 2325 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 2326 B->data = (void*)b; 2327 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 2328 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2329 B->factor = 0; 2330 B->lupivotthreshold = 1.0; 2331 B->mapping = 0; 2332 b->row = 0; 2333 b->col = 0; 2334 b->icol = 0; 2335 b->reallocs = 0; 2336 b->saved_values = 0; 2337 #if defined(PETSC_USE_MAT_SINGLE) 2338 b->setvalueslen = 0; 2339 b->setvaluescopy = PETSC_NULL; 2340 #endif 2341 2342 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 2343 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 2344 2345 b->sorted = PETSC_FALSE; 2346 b->roworiented = PETSC_TRUE; 2347 b->nonew = 0; 2348 b->diag = 0; 2349 b->solve_work = 0; 2350 b->mult_work = 0; 2351 B->spptr = 0; 2352 B->info.nz_unneeded = (PetscReal)b->maxnz; 2353 b->keepzeroedrows = PETSC_FALSE; 2354 b->xtoy = 0; 2355 b->XtoY = 0; 2356 2357 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2358 "MatStoreValues_SeqBAIJ", 2359 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 2360 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2361 "MatRetrieveValues_SeqBAIJ", 2362 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 2363 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 2364 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 2365 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 2366 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 2367 "MatConvert_SeqBAIJ_SeqAIJ", 2368 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 2369 #if !defined(PETSC_USE_64BIT_INT) 2370 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 2371 "MatConvert_SeqBAIJ_SeqSBAIJ", 2372 MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 2373 #endif 2374 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 2375 "MatSeqBAIJSetPreallocation_SeqBAIJ", 2376 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 2377 PetscFunctionReturn(0); 2378 } 2379 EXTERN_C_END 2380 2381 #undef __FUNCT__ 2382 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 2383 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 2384 { 2385 Mat C; 2386 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 2387 PetscErrorCode ierr; 2388 PetscInt i,len,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 2389 2390 PetscFunctionBegin; 2391 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 2392 2393 *B = 0; 2394 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 2395 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 2396 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2397 c = (Mat_SeqBAIJ*)C->data; 2398 2399 C->M = A->M; 2400 C->N = A->N; 2401 C->bs = A->bs; 2402 c->bs2 = a->bs2; 2403 c->mbs = a->mbs; 2404 c->nbs = a->nbs; 2405 2406 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr); 2407 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr); 2408 for (i=0; i<mbs; i++) { 2409 c->imax[i] = a->imax[i]; 2410 c->ilen[i] = a->ilen[i]; 2411 } 2412 2413 /* allocate the matrix space */ 2414 c->singlemalloc = PETSC_TRUE; 2415 len = (mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)); 2416 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 2417 c->j = (PetscInt*)(c->a + nz*bs2); 2418 c->i = c->j + nz; 2419 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 2420 if (mbs > 0) { 2421 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2422 if (cpvalues == MAT_COPY_VALUES) { 2423 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2424 } else { 2425 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 2426 } 2427 } 2428 2429 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2430 c->sorted = a->sorted; 2431 c->roworiented = a->roworiented; 2432 c->nonew = a->nonew; 2433 2434 if (a->diag) { 2435 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 2436 PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt)); 2437 for (i=0; i<mbs; i++) { 2438 c->diag[i] = a->diag[i]; 2439 } 2440 } else c->diag = 0; 2441 c->nz = a->nz; 2442 c->maxnz = a->maxnz; 2443 c->solve_work = 0; 2444 c->mult_work = 0; 2445 C->preallocated = PETSC_TRUE; 2446 C->assembled = PETSC_TRUE; 2447 *B = C; 2448 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 2449 PetscFunctionReturn(0); 2450 } 2451 2452 #undef __FUNCT__ 2453 #define __FUNCT__ "MatLoad_SeqBAIJ" 2454 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A) 2455 { 2456 Mat_SeqBAIJ *a; 2457 Mat B; 2458 PetscErrorCode ierr; 2459 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 2460 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2461 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 2462 PetscInt *masked,nmask,tmp,bs2,ishift; 2463 PetscMPIInt size; 2464 int fd; 2465 PetscScalar *aa; 2466 MPI_Comm comm = ((PetscObject)viewer)->comm; 2467 2468 PetscFunctionBegin; 2469 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2470 bs2 = bs*bs; 2471 2472 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2473 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 2474 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2475 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 2476 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 2477 M = header[1]; N = header[2]; nz = header[3]; 2478 2479 if (header[3] < 0) { 2480 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 2481 } 2482 2483 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 2484 2485 /* 2486 This code adds extra rows to make sure the number of rows is 2487 divisible by the blocksize 2488 */ 2489 mbs = M/bs; 2490 extra_rows = bs - M + bs*(mbs); 2491 if (extra_rows == bs) extra_rows = 0; 2492 else mbs++; 2493 if (extra_rows) { 2494 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 2495 } 2496 2497 /* read in row lengths */ 2498 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2499 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2500 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2501 2502 /* read in column indices */ 2503 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 2504 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 2505 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 2506 2507 /* loop over row lengths determining block row lengths */ 2508 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 2509 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2510 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 2511 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 2512 masked = mask + mbs; 2513 rowcount = 0; nzcount = 0; 2514 for (i=0; i<mbs; i++) { 2515 nmask = 0; 2516 for (j=0; j<bs; j++) { 2517 kmax = rowlengths[rowcount]; 2518 for (k=0; k<kmax; k++) { 2519 tmp = jj[nzcount++]/bs; 2520 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2521 } 2522 rowcount++; 2523 } 2524 browlengths[i] += nmask; 2525 /* zero out the mask elements we set */ 2526 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2527 } 2528 2529 /* create our matrix */ 2530 ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B); 2531 ierr = MatSetType(B,type);CHKERRQ(ierr); 2532 ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr); 2533 a = (Mat_SeqBAIJ*)B->data; 2534 2535 /* set matrix "i" values */ 2536 a->i[0] = 0; 2537 for (i=1; i<= mbs; i++) { 2538 a->i[i] = a->i[i-1] + browlengths[i-1]; 2539 a->ilen[i-1] = browlengths[i-1]; 2540 } 2541 a->nz = 0; 2542 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 2543 2544 /* read in nonzero values */ 2545 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 2546 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 2547 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 2548 2549 /* set "a" and "j" values into matrix */ 2550 nzcount = 0; jcount = 0; 2551 for (i=0; i<mbs; i++) { 2552 nzcountb = nzcount; 2553 nmask = 0; 2554 for (j=0; j<bs; j++) { 2555 kmax = rowlengths[i*bs+j]; 2556 for (k=0; k<kmax; k++) { 2557 tmp = jj[nzcount++]/bs; 2558 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2559 } 2560 } 2561 /* sort the masked values */ 2562 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 2563 2564 /* set "j" values into matrix */ 2565 maskcount = 1; 2566 for (j=0; j<nmask; j++) { 2567 a->j[jcount++] = masked[j]; 2568 mask[masked[j]] = maskcount++; 2569 } 2570 /* set "a" values into matrix */ 2571 ishift = bs2*a->i[i]; 2572 for (j=0; j<bs; j++) { 2573 kmax = rowlengths[i*bs+j]; 2574 for (k=0; k<kmax; k++) { 2575 tmp = jj[nzcountb]/bs ; 2576 block = mask[tmp] - 1; 2577 point = jj[nzcountb] - bs*tmp; 2578 idx = ishift + bs2*block + j + bs*point; 2579 a->a[idx] = (MatScalar)aa[nzcountb++]; 2580 } 2581 } 2582 /* zero out the mask elements we set */ 2583 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2584 } 2585 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2586 2587 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2588 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2589 ierr = PetscFree(aa);CHKERRQ(ierr); 2590 ierr = PetscFree(jj);CHKERRQ(ierr); 2591 ierr = PetscFree(mask);CHKERRQ(ierr); 2592 2593 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2594 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2595 ierr = MatView_Private(B);CHKERRQ(ierr); 2596 2597 *A = B; 2598 PetscFunctionReturn(0); 2599 } 2600 2601 #undef __FUNCT__ 2602 #define __FUNCT__ "MatCreateSeqBAIJ" 2603 /*@C 2604 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2605 compressed row) format. For good matrix assembly performance the 2606 user should preallocate the matrix storage by setting the parameter nz 2607 (or the array nnz). By setting these parameters accurately, performance 2608 during matrix assembly can be increased by more than a factor of 50. 2609 2610 Collective on MPI_Comm 2611 2612 Input Parameters: 2613 + comm - MPI communicator, set to PETSC_COMM_SELF 2614 . bs - size of block 2615 . m - number of rows 2616 . n - number of columns 2617 . nz - number of nonzero blocks per block row (same for all rows) 2618 - nnz - array containing the number of nonzero blocks in the various block rows 2619 (possibly different for each block row) or PETSC_NULL 2620 2621 Output Parameter: 2622 . A - the matrix 2623 2624 Options Database Keys: 2625 . -mat_no_unroll - uses code that does not unroll the loops in the 2626 block calculations (much slower) 2627 . -mat_block_size - size of the blocks to use 2628 2629 Level: intermediate 2630 2631 Notes: 2632 If the nnz parameter is given then the nz parameter is ignored 2633 2634 A nonzero block is any block that as 1 or more nonzeros in it 2635 2636 The block AIJ format is fully compatible with standard Fortran 77 2637 storage. That is, the stored row and column indices can begin at 2638 either one (as in Fortran) or zero. See the users' manual for details. 2639 2640 Specify the preallocated storage with either nz or nnz (not both). 2641 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2642 allocation. For additional details, see the users manual chapter on 2643 matrices. 2644 2645 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2646 @*/ 2647 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 2648 { 2649 PetscErrorCode ierr; 2650 2651 PetscFunctionBegin; 2652 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 2653 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2654 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 2655 PetscFunctionReturn(0); 2656 } 2657 2658 #undef __FUNCT__ 2659 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2660 /*@C 2661 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2662 per row in the matrix. For good matrix assembly performance the 2663 user should preallocate the matrix storage by setting the parameter nz 2664 (or the array nnz). By setting these parameters accurately, performance 2665 during matrix assembly can be increased by more than a factor of 50. 2666 2667 Collective on MPI_Comm 2668 2669 Input Parameters: 2670 + A - the matrix 2671 . bs - size of block 2672 . nz - number of block nonzeros per block row (same for all rows) 2673 - nnz - array containing the number of block nonzeros in the various block rows 2674 (possibly different for each block row) or PETSC_NULL 2675 2676 Options Database Keys: 2677 . -mat_no_unroll - uses code that does not unroll the loops in the 2678 block calculations (much slower) 2679 . -mat_block_size - size of the blocks to use 2680 2681 Level: intermediate 2682 2683 Notes: 2684 If the nnz parameter is given then the nz parameter is ignored 2685 2686 The block AIJ format is fully compatible with standard Fortran 77 2687 storage. That is, the stored row and column indices can begin at 2688 either one (as in Fortran) or zero. See the users' manual for details. 2689 2690 Specify the preallocated storage with either nz or nnz (not both). 2691 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2692 allocation. For additional details, see the users manual chapter on 2693 matrices. 2694 2695 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2696 @*/ 2697 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 2698 { 2699 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 2700 2701 PetscFunctionBegin; 2702 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2703 if (f) { 2704 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2705 } 2706 PetscFunctionReturn(0); 2707 } 2708 2709