1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc-private/kernels/blockinvert.h> 9 #include <petsc-private/kernels/blockmatmult.h> 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 19 PetscReal shift = 0.0; 20 21 PetscFunctionBegin; 22 if (a->idiagvalid) { 23 if (values) *values = a->idiag; 24 PetscFunctionReturn(0); 25 } 26 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 27 diag_offset = a->diag; 28 if (!a->idiag) { 29 ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 30 ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 31 } 32 diag = a->idiag; 33 mdiag = a->idiag+bs2*mbs; 34 if (values) *values = a->idiag; 35 /* factor and invert each block */ 36 switch (bs) { 37 case 1: 38 for (i=0; i<mbs; i++) { 39 odiag = v + 1*diag_offset[i]; 40 diag[0] = odiag[0]; 41 mdiag[0] = odiag[0]; 42 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 43 diag += 1; 44 mdiag += 1; 45 } 46 break; 47 case 2: 48 for (i=0; i<mbs; i++) { 49 odiag = v + 4*diag_offset[i]; 50 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 51 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 52 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 53 diag += 4; 54 mdiag += 4; 55 } 56 break; 57 case 3: 58 for (i=0; i<mbs; i++) { 59 odiag = v + 9*diag_offset[i]; 60 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 61 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 62 diag[8] = odiag[8]; 63 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 64 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 65 mdiag[8] = odiag[8]; 66 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 67 diag += 9; 68 mdiag += 9; 69 } 70 break; 71 case 4: 72 for (i=0; i<mbs; i++) { 73 odiag = v + 16*diag_offset[i]; 74 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 75 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 76 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 77 diag += 16; 78 mdiag += 16; 79 } 80 break; 81 case 5: 82 for (i=0; i<mbs; i++) { 83 odiag = v + 25*diag_offset[i]; 84 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 85 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 86 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 87 diag += 25; 88 mdiag += 25; 89 } 90 break; 91 case 6: 92 for (i=0; i<mbs; i++) { 93 odiag = v + 36*diag_offset[i]; 94 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 95 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 97 diag += 36; 98 mdiag += 36; 99 } 100 break; 101 case 7: 102 for (i=0; i<mbs; i++) { 103 odiag = v + 49*diag_offset[i]; 104 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 105 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 107 diag += 49; 108 mdiag += 49; 109 } 110 break; 111 default: 112 ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr); 113 for (i=0; i<mbs; i++) { 114 odiag = v + bs2*diag_offset[i]; 115 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 117 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 118 diag += bs2; 119 mdiag += bs2; 120 } 121 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 122 } 123 a->idiagvalid = PETSC_TRUE; 124 PetscFunctionReturn(0); 125 } 126 127 #undef __FUNCT__ 128 #define __FUNCT__ "MatSOR_SeqBAIJ" 129 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 130 { 131 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 132 PetscScalar *x,*work,*w,*workt,*t; 133 const MatScalar *v,*aa = a->a, *idiag; 134 const PetscScalar *b,*xb; 135 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 136 PetscErrorCode ierr; 137 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 138 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 139 140 PetscFunctionBegin; 141 its = its*lits; 142 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 143 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 144 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 145 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 146 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 147 148 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 149 150 if (!m) PetscFunctionReturn(0); 151 diag = a->diag; 152 idiag = a->idiag; 153 k = PetscMax(A->rmap->n,A->cmap->n); 154 if (!a->mult_work) { 155 ierr = PetscMalloc((2*k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr); 156 } 157 work = a->mult_work; 158 t = work + k+1; 159 if (!a->sor_work) { 160 ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr); 161 } 162 w = a->sor_work; 163 164 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 165 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 166 167 if (flag & SOR_ZERO_INITIAL_GUESS) { 168 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 169 switch (bs) { 170 case 1: 171 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 172 t[0] = b[0]; 173 i2 = 1; 174 idiag += 1; 175 for (i=1; i<m; i++) { 176 v = aa + ai[i]; 177 vi = aj + ai[i]; 178 nz = diag[i] - ai[i]; 179 s[0] = b[i2]; 180 for (j=0; j<nz; j++) { 181 xw[0] = x[vi[j]]; 182 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 183 } 184 t[i2] = s[0]; 185 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 186 x[i2] = xw[0]; 187 idiag += 1; 188 i2 += 1; 189 } 190 break; 191 case 2: 192 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 193 t[0] = b[0]; t[1] = b[1]; 194 i2 = 2; 195 idiag += 4; 196 for (i=1; i<m; i++) { 197 v = aa + 4*ai[i]; 198 vi = aj + ai[i]; 199 nz = diag[i] - ai[i]; 200 s[0] = b[i2]; s[1] = b[i2+1]; 201 for (j=0; j<nz; j++) { 202 idx = 2*vi[j]; 203 it = 4*j; 204 xw[0] = x[idx]; xw[1] = x[1+idx]; 205 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 206 } 207 t[i2] = s[0]; t[i2+1] = s[1]; 208 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 209 x[i2] = xw[0]; x[i2+1] = xw[1]; 210 idiag += 4; 211 i2 += 2; 212 } 213 break; 214 case 3: 215 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 216 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 217 i2 = 3; 218 idiag += 9; 219 for (i=1; i<m; i++) { 220 v = aa + 9*ai[i]; 221 vi = aj + ai[i]; 222 nz = diag[i] - ai[i]; 223 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 224 while (nz--) { 225 idx = 3*(*vi++); 226 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 227 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 228 v += 9; 229 } 230 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 231 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 232 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 233 idiag += 9; 234 i2 += 3; 235 } 236 break; 237 case 4: 238 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 239 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 240 i2 = 4; 241 idiag += 16; 242 for (i=1; i<m; i++) { 243 v = aa + 16*ai[i]; 244 vi = aj + ai[i]; 245 nz = diag[i] - ai[i]; 246 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 247 while (nz--) { 248 idx = 4*(*vi++); 249 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 250 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 251 v += 16; 252 } 253 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 254 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 255 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 256 idiag += 16; 257 i2 += 4; 258 } 259 break; 260 case 5: 261 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 262 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 263 i2 = 5; 264 idiag += 25; 265 for (i=1; i<m; i++) { 266 v = aa + 25*ai[i]; 267 vi = aj + ai[i]; 268 nz = diag[i] - ai[i]; 269 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 270 while (nz--) { 271 idx = 5*(*vi++); 272 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 273 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 274 v += 25; 275 } 276 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 277 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 278 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 279 idiag += 25; 280 i2 += 5; 281 } 282 break; 283 case 6: 284 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 285 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 286 i2 = 6; 287 idiag += 36; 288 for (i=1; i<m; i++) { 289 v = aa + 36*ai[i]; 290 vi = aj + ai[i]; 291 nz = diag[i] - ai[i]; 292 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 293 while (nz--) { 294 idx = 6*(*vi++); 295 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 296 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 297 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 298 v += 36; 299 } 300 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 301 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 302 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 303 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 304 idiag += 36; 305 i2 += 6; 306 } 307 break; 308 case 7: 309 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 310 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 311 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 312 i2 = 7; 313 idiag += 49; 314 for (i=1; i<m; i++) { 315 v = aa + 49*ai[i]; 316 vi = aj + ai[i]; 317 nz = diag[i] - ai[i]; 318 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 319 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 320 while (nz--) { 321 idx = 7*(*vi++); 322 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 323 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 324 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 325 v += 49; 326 } 327 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 328 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 329 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 330 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 331 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 332 idiag += 49; 333 i2 += 7; 334 } 335 break; 336 default: 337 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 338 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 339 i2 = bs; 340 idiag += bs2; 341 for (i=1; i<m; i++) { 342 v = aa + bs2*ai[i]; 343 vi = aj + ai[i]; 344 nz = diag[i] - ai[i]; 345 346 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 347 /* copy all rows of x that are needed into contiguous space */ 348 workt = work; 349 for (j=0; j<nz; j++) { 350 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 351 workt += bs; 352 } 353 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 354 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 355 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 356 357 idiag += bs2; 358 i2 += bs; 359 } 360 break; 361 } 362 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 363 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 364 xb = t; 365 } 366 else xb = b; 367 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 368 idiag = a->idiag+bs2*(a->mbs-1); 369 i2 = bs * (m-1); 370 switch (bs) { 371 case 1: 372 s[0] = xb[i2]; 373 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 374 x[i2] = xw[0]; 375 i2 -= 1; 376 for (i=m-2; i>=0; i--) { 377 v = aa + (diag[i]+1); 378 vi = aj + diag[i] + 1; 379 nz = ai[i+1] - diag[i] - 1; 380 s[0] = xb[i2]; 381 for (j=0; j<nz; j++) { 382 xw[0] = x[vi[j]]; 383 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 384 } 385 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 386 x[i2] = xw[0]; 387 idiag -= 1; 388 i2 -= 1; 389 } 390 break; 391 case 2: 392 s[0] = xb[i2]; s[1] = xb[i2+1]; 393 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 394 x[i2] = xw[0]; x[i2+1] = xw[1]; 395 i2 -= 2; 396 idiag -= 4; 397 for (i=m-2; i>=0; i--) { 398 v = aa + 4*(diag[i] + 1); 399 vi = aj + diag[i] + 1; 400 nz = ai[i+1] - diag[i] - 1; 401 s[0] = xb[i2]; s[1] = xb[i2+1]; 402 for (j=0; j<nz; j++) { 403 idx = 2*vi[j]; 404 it = 4*j; 405 xw[0] = x[idx]; xw[1] = x[1+idx]; 406 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 407 } 408 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 409 x[i2] = xw[0]; x[i2+1] = xw[1]; 410 idiag -= 4; 411 i2 -= 2; 412 } 413 break; 414 case 3: 415 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 416 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 417 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 418 i2 -= 3; 419 idiag -= 9; 420 for (i=m-2; i>=0; i--) { 421 v = aa + 9*(diag[i]+1); 422 vi = aj + diag[i] + 1; 423 nz = ai[i+1] - diag[i] - 1; 424 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 425 while (nz--) { 426 idx = 3*(*vi++); 427 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 428 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 429 v += 9; 430 } 431 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 432 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 433 idiag -= 9; 434 i2 -= 3; 435 } 436 break; 437 case 4: 438 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 439 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 440 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 441 i2 -= 4; 442 idiag -= 16; 443 for (i=m-2; i>=0; i--) { 444 v = aa + 16*(diag[i]+1); 445 vi = aj + diag[i] + 1; 446 nz = ai[i+1] - diag[i] - 1; 447 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 448 while (nz--) { 449 idx = 4*(*vi++); 450 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 451 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 452 v += 16; 453 } 454 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 455 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 456 idiag -= 16; 457 i2 -= 4; 458 } 459 break; 460 case 5: 461 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 462 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 463 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 464 i2 -= 5; 465 idiag -= 25; 466 for (i=m-2; i>=0; i--) { 467 v = aa + 25*(diag[i]+1); 468 vi = aj + diag[i] + 1; 469 nz = ai[i+1] - diag[i] - 1; 470 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 471 while (nz--) { 472 idx = 5*(*vi++); 473 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 474 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 475 v += 25; 476 } 477 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 478 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 479 idiag -= 25; 480 i2 -= 5; 481 } 482 break; 483 case 6: 484 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 485 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 486 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 487 i2 -= 6; 488 idiag -= 36; 489 for (i=m-2; i>=0; i--) { 490 v = aa + 36*(diag[i]+1); 491 vi = aj + diag[i] + 1; 492 nz = ai[i+1] - diag[i] - 1; 493 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 494 while (nz--) { 495 idx = 6*(*vi++); 496 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 497 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 498 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 499 v += 36; 500 } 501 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 502 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 503 idiag -= 36; 504 i2 -= 6; 505 } 506 break; 507 case 7: 508 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 509 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 510 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 511 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 512 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 513 i2 -= 7; 514 idiag -= 49; 515 for (i=m-2; i>=0; i--) { 516 v = aa + 49*(diag[i]+1); 517 vi = aj + diag[i] + 1; 518 nz = ai[i+1] - diag[i] - 1; 519 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 520 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 521 while (nz--) { 522 idx = 7*(*vi++); 523 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 524 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 525 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 526 v += 49; 527 } 528 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 529 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 530 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 531 idiag -= 49; 532 i2 -= 7; 533 } 534 break; 535 default: 536 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 537 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 538 i2 -= bs; 539 idiag -= bs2; 540 for (i=m-2; i>=0; i--) { 541 v = aa + bs2*(diag[i]+1); 542 vi = aj + diag[i] + 1; 543 nz = ai[i+1] - diag[i] - 1; 544 545 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 546 /* copy all rows of x that are needed into contiguous space */ 547 workt = work; 548 for (j=0; j<nz; j++) { 549 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 550 workt += bs; 551 } 552 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 553 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 554 555 idiag -= bs2; 556 i2 -= bs; 557 } 558 break; 559 } 560 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 561 } 562 its--; 563 } 564 while (its--) { 565 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 566 idiag = a->idiag; 567 i2 = 0; 568 switch (bs) { 569 case 1: 570 for (i=0; i<m; i++) { 571 v = aa + ai[i]; 572 vi = aj + ai[i]; 573 nz = ai[i+1] - ai[i]; 574 s[0] = b[i2]; 575 for (j=0; j<nz; j++) { 576 xw[0] = x[vi[j]]; 577 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 578 } 579 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 580 x[i2] += xw[0]; 581 idiag += 1; 582 i2 += 1; 583 } 584 break; 585 case 2: 586 for (i=0; i<m; i++) { 587 v = aa + 4*ai[i]; 588 vi = aj + ai[i]; 589 nz = ai[i+1] - ai[i]; 590 s[0] = b[i2]; s[1] = b[i2+1]; 591 for (j=0; j<nz; j++) { 592 idx = 2*vi[j]; 593 it = 4*j; 594 xw[0] = x[idx]; xw[1] = x[1+idx]; 595 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 596 } 597 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 598 x[i2] += xw[0]; x[i2+1] += xw[1]; 599 idiag += 4; 600 i2 += 2; 601 } 602 break; 603 case 3: 604 for (i=0; i<m; i++) { 605 v = aa + 9*ai[i]; 606 vi = aj + ai[i]; 607 nz = ai[i+1] - ai[i]; 608 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 609 while (nz--) { 610 idx = 3*(*vi++); 611 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 612 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 613 v += 9; 614 } 615 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 616 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 617 idiag += 9; 618 i2 += 3; 619 } 620 break; 621 case 4: 622 for (i=0; i<m; i++) { 623 v = aa + 16*ai[i]; 624 vi = aj + ai[i]; 625 nz = ai[i+1] - ai[i]; 626 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 627 while (nz--) { 628 idx = 4*(*vi++); 629 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 630 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 631 v += 16; 632 } 633 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 634 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 635 idiag += 16; 636 i2 += 4; 637 } 638 break; 639 case 5: 640 for (i=0; i<m; i++) { 641 v = aa + 25*ai[i]; 642 vi = aj + ai[i]; 643 nz = ai[i+1] - ai[i]; 644 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 645 while (nz--) { 646 idx = 5*(*vi++); 647 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 648 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 649 v += 25; 650 } 651 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 652 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 653 idiag += 25; 654 i2 += 5; 655 } 656 break; 657 case 6: 658 for (i=0; i<m; i++) { 659 v = aa + 36*ai[i]; 660 vi = aj + ai[i]; 661 nz = ai[i+1] - ai[i]; 662 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 663 while (nz--) { 664 idx = 6*(*vi++); 665 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 666 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 667 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 668 v += 36; 669 } 670 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 671 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 672 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 673 idiag += 36; 674 i2 += 6; 675 } 676 break; 677 case 7: 678 for (i=0; i<m; i++) { 679 v = aa + 49*ai[i]; 680 vi = aj + ai[i]; 681 nz = ai[i+1] - ai[i]; 682 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 683 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 684 while (nz--) { 685 idx = 7*(*vi++); 686 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 687 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 688 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 689 v += 49; 690 } 691 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 692 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 693 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 694 idiag += 49; 695 i2 += 7; 696 } 697 break; 698 default: 699 for (i=0; i<m; i++) { 700 v = aa + bs2*ai[i]; 701 vi = aj + ai[i]; 702 nz = ai[i+1] - ai[i]; 703 704 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 705 /* copy all rows of x that are needed into contiguous space */ 706 workt = work; 707 for (j=0; j<nz; j++) { 708 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 709 workt += bs; 710 } 711 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 712 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 713 714 idiag += bs2; 715 i2 += bs; 716 } 717 break; 718 } 719 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 720 } 721 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 722 idiag = a->idiag+bs2*(a->mbs-1); 723 i2 = bs * (m-1); 724 switch (bs) { 725 case 1: 726 for (i=m-1; i>=0; i--) { 727 v = aa + ai[i]; 728 vi = aj + ai[i]; 729 nz = ai[i+1] - ai[i]; 730 s[0] = b[i2]; 731 for (j=0; j<nz; j++) { 732 xw[0] = x[vi[j]]; 733 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 734 } 735 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 736 x[i2] += xw[0]; 737 idiag -= 1; 738 i2 -= 1; 739 } 740 break; 741 case 2: 742 for (i=m-1; i>=0; i--) { 743 v = aa + 4*ai[i]; 744 vi = aj + ai[i]; 745 nz = ai[i+1] - ai[i]; 746 s[0] = b[i2]; s[1] = b[i2+1]; 747 for (j=0; j<nz; j++) { 748 idx = 2*vi[j]; 749 it = 4*j; 750 xw[0] = x[idx]; xw[1] = x[1+idx]; 751 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 752 } 753 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 754 x[i2] += xw[0]; x[i2+1] += xw[1]; 755 idiag -= 4; 756 i2 -= 2; 757 } 758 break; 759 case 3: 760 for (i=m-1; i>=0; i--) { 761 v = aa + 9*ai[i]; 762 vi = aj + ai[i]; 763 nz = ai[i+1] - ai[i]; 764 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 765 while (nz--) { 766 idx = 3*(*vi++); 767 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 768 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 769 v += 9; 770 } 771 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 772 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 773 idiag -= 9; 774 i2 -= 3; 775 } 776 break; 777 case 4: 778 for (i=m-1; i>=0; i--) { 779 v = aa + 16*ai[i]; 780 vi = aj + ai[i]; 781 nz = ai[i+1] - ai[i]; 782 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 783 while (nz--) { 784 idx = 4*(*vi++); 785 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 786 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 787 v += 16; 788 } 789 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 790 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 791 idiag -= 16; 792 i2 -= 4; 793 } 794 break; 795 case 5: 796 for (i=m-1; i>=0; i--) { 797 v = aa + 25*ai[i]; 798 vi = aj + ai[i]; 799 nz = ai[i+1] - ai[i]; 800 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 801 while (nz--) { 802 idx = 5*(*vi++); 803 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 804 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 805 v += 25; 806 } 807 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 808 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 809 idiag -= 25; 810 i2 -= 5; 811 } 812 break; 813 case 6: 814 for (i=m-1; i>=0; i--) { 815 v = aa + 36*ai[i]; 816 vi = aj + ai[i]; 817 nz = ai[i+1] - ai[i]; 818 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 819 while (nz--) { 820 idx = 6*(*vi++); 821 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 822 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 823 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 824 v += 36; 825 } 826 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 827 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 828 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 829 idiag -= 36; 830 i2 -= 6; 831 } 832 break; 833 case 7: 834 for (i=m-1; i>=0; i--) { 835 v = aa + 49*ai[i]; 836 vi = aj + ai[i]; 837 nz = ai[i+1] - ai[i]; 838 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 839 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 840 while (nz--) { 841 idx = 7*(*vi++); 842 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 843 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 844 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 845 v += 49; 846 } 847 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 848 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 849 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 850 idiag -= 49; 851 i2 -= 7; 852 } 853 break; 854 default: 855 for (i=m-1; i>=0; i--) { 856 v = aa + bs2*ai[i]; 857 vi = aj + ai[i]; 858 nz = ai[i+1] - ai[i]; 859 860 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 861 /* copy all rows of x that are needed into contiguous space */ 862 workt = work; 863 for (j=0; j<nz; j++) { 864 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 865 workt += bs; 866 } 867 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 868 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 869 870 idiag -= bs2; 871 i2 -= bs; 872 } 873 break; 874 } 875 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 876 } 877 } 878 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 879 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 880 PetscFunctionReturn(0); 881 } 882 883 884 /* 885 Special version for direct calls from Fortran (Used in PETSc-fun3d) 886 */ 887 #if defined(PETSC_HAVE_FORTRAN_CAPS) 888 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 889 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 890 #define matsetvaluesblocked4_ matsetvaluesblocked4 891 #endif 892 893 #undef __FUNCT__ 894 #define __FUNCT__ "matsetvaluesblocked4_" 895 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 896 { 897 Mat A = *AA; 898 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 899 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 900 PetscInt *ai =a->i,*ailen=a->ilen; 901 PetscInt *aj =a->j,stepval,lastcol = -1; 902 const PetscScalar *value = v; 903 MatScalar *ap,*aa = a->a,*bap; 904 905 PetscFunctionBegin; 906 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 907 stepval = (n-1)*4; 908 for (k=0; k<m; k++) { /* loop over added rows */ 909 row = im[k]; 910 rp = aj + ai[row]; 911 ap = aa + 16*ai[row]; 912 nrow = ailen[row]; 913 low = 0; 914 high = nrow; 915 for (l=0; l<n; l++) { /* loop over added columns */ 916 col = in[l]; 917 if (col <= lastcol) low = 0; 918 else high = nrow; 919 lastcol = col; 920 value = v + k*(stepval+4 + l)*4; 921 while (high-low > 7) { 922 t = (low+high)/2; 923 if (rp[t] > col) high = t; 924 else low = t; 925 } 926 for (i=low; i<high; i++) { 927 if (rp[i] > col) break; 928 if (rp[i] == col) { 929 bap = ap + 16*i; 930 for (ii=0; ii<4; ii++,value+=stepval) { 931 for (jj=ii; jj<16; jj+=4) { 932 bap[jj] += *value++; 933 } 934 } 935 goto noinsert2; 936 } 937 } 938 N = nrow++ - 1; 939 high++; /* added new column index thus must search to one higher than before */ 940 /* shift up all the later entries in this row */ 941 for (ii=N; ii>=i; ii--) { 942 rp[ii+1] = rp[ii]; 943 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 944 } 945 if (N >= i) { 946 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 947 } 948 rp[i] = col; 949 bap = ap + 16*i; 950 for (ii=0; ii<4; ii++,value+=stepval) { 951 for (jj=ii; jj<16; jj+=4) { 952 bap[jj] = *value++; 953 } 954 } 955 noinsert2:; 956 low = i; 957 } 958 ailen[row] = nrow; 959 } 960 PetscFunctionReturnVoid(); 961 } 962 963 #if defined(PETSC_HAVE_FORTRAN_CAPS) 964 #define matsetvalues4_ MATSETVALUES4 965 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 966 #define matsetvalues4_ matsetvalues4 967 #endif 968 969 #undef __FUNCT__ 970 #define __FUNCT__ "MatSetValues4_" 971 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 972 { 973 Mat A = *AA; 974 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 975 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 976 PetscInt *ai=a->i,*ailen=a->ilen; 977 PetscInt *aj=a->j,brow,bcol; 978 PetscInt ridx,cidx,lastcol = -1; 979 MatScalar *ap,value,*aa=a->a,*bap; 980 981 PetscFunctionBegin; 982 for (k=0; k<m; k++) { /* loop over added rows */ 983 row = im[k]; brow = row/4; 984 rp = aj + ai[brow]; 985 ap = aa + 16*ai[brow]; 986 nrow = ailen[brow]; 987 low = 0; 988 high = nrow; 989 for (l=0; l<n; l++) { /* loop over added columns */ 990 col = in[l]; bcol = col/4; 991 ridx = row % 4; cidx = col % 4; 992 value = v[l + k*n]; 993 if (col <= lastcol) low = 0; 994 else high = nrow; 995 lastcol = col; 996 while (high-low > 7) { 997 t = (low+high)/2; 998 if (rp[t] > bcol) high = t; 999 else low = t; 1000 } 1001 for (i=low; i<high; i++) { 1002 if (rp[i] > bcol) break; 1003 if (rp[i] == bcol) { 1004 bap = ap + 16*i + 4*cidx + ridx; 1005 *bap += value; 1006 goto noinsert1; 1007 } 1008 } 1009 N = nrow++ - 1; 1010 high++; /* added new column thus must search to one higher than before */ 1011 /* shift up all the later entries in this row */ 1012 for (ii=N; ii>=i; ii--) { 1013 rp[ii+1] = rp[ii]; 1014 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1015 } 1016 if (N>=i) { 1017 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1018 } 1019 rp[i] = bcol; 1020 ap[16*i + 4*cidx + ridx] = value; 1021 noinsert1:; 1022 low = i; 1023 } 1024 ailen[brow] = nrow; 1025 } 1026 PetscFunctionReturnVoid(); 1027 } 1028 1029 /* 1030 Checks for missing diagonals 1031 */ 1032 #undef __FUNCT__ 1033 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1034 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1035 { 1036 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1037 PetscErrorCode ierr; 1038 PetscInt *diag,*jj = a->j,i; 1039 1040 PetscFunctionBegin; 1041 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1042 *missing = PETSC_FALSE; 1043 if (A->rmap->n > 0 && !jj) { 1044 *missing = PETSC_TRUE; 1045 if (d) *d = 0; 1046 PetscInfo(A,"Matrix has no entries therefore is missing diagonal"); 1047 } else { 1048 diag = a->diag; 1049 for (i=0; i<a->mbs; i++) { 1050 if (jj[diag[i]] != i) { 1051 *missing = PETSC_TRUE; 1052 if (d) *d = i; 1053 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1054 break; 1055 } 1056 } 1057 } 1058 PetscFunctionReturn(0); 1059 } 1060 1061 #undef __FUNCT__ 1062 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1063 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1064 { 1065 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1066 PetscErrorCode ierr; 1067 PetscInt i,j,m = a->mbs; 1068 1069 PetscFunctionBegin; 1070 if (!a->diag) { 1071 ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 1072 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1073 a->free_diag = PETSC_TRUE; 1074 } 1075 for (i=0; i<m; i++) { 1076 a->diag[i] = a->i[i+1]; 1077 for (j=a->i[i]; j<a->i[i+1]; j++) { 1078 if (a->j[j] == i) { 1079 a->diag[i] = j; 1080 break; 1081 } 1082 } 1083 } 1084 PetscFunctionReturn(0); 1085 } 1086 1087 1088 #undef __FUNCT__ 1089 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1090 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1091 { 1092 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1093 PetscErrorCode ierr; 1094 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1095 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1096 1097 PetscFunctionBegin; 1098 *nn = n; 1099 if (!ia) PetscFunctionReturn(0); 1100 if (symmetric) { 1101 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1102 nz = tia[n]; 1103 } else { 1104 tia = a->i; tja = a->j; 1105 } 1106 1107 if (!blockcompressed && bs > 1) { 1108 (*nn) *= bs; 1109 /* malloc & create the natural set of indices */ 1110 ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 1111 if (n) { 1112 (*ia)[0] = 0; 1113 for (j=1; j<bs; j++) { 1114 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1115 } 1116 } 1117 1118 for (i=1; i<n; i++) { 1119 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1120 for (j=1; j<bs; j++) { 1121 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1122 } 1123 } 1124 if (n) { 1125 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1126 } 1127 1128 if (inja) { 1129 ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 1130 cnt = 0; 1131 for (i=0; i<n; i++) { 1132 for (j=0; j<bs; j++) { 1133 for (k=tia[i]; k<tia[i+1]; k++) { 1134 for (l=0; l<bs; l++) { 1135 (*ja)[cnt++] = bs*tja[k] + l; 1136 } 1137 } 1138 } 1139 } 1140 } 1141 1142 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1143 ierr = PetscFree(tia);CHKERRQ(ierr); 1144 ierr = PetscFree(tja);CHKERRQ(ierr); 1145 } 1146 } else if (oshift == 1) { 1147 if (symmetric) { 1148 nz = tia[A->rmap->n/bs]; 1149 /* add 1 to i and j indices */ 1150 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1151 *ia = tia; 1152 if (ja) { 1153 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1154 *ja = tja; 1155 } 1156 } else { 1157 nz = a->i[A->rmap->n/bs]; 1158 /* malloc space and add 1 to i and j indices */ 1159 ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1160 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1161 if (ja) { 1162 ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1163 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1164 } 1165 } 1166 } else { 1167 *ia = tia; 1168 if (ja) *ja = tja; 1169 } 1170 PetscFunctionReturn(0); 1171 } 1172 1173 #undef __FUNCT__ 1174 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1175 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1176 { 1177 PetscErrorCode ierr; 1178 1179 PetscFunctionBegin; 1180 if (!ia) PetscFunctionReturn(0); 1181 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1182 ierr = PetscFree(*ia);CHKERRQ(ierr); 1183 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1184 } 1185 PetscFunctionReturn(0); 1186 } 1187 1188 #undef __FUNCT__ 1189 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1190 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1191 { 1192 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1193 PetscErrorCode ierr; 1194 1195 PetscFunctionBegin; 1196 #if defined(PETSC_USE_LOG) 1197 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1198 #endif 1199 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1200 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1201 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1202 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1203 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1204 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1205 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1206 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1207 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1208 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1209 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1210 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1211 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1212 1213 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1214 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1215 ierr = PetscFree(A->data);CHKERRQ(ierr); 1216 1217 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1220 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1222 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1223 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1224 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1225 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1226 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1227 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1228 PetscFunctionReturn(0); 1229 } 1230 1231 #undef __FUNCT__ 1232 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1233 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1234 { 1235 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1236 PetscErrorCode ierr; 1237 1238 PetscFunctionBegin; 1239 switch (op) { 1240 case MAT_ROW_ORIENTED: 1241 a->roworiented = flg; 1242 break; 1243 case MAT_KEEP_NONZERO_PATTERN: 1244 a->keepnonzeropattern = flg; 1245 break; 1246 case MAT_NEW_NONZERO_LOCATIONS: 1247 a->nonew = (flg ? 0 : 1); 1248 break; 1249 case MAT_NEW_NONZERO_LOCATION_ERR: 1250 a->nonew = (flg ? -1 : 0); 1251 break; 1252 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1253 a->nonew = (flg ? -2 : 0); 1254 break; 1255 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1256 a->nounused = (flg ? -1 : 0); 1257 break; 1258 case MAT_NEW_DIAGONALS: 1259 case MAT_IGNORE_OFF_PROC_ENTRIES: 1260 case MAT_USE_HASH_TABLE: 1261 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1262 break; 1263 case MAT_SPD: 1264 case MAT_SYMMETRIC: 1265 case MAT_STRUCTURALLY_SYMMETRIC: 1266 case MAT_HERMITIAN: 1267 case MAT_SYMMETRY_ETERNAL: 1268 /* These options are handled directly by MatSetOption() */ 1269 break; 1270 default: 1271 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1272 } 1273 PetscFunctionReturn(0); 1274 } 1275 1276 #undef __FUNCT__ 1277 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1278 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1279 { 1280 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1281 PetscErrorCode ierr; 1282 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1283 MatScalar *aa,*aa_i; 1284 PetscScalar *v_i; 1285 1286 PetscFunctionBegin; 1287 bs = A->rmap->bs; 1288 ai = a->i; 1289 aj = a->j; 1290 aa = a->a; 1291 bs2 = a->bs2; 1292 1293 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1294 1295 bn = row/bs; /* Block number */ 1296 bp = row % bs; /* Block Position */ 1297 M = ai[bn+1] - ai[bn]; 1298 *nz = bs*M; 1299 1300 if (v) { 1301 *v = 0; 1302 if (*nz) { 1303 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 1304 for (i=0; i<M; i++) { /* for each block in the block row */ 1305 v_i = *v + i*bs; 1306 aa_i = aa + bs2*(ai[bn] + i); 1307 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1308 } 1309 } 1310 } 1311 1312 if (idx) { 1313 *idx = 0; 1314 if (*nz) { 1315 ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 1316 for (i=0; i<M; i++) { /* for each block in the block row */ 1317 idx_i = *idx + i*bs; 1318 itmp = bs*aj[ai[bn] + i]; 1319 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1320 } 1321 } 1322 } 1323 PetscFunctionReturn(0); 1324 } 1325 1326 #undef __FUNCT__ 1327 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1328 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1329 { 1330 PetscErrorCode ierr; 1331 1332 PetscFunctionBegin; 1333 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1334 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1335 PetscFunctionReturn(0); 1336 } 1337 1338 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1339 1340 #undef __FUNCT__ 1341 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1342 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1343 { 1344 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1345 Mat C; 1346 PetscErrorCode ierr; 1347 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1348 PetscInt *rows,*cols,bs2=a->bs2; 1349 MatScalar *array; 1350 1351 PetscFunctionBegin; 1352 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1353 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1354 ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1355 ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 1356 1357 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1358 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1359 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1360 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1361 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr); 1362 ierr = PetscFree(col);CHKERRQ(ierr); 1363 } else { 1364 C = *B; 1365 } 1366 1367 array = a->a; 1368 ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr); 1369 for (i=0; i<mbs; i++) { 1370 cols[0] = i*bs; 1371 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1372 len = ai[i+1] - ai[i]; 1373 for (j=0; j<len; j++) { 1374 rows[0] = (*aj++)*bs; 1375 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1376 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1377 array += bs2; 1378 } 1379 } 1380 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1381 1382 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1383 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1384 1385 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1386 *B = C; 1387 } else { 1388 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1389 } 1390 PetscFunctionReturn(0); 1391 } 1392 1393 #undef __FUNCT__ 1394 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1395 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1396 { 1397 PetscErrorCode ierr; 1398 Mat Btrans; 1399 1400 PetscFunctionBegin; 1401 *f = PETSC_FALSE; 1402 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1403 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1404 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1405 PetscFunctionReturn(0); 1406 } 1407 1408 #undef __FUNCT__ 1409 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1410 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1411 { 1412 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1413 PetscErrorCode ierr; 1414 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1415 int fd; 1416 PetscScalar *aa; 1417 FILE *file; 1418 1419 PetscFunctionBegin; 1420 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1421 ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1422 col_lens[0] = MAT_FILE_CLASSID; 1423 1424 col_lens[1] = A->rmap->N; 1425 col_lens[2] = A->cmap->n; 1426 col_lens[3] = a->nz*bs2; 1427 1428 /* store lengths of each row and write (including header) to file */ 1429 count = 0; 1430 for (i=0; i<a->mbs; i++) { 1431 for (j=0; j<bs; j++) { 1432 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1433 } 1434 } 1435 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1436 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1437 1438 /* store column indices (zero start index) */ 1439 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1440 count = 0; 1441 for (i=0; i<a->mbs; i++) { 1442 for (j=0; j<bs; j++) { 1443 for (k=a->i[i]; k<a->i[i+1]; k++) { 1444 for (l=0; l<bs; l++) { 1445 jj[count++] = bs*a->j[k] + l; 1446 } 1447 } 1448 } 1449 } 1450 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1451 ierr = PetscFree(jj);CHKERRQ(ierr); 1452 1453 /* store nonzero values */ 1454 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1455 count = 0; 1456 for (i=0; i<a->mbs; i++) { 1457 for (j=0; j<bs; j++) { 1458 for (k=a->i[i]; k<a->i[i+1]; k++) { 1459 for (l=0; l<bs; l++) { 1460 aa[count++] = a->a[bs2*k + l*bs + j]; 1461 } 1462 } 1463 } 1464 } 1465 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1466 ierr = PetscFree(aa);CHKERRQ(ierr); 1467 1468 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1469 if (file) { 1470 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1471 } 1472 PetscFunctionReturn(0); 1473 } 1474 1475 #undef __FUNCT__ 1476 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1477 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1478 { 1479 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1480 PetscErrorCode ierr; 1481 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1482 PetscViewerFormat format; 1483 1484 PetscFunctionBegin; 1485 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1486 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1487 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1488 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1489 Mat aij; 1490 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1491 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1492 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1493 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1494 PetscFunctionReturn(0); 1495 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1496 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1497 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer);CHKERRQ(ierr); 1498 for (i=0; i<a->mbs; i++) { 1499 for (j=0; j<bs; j++) { 1500 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1501 for (k=a->i[i]; k<a->i[i+1]; k++) { 1502 for (l=0; l<bs; l++) { 1503 #if defined(PETSC_USE_COMPLEX) 1504 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1505 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 1506 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1507 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1508 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 1509 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1510 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1511 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1512 } 1513 #else 1514 if (a->a[bs2*k + l*bs + j] != 0.0) { 1515 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1516 } 1517 #endif 1518 } 1519 } 1520 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1521 } 1522 } 1523 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1524 } else { 1525 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1526 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer);CHKERRQ(ierr); 1527 for (i=0; i<a->mbs; i++) { 1528 for (j=0; j<bs; j++) { 1529 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1530 for (k=a->i[i]; k<a->i[i+1]; k++) { 1531 for (l=0; l<bs; l++) { 1532 #if defined(PETSC_USE_COMPLEX) 1533 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1534 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 1535 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1536 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1537 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 1538 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1539 } else { 1540 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1541 } 1542 #else 1543 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1544 #endif 1545 } 1546 } 1547 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1548 } 1549 } 1550 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1551 } 1552 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1553 PetscFunctionReturn(0); 1554 } 1555 1556 #include <petscdraw.h> 1557 #undef __FUNCT__ 1558 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1559 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1560 { 1561 Mat A = (Mat) Aa; 1562 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1563 PetscErrorCode ierr; 1564 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1565 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1566 MatScalar *aa; 1567 PetscViewer viewer; 1568 PetscViewerFormat format; 1569 1570 PetscFunctionBegin; 1571 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1572 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1573 1574 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1575 1576 /* loop over matrix elements drawing boxes */ 1577 1578 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1579 color = PETSC_DRAW_BLUE; 1580 for (i=0,row=0; i<mbs; i++,row+=bs) { 1581 for (j=a->i[i]; j<a->i[i+1]; j++) { 1582 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1583 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1584 aa = a->a + j*bs2; 1585 for (k=0; k<bs; k++) { 1586 for (l=0; l<bs; l++) { 1587 if (PetscRealPart(*aa++) >= 0.) continue; 1588 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1589 } 1590 } 1591 } 1592 } 1593 color = PETSC_DRAW_CYAN; 1594 for (i=0,row=0; i<mbs; i++,row+=bs) { 1595 for (j=a->i[i]; j<a->i[i+1]; j++) { 1596 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1597 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1598 aa = a->a + j*bs2; 1599 for (k=0; k<bs; k++) { 1600 for (l=0; l<bs; l++) { 1601 if (PetscRealPart(*aa++) != 0.) continue; 1602 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1603 } 1604 } 1605 } 1606 } 1607 color = PETSC_DRAW_RED; 1608 for (i=0,row=0; i<mbs; i++,row+=bs) { 1609 for (j=a->i[i]; j<a->i[i+1]; j++) { 1610 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1611 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1612 aa = a->a + j*bs2; 1613 for (k=0; k<bs; k++) { 1614 for (l=0; l<bs; l++) { 1615 if (PetscRealPart(*aa++) <= 0.) continue; 1616 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1617 } 1618 } 1619 } 1620 } 1621 } else { 1622 /* use contour shading to indicate magnitude of values */ 1623 /* first determine max of all nonzero values */ 1624 PetscDraw popup; 1625 PetscReal scale,maxv = 0.0; 1626 1627 for (i=0; i<a->nz*a->bs2; i++) { 1628 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1629 } 1630 scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv; 1631 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1632 if (popup) { 1633 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1634 } 1635 for (i=0,row=0; i<mbs; i++,row+=bs) { 1636 for (j=a->i[i]; j<a->i[i+1]; j++) { 1637 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1638 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1639 aa = a->a + j*bs2; 1640 for (k=0; k<bs; k++) { 1641 for (l=0; l<bs; l++) { 1642 color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++)); 1643 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1644 } 1645 } 1646 } 1647 } 1648 } 1649 PetscFunctionReturn(0); 1650 } 1651 1652 #undef __FUNCT__ 1653 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1654 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1655 { 1656 PetscErrorCode ierr; 1657 PetscReal xl,yl,xr,yr,w,h; 1658 PetscDraw draw; 1659 PetscBool isnull; 1660 1661 PetscFunctionBegin; 1662 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1663 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1664 1665 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1666 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1667 xr += w; yr += h; xl = -w; yl = -h; 1668 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1669 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1670 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1671 PetscFunctionReturn(0); 1672 } 1673 1674 #undef __FUNCT__ 1675 #define __FUNCT__ "MatView_SeqBAIJ" 1676 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1677 { 1678 PetscErrorCode ierr; 1679 PetscBool iascii,isbinary,isdraw; 1680 1681 PetscFunctionBegin; 1682 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1683 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1684 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1685 if (iascii) { 1686 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1687 } else if (isbinary) { 1688 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1689 } else if (isdraw) { 1690 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1691 } else { 1692 Mat B; 1693 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1694 ierr = MatView(B,viewer);CHKERRQ(ierr); 1695 ierr = MatDestroy(&B);CHKERRQ(ierr); 1696 } 1697 PetscFunctionReturn(0); 1698 } 1699 1700 1701 #undef __FUNCT__ 1702 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1703 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1704 { 1705 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1706 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1707 PetscInt *ai = a->i,*ailen = a->ilen; 1708 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1709 MatScalar *ap,*aa = a->a; 1710 1711 PetscFunctionBegin; 1712 for (k=0; k<m; k++) { /* loop over rows */ 1713 row = im[k]; brow = row/bs; 1714 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1715 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1716 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1717 nrow = ailen[brow]; 1718 for (l=0; l<n; l++) { /* loop over columns */ 1719 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1720 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1721 col = in[l]; 1722 bcol = col/bs; 1723 cidx = col%bs; 1724 ridx = row%bs; 1725 high = nrow; 1726 low = 0; /* assume unsorted */ 1727 while (high-low > 5) { 1728 t = (low+high)/2; 1729 if (rp[t] > bcol) high = t; 1730 else low = t; 1731 } 1732 for (i=low; i<high; i++) { 1733 if (rp[i] > bcol) break; 1734 if (rp[i] == bcol) { 1735 *v++ = ap[bs2*i+bs*cidx+ridx]; 1736 goto finished; 1737 } 1738 } 1739 *v++ = 0.0; 1740 finished:; 1741 } 1742 } 1743 PetscFunctionReturn(0); 1744 } 1745 1746 #undef __FUNCT__ 1747 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1748 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1749 { 1750 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1751 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1752 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1753 PetscErrorCode ierr; 1754 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1755 PetscBool roworiented=a->roworiented; 1756 const PetscScalar *value = v; 1757 MatScalar *ap,*aa = a->a,*bap; 1758 1759 PetscFunctionBegin; 1760 if (roworiented) { 1761 stepval = (n-1)*bs; 1762 } else { 1763 stepval = (m-1)*bs; 1764 } 1765 for (k=0; k<m; k++) { /* loop over added rows */ 1766 row = im[k]; 1767 if (row < 0) continue; 1768 #if defined(PETSC_USE_DEBUG) 1769 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1770 #endif 1771 rp = aj + ai[row]; 1772 ap = aa + bs2*ai[row]; 1773 rmax = imax[row]; 1774 nrow = ailen[row]; 1775 low = 0; 1776 high = nrow; 1777 for (l=0; l<n; l++) { /* loop over added columns */ 1778 if (in[l] < 0) continue; 1779 #if defined(PETSC_USE_DEBUG) 1780 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1781 #endif 1782 col = in[l]; 1783 if (roworiented) { 1784 value = v + (k*(stepval+bs) + l)*bs; 1785 } else { 1786 value = v + (l*(stepval+bs) + k)*bs; 1787 } 1788 if (col <= lastcol) low = 0; 1789 else high = nrow; 1790 lastcol = col; 1791 while (high-low > 7) { 1792 t = (low+high)/2; 1793 if (rp[t] > col) high = t; 1794 else low = t; 1795 } 1796 for (i=low; i<high; i++) { 1797 if (rp[i] > col) break; 1798 if (rp[i] == col) { 1799 bap = ap + bs2*i; 1800 if (roworiented) { 1801 if (is == ADD_VALUES) { 1802 for (ii=0; ii<bs; ii++,value+=stepval) { 1803 for (jj=ii; jj<bs2; jj+=bs) { 1804 bap[jj] += *value++; 1805 } 1806 } 1807 } else { 1808 for (ii=0; ii<bs; ii++,value+=stepval) { 1809 for (jj=ii; jj<bs2; jj+=bs) { 1810 bap[jj] = *value++; 1811 } 1812 } 1813 } 1814 } else { 1815 if (is == ADD_VALUES) { 1816 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1817 for (jj=0; jj<bs; jj++) { 1818 bap[jj] += value[jj]; 1819 } 1820 bap += bs; 1821 } 1822 } else { 1823 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1824 for (jj=0; jj<bs; jj++) { 1825 bap[jj] = value[jj]; 1826 } 1827 bap += bs; 1828 } 1829 } 1830 } 1831 goto noinsert2; 1832 } 1833 } 1834 if (nonew == 1) goto noinsert2; 1835 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1836 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1837 N = nrow++ - 1; high++; 1838 /* shift up all the later entries in this row */ 1839 for (ii=N; ii>=i; ii--) { 1840 rp[ii+1] = rp[ii]; 1841 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1842 } 1843 if (N >= i) { 1844 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1845 } 1846 rp[i] = col; 1847 bap = ap + bs2*i; 1848 if (roworiented) { 1849 for (ii=0; ii<bs; ii++,value+=stepval) { 1850 for (jj=ii; jj<bs2; jj+=bs) { 1851 bap[jj] = *value++; 1852 } 1853 } 1854 } else { 1855 for (ii=0; ii<bs; ii++,value+=stepval) { 1856 for (jj=0; jj<bs; jj++) { 1857 *bap++ = *value++; 1858 } 1859 } 1860 } 1861 noinsert2:; 1862 low = i; 1863 } 1864 ailen[row] = nrow; 1865 } 1866 PetscFunctionReturn(0); 1867 } 1868 1869 #undef __FUNCT__ 1870 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1871 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1872 { 1873 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1874 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1875 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1876 PetscErrorCode ierr; 1877 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1878 MatScalar *aa = a->a,*ap; 1879 PetscReal ratio=0.6; 1880 1881 PetscFunctionBegin; 1882 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1883 1884 if (m) rmax = ailen[0]; 1885 for (i=1; i<mbs; i++) { 1886 /* move each row back by the amount of empty slots (fshift) before it*/ 1887 fshift += imax[i-1] - ailen[i-1]; 1888 rmax = PetscMax(rmax,ailen[i]); 1889 if (fshift) { 1890 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1891 N = ailen[i]; 1892 for (j=0; j<N; j++) { 1893 ip[j-fshift] = ip[j]; 1894 1895 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1896 } 1897 } 1898 ai[i] = ai[i-1] + ailen[i-1]; 1899 } 1900 if (mbs) { 1901 fshift += imax[mbs-1] - ailen[mbs-1]; 1902 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1903 } 1904 1905 /* reset ilen and imax for each row */ 1906 a->nonzerorowcnt = 0; 1907 for (i=0; i<mbs; i++) { 1908 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1909 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1910 } 1911 a->nz = ai[mbs]; 1912 1913 /* diagonals may have moved, so kill the diagonal pointers */ 1914 a->idiagvalid = PETSC_FALSE; 1915 if (fshift && a->diag) { 1916 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1917 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1918 a->diag = 0; 1919 } 1920 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1921 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1922 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1923 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1924 1925 A->info.mallocs += a->reallocs; 1926 a->reallocs = 0; 1927 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1928 1929 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1930 1931 A->same_nonzero = PETSC_TRUE; 1932 PetscFunctionReturn(0); 1933 } 1934 1935 /* 1936 This function returns an array of flags which indicate the locations of contiguous 1937 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1938 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1939 Assume: sizes should be long enough to hold all the values. 1940 */ 1941 #undef __FUNCT__ 1942 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1943 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1944 { 1945 PetscInt i,j,k,row; 1946 PetscBool flg; 1947 1948 PetscFunctionBegin; 1949 for (i=0,j=0; i<n; j++) { 1950 row = idx[i]; 1951 if (row%bs!=0) { /* Not the begining of a block */ 1952 sizes[j] = 1; 1953 i++; 1954 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1955 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1956 i++; 1957 } else { /* Begining of the block, so check if the complete block exists */ 1958 flg = PETSC_TRUE; 1959 for (k=1; k<bs; k++) { 1960 if (row+k != idx[i+k]) { /* break in the block */ 1961 flg = PETSC_FALSE; 1962 break; 1963 } 1964 } 1965 if (flg) { /* No break in the bs */ 1966 sizes[j] = bs; 1967 i += bs; 1968 } else { 1969 sizes[j] = 1; 1970 i++; 1971 } 1972 } 1973 } 1974 *bs_max = j; 1975 PetscFunctionReturn(0); 1976 } 1977 1978 #undef __FUNCT__ 1979 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1980 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 1981 { 1982 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1983 PetscErrorCode ierr; 1984 PetscInt i,j,k,count,*rows; 1985 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 1986 PetscScalar zero = 0.0; 1987 MatScalar *aa; 1988 const PetscScalar *xx; 1989 PetscScalar *bb; 1990 1991 PetscFunctionBegin; 1992 /* fix right hand side if needed */ 1993 if (x && b) { 1994 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1995 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1996 for (i=0; i<is_n; i++) { 1997 bb[is_idx[i]] = diag*xx[is_idx[i]]; 1998 } 1999 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2000 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2001 } 2002 2003 /* Make a copy of the IS and sort it */ 2004 /* allocate memory for rows,sizes */ 2005 ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr); 2006 2007 /* copy IS values to rows, and sort them */ 2008 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2009 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2010 2011 if (baij->keepnonzeropattern) { 2012 for (i=0; i<is_n; i++) sizes[i] = 1; 2013 bs_max = is_n; 2014 A->same_nonzero = PETSC_TRUE; 2015 } else { 2016 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2017 2018 A->same_nonzero = PETSC_FALSE; 2019 } 2020 2021 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2022 row = rows[j]; 2023 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2024 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2025 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2026 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2027 if (diag != (PetscScalar)0.0) { 2028 if (baij->ilen[row/bs] > 0) { 2029 baij->ilen[row/bs] = 1; 2030 baij->j[baij->i[row/bs]] = row/bs; 2031 2032 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2033 } 2034 /* Now insert all the diagonal values for this bs */ 2035 for (k=0; k<bs; k++) { 2036 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2037 } 2038 } else { /* (diag == 0.0) */ 2039 baij->ilen[row/bs] = 0; 2040 } /* end (diag == 0.0) */ 2041 } else { /* (sizes[i] != bs) */ 2042 #if defined(PETSC_USE_DEBUG) 2043 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2044 #endif 2045 for (k=0; k<count; k++) { 2046 aa[0] = zero; 2047 aa += bs; 2048 } 2049 if (diag != (PetscScalar)0.0) { 2050 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2051 } 2052 } 2053 } 2054 2055 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2056 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2057 PetscFunctionReturn(0); 2058 } 2059 2060 #undef __FUNCT__ 2061 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2062 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2063 { 2064 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2065 PetscErrorCode ierr; 2066 PetscInt i,j,k,count; 2067 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2068 PetscScalar zero = 0.0; 2069 MatScalar *aa; 2070 const PetscScalar *xx; 2071 PetscScalar *bb; 2072 PetscBool *zeroed,vecs = PETSC_FALSE; 2073 2074 PetscFunctionBegin; 2075 /* fix right hand side if needed */ 2076 if (x && b) { 2077 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2078 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2079 vecs = PETSC_TRUE; 2080 } 2081 A->same_nonzero = PETSC_TRUE; 2082 2083 /* zero the columns */ 2084 ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr); 2085 ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 2086 for (i=0; i<is_n; i++) { 2087 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2088 zeroed[is_idx[i]] = PETSC_TRUE; 2089 } 2090 for (i=0; i<A->rmap->N; i++) { 2091 if (!zeroed[i]) { 2092 row = i/bs; 2093 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2094 for (k=0; k<bs; k++) { 2095 col = bs*baij->j[j] + k; 2096 if (zeroed[col]) { 2097 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2098 if (vecs) bb[i] -= aa[0]*xx[col]; 2099 aa[0] = 0.0; 2100 } 2101 } 2102 } 2103 } else if (vecs) bb[i] = diag*xx[i]; 2104 } 2105 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2106 if (vecs) { 2107 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2108 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2109 } 2110 2111 /* zero the rows */ 2112 for (i=0; i<is_n; i++) { 2113 row = is_idx[i]; 2114 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2115 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2116 for (k=0; k<count; k++) { 2117 aa[0] = zero; 2118 aa += bs; 2119 } 2120 if (diag != (PetscScalar)0.0) { 2121 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2122 } 2123 } 2124 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2125 PetscFunctionReturn(0); 2126 } 2127 2128 #undef __FUNCT__ 2129 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2130 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2131 { 2132 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2133 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2134 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2135 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2136 PetscErrorCode ierr; 2137 PetscInt ridx,cidx,bs2=a->bs2; 2138 PetscBool roworiented=a->roworiented; 2139 MatScalar *ap,value,*aa=a->a,*bap; 2140 2141 PetscFunctionBegin; 2142 if (v) PetscValidScalarPointer(v,6); 2143 for (k=0; k<m; k++) { /* loop over added rows */ 2144 row = im[k]; 2145 brow = row/bs; 2146 if (row < 0) continue; 2147 #if defined(PETSC_USE_DEBUG) 2148 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2149 #endif 2150 rp = aj + ai[brow]; 2151 ap = aa + bs2*ai[brow]; 2152 rmax = imax[brow]; 2153 nrow = ailen[brow]; 2154 low = 0; 2155 high = nrow; 2156 for (l=0; l<n; l++) { /* loop over added columns */ 2157 if (in[l] < 0) continue; 2158 #if defined(PETSC_USE_DEBUG) 2159 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2160 #endif 2161 col = in[l]; bcol = col/bs; 2162 ridx = row % bs; cidx = col % bs; 2163 if (roworiented) { 2164 value = v[l + k*n]; 2165 } else { 2166 value = v[k + l*m]; 2167 } 2168 if (col <= lastcol) low = 0; else high = nrow; 2169 lastcol = col; 2170 while (high-low > 7) { 2171 t = (low+high)/2; 2172 if (rp[t] > bcol) high = t; 2173 else low = t; 2174 } 2175 for (i=low; i<high; i++) { 2176 if (rp[i] > bcol) break; 2177 if (rp[i] == bcol) { 2178 bap = ap + bs2*i + bs*cidx + ridx; 2179 if (is == ADD_VALUES) *bap += value; 2180 else *bap = value; 2181 goto noinsert1; 2182 } 2183 } 2184 if (nonew == 1) goto noinsert1; 2185 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2186 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2187 N = nrow++ - 1; high++; 2188 /* shift up all the later entries in this row */ 2189 for (ii=N; ii>=i; ii--) { 2190 rp[ii+1] = rp[ii]; 2191 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2192 } 2193 if (N>=i) { 2194 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2195 } 2196 rp[i] = bcol; 2197 ap[bs2*i + bs*cidx + ridx] = value; 2198 a->nz++; 2199 noinsert1:; 2200 low = i; 2201 } 2202 ailen[brow] = nrow; 2203 } 2204 A->same_nonzero = PETSC_FALSE; 2205 PetscFunctionReturn(0); 2206 } 2207 2208 #undef __FUNCT__ 2209 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2210 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2211 { 2212 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2213 Mat outA; 2214 PetscErrorCode ierr; 2215 PetscBool row_identity,col_identity; 2216 2217 PetscFunctionBegin; 2218 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2219 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2220 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2221 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2222 2223 outA = inA; 2224 inA->factortype = MAT_FACTOR_LU; 2225 2226 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2227 2228 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2229 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2230 a->row = row; 2231 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2232 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2233 a->col = col; 2234 2235 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2236 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2237 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2238 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2239 2240 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2241 if (!a->solve_work) { 2242 ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2243 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2244 } 2245 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2246 PetscFunctionReturn(0); 2247 } 2248 2249 #undef __FUNCT__ 2250 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2251 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2252 { 2253 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2254 PetscInt i,nz,mbs; 2255 2256 PetscFunctionBegin; 2257 nz = baij->maxnz; 2258 mbs = baij->mbs; 2259 for (i=0; i<nz; i++) { 2260 baij->j[i] = indices[i]; 2261 } 2262 baij->nz = nz; 2263 for (i=0; i<mbs; i++) { 2264 baij->ilen[i] = baij->imax[i]; 2265 } 2266 PetscFunctionReturn(0); 2267 } 2268 2269 #undef __FUNCT__ 2270 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2271 /*@ 2272 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2273 in the matrix. 2274 2275 Input Parameters: 2276 + mat - the SeqBAIJ matrix 2277 - indices - the column indices 2278 2279 Level: advanced 2280 2281 Notes: 2282 This can be called if you have precomputed the nonzero structure of the 2283 matrix and want to provide it to the matrix object to improve the performance 2284 of the MatSetValues() operation. 2285 2286 You MUST have set the correct numbers of nonzeros per row in the call to 2287 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2288 2289 MUST be called before any calls to MatSetValues(); 2290 2291 @*/ 2292 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2293 { 2294 PetscErrorCode ierr; 2295 2296 PetscFunctionBegin; 2297 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2298 PetscValidPointer(indices,2); 2299 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2300 PetscFunctionReturn(0); 2301 } 2302 2303 #undef __FUNCT__ 2304 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2305 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2306 { 2307 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2308 PetscErrorCode ierr; 2309 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2310 PetscReal atmp; 2311 PetscScalar *x,zero = 0.0; 2312 MatScalar *aa; 2313 PetscInt ncols,brow,krow,kcol; 2314 2315 PetscFunctionBegin; 2316 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2317 bs = A->rmap->bs; 2318 aa = a->a; 2319 ai = a->i; 2320 aj = a->j; 2321 mbs = a->mbs; 2322 2323 ierr = VecSet(v,zero);CHKERRQ(ierr); 2324 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2325 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2326 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2327 for (i=0; i<mbs; i++) { 2328 ncols = ai[1] - ai[0]; ai++; 2329 brow = bs*i; 2330 for (j=0; j<ncols; j++) { 2331 for (kcol=0; kcol<bs; kcol++) { 2332 for (krow=0; krow<bs; krow++) { 2333 atmp = PetscAbsScalar(*aa);aa++; 2334 row = brow + krow; /* row index */ 2335 /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2336 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2337 } 2338 } 2339 aj++; 2340 } 2341 } 2342 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2343 PetscFunctionReturn(0); 2344 } 2345 2346 #undef __FUNCT__ 2347 #define __FUNCT__ "MatCopy_SeqBAIJ" 2348 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2349 { 2350 PetscErrorCode ierr; 2351 2352 PetscFunctionBegin; 2353 /* If the two matrices have the same copy implementation, use fast copy. */ 2354 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2355 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2356 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2357 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2358 2359 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2360 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2361 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2362 } else { 2363 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2364 } 2365 PetscFunctionReturn(0); 2366 } 2367 2368 #undef __FUNCT__ 2369 #define __FUNCT__ "MatSetUp_SeqBAIJ" 2370 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2371 { 2372 PetscErrorCode ierr; 2373 2374 PetscFunctionBegin; 2375 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2376 PetscFunctionReturn(0); 2377 } 2378 2379 #undef __FUNCT__ 2380 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ" 2381 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2382 { 2383 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2384 2385 PetscFunctionBegin; 2386 *array = a->a; 2387 PetscFunctionReturn(0); 2388 } 2389 2390 #undef __FUNCT__ 2391 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ" 2392 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2393 { 2394 PetscFunctionBegin; 2395 PetscFunctionReturn(0); 2396 } 2397 2398 #undef __FUNCT__ 2399 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2400 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2401 { 2402 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2403 PetscErrorCode ierr; 2404 PetscInt i,bs=Y->rmap->bs,j,bs2=bs*bs; 2405 PetscBLASInt one=1; 2406 2407 PetscFunctionBegin; 2408 if (str == SAME_NONZERO_PATTERN) { 2409 PetscScalar alpha = a; 2410 PetscBLASInt bnz; 2411 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2412 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2413 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2414 if (y->xtoy && y->XtoY != X) { 2415 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2416 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2417 } 2418 if (!y->xtoy) { /* get xtoy */ 2419 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,NULL, y->i,y->j,NULL, &y->xtoy);CHKERRQ(ierr); 2420 y->XtoY = X; 2421 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2422 } 2423 for (i=0; i<x->nz; i++) { 2424 j = 0; 2425 while (j < bs2) { 2426 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2427 j++; 2428 } 2429 } 2430 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 2431 } else { 2432 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2433 } 2434 PetscFunctionReturn(0); 2435 } 2436 2437 #undef __FUNCT__ 2438 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2439 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2440 { 2441 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2442 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2443 MatScalar *aa = a->a; 2444 2445 PetscFunctionBegin; 2446 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2447 PetscFunctionReturn(0); 2448 } 2449 2450 #undef __FUNCT__ 2451 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2452 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2453 { 2454 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2455 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2456 MatScalar *aa = a->a; 2457 2458 PetscFunctionBegin; 2459 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2460 PetscFunctionReturn(0); 2461 } 2462 2463 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 2464 2465 #undef __FUNCT__ 2466 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2467 /* 2468 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2469 */ 2470 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2471 { 2472 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2473 PetscErrorCode ierr; 2474 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2475 PetscInt nz = a->i[m],row,*jj,mr,col; 2476 2477 PetscFunctionBegin; 2478 *nn = n; 2479 if (!ia) PetscFunctionReturn(0); 2480 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2481 else { 2482 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 2483 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2484 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 2485 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 2486 jj = a->j; 2487 for (i=0; i<nz; i++) { 2488 collengths[jj[i]]++; 2489 } 2490 cia[0] = oshift; 2491 for (i=0; i<n; i++) { 2492 cia[i+1] = cia[i] + collengths[i]; 2493 } 2494 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2495 jj = a->j; 2496 for (row=0; row<m; row++) { 2497 mr = a->i[row+1] - a->i[row]; 2498 for (i=0; i<mr; i++) { 2499 col = *jj++; 2500 2501 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2502 } 2503 } 2504 ierr = PetscFree(collengths);CHKERRQ(ierr); 2505 *ia = cia; *ja = cja; 2506 } 2507 PetscFunctionReturn(0); 2508 } 2509 2510 #undef __FUNCT__ 2511 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2512 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2513 { 2514 PetscErrorCode ierr; 2515 2516 PetscFunctionBegin; 2517 if (!ia) PetscFunctionReturn(0); 2518 ierr = PetscFree(*ia);CHKERRQ(ierr); 2519 ierr = PetscFree(*ja);CHKERRQ(ierr); 2520 PetscFunctionReturn(0); 2521 } 2522 2523 #undef __FUNCT__ 2524 #define __FUNCT__ "MatFDColoringApply_BAIJ" 2525 PetscErrorCode MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2526 { 2527 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 2528 PetscErrorCode ierr; 2529 PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow; 2530 PetscScalar dx,*y,*xx,*w3_array; 2531 PetscScalar *vscale_array; 2532 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2533 Vec w1 = coloring->w1,w2=coloring->w2,w3; 2534 void *fctx = coloring->fctx; 2535 PetscBool flg = PETSC_FALSE; 2536 PetscInt ctype = coloring->ctype,N,col_start=0,col_end=0; 2537 Vec x1_tmp; 2538 2539 PetscFunctionBegin; 2540 PetscValidHeaderSpecific(J,MAT_CLASSID,1); 2541 PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2); 2542 PetscValidHeaderSpecific(x1,VEC_CLASSID,3); 2543 if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2544 2545 ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2546 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2547 ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 2548 if (flg) { 2549 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2550 } else { 2551 PetscBool assembled; 2552 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2553 if (assembled) { 2554 ierr = MatZeroEntries(J);CHKERRQ(ierr); 2555 } 2556 } 2557 2558 x1_tmp = x1; 2559 if (!coloring->vscale) { 2560 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2561 } 2562 2563 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2564 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2565 } 2566 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2567 2568 /* Set w1 = F(x1) */ 2569 if (!coloring->fset) { 2570 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2571 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2572 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2573 } else { 2574 coloring->fset = PETSC_FALSE; 2575 } 2576 2577 if (!coloring->w3) { 2578 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2579 ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 2580 } 2581 w3 = coloring->w3; 2582 2583 /* Compute all the local scale factors, including ghost points */ 2584 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2585 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2586 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2587 if (ctype == IS_COLORING_GHOSTED) { 2588 col_start = 0; col_end = N; 2589 } else if (ctype == IS_COLORING_GLOBAL) { 2590 xx = xx - start; 2591 vscale_array = vscale_array - start; 2592 col_start = start; col_end = N + start; 2593 } 2594 for (col=col_start; col<col_end; col++) { 2595 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2596 if (coloring->htype[0] == 'w') { 2597 dx = 1.0 + unorm; 2598 } else { 2599 dx = xx[col]; 2600 } 2601 if (dx == (PetscScalar)0.0) dx = 1.0; 2602 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2603 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2604 dx *= epsilon; 2605 vscale_array[col] = (PetscScalar)1.0/dx; 2606 } 2607 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2608 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2609 if (ctype == IS_COLORING_GLOBAL) { 2610 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2611 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2612 } 2613 if (coloring->vscaleforrow) { 2614 vscaleforrow = coloring->vscaleforrow; 2615 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2616 2617 ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2618 /* 2619 Loop over each color 2620 */ 2621 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2622 for (k=0; k<coloring->ncolors; k++) { 2623 coloring->currentcolor = k; 2624 for (i=0; i<bs; i++) { 2625 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2626 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2627 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2628 /* 2629 Loop over each column associated with color 2630 adding the perturbation to the vector w3. 2631 */ 2632 for (l=0; l<coloring->ncolumns[k]; l++) { 2633 col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2634 if (coloring->htype[0] == 'w') { 2635 dx = 1.0 + unorm; 2636 } else { 2637 dx = xx[col]; 2638 } 2639 if (dx == (PetscScalar)0.0) dx = 1.0; 2640 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2641 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2642 dx *= epsilon; 2643 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2644 w3_array[col] += dx; 2645 } 2646 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2647 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2648 2649 /* 2650 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2651 w2 = F(x1 + dx) - F(x1) 2652 */ 2653 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2654 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2655 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2656 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2657 2658 /* 2659 Loop over rows of vector, putting results into Jacobian matrix 2660 */ 2661 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2662 for (l=0; l<coloring->nrows[k]; l++) { 2663 row = bs*coloring->rows[k][l]; /* local row index */ 2664 col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2665 for (j=0; j<bs; j++) { 2666 y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2667 srows[j] = row + start + j; 2668 } 2669 ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2670 } 2671 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2672 } 2673 } /* endof for each color */ 2674 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2675 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2676 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2677 ierr = PetscFree(srows);CHKERRQ(ierr); 2678 2679 coloring->currentcolor = -1; 2680 2681 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2682 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2683 ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2684 PetscFunctionReturn(0); 2685 } 2686 2687 /* -------------------------------------------------------------------*/ 2688 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2689 MatGetRow_SeqBAIJ, 2690 MatRestoreRow_SeqBAIJ, 2691 MatMult_SeqBAIJ_N, 2692 /* 4*/ MatMultAdd_SeqBAIJ_N, 2693 MatMultTranspose_SeqBAIJ, 2694 MatMultTransposeAdd_SeqBAIJ, 2695 0, 2696 0, 2697 0, 2698 /* 10*/ 0, 2699 MatLUFactor_SeqBAIJ, 2700 0, 2701 0, 2702 MatTranspose_SeqBAIJ, 2703 /* 15*/ MatGetInfo_SeqBAIJ, 2704 MatEqual_SeqBAIJ, 2705 MatGetDiagonal_SeqBAIJ, 2706 MatDiagonalScale_SeqBAIJ, 2707 MatNorm_SeqBAIJ, 2708 /* 20*/ 0, 2709 MatAssemblyEnd_SeqBAIJ, 2710 MatSetOption_SeqBAIJ, 2711 MatZeroEntries_SeqBAIJ, 2712 /* 24*/ MatZeroRows_SeqBAIJ, 2713 0, 2714 0, 2715 0, 2716 0, 2717 /* 29*/ MatSetUp_SeqBAIJ, 2718 0, 2719 0, 2720 0, 2721 0, 2722 /* 34*/ MatDuplicate_SeqBAIJ, 2723 0, 2724 0, 2725 MatILUFactor_SeqBAIJ, 2726 0, 2727 /* 39*/ MatAXPY_SeqBAIJ, 2728 MatGetSubMatrices_SeqBAIJ, 2729 MatIncreaseOverlap_SeqBAIJ, 2730 MatGetValues_SeqBAIJ, 2731 MatCopy_SeqBAIJ, 2732 /* 44*/ 0, 2733 MatScale_SeqBAIJ, 2734 0, 2735 0, 2736 MatZeroRowsColumns_SeqBAIJ, 2737 /* 49*/ 0, 2738 MatGetRowIJ_SeqBAIJ, 2739 MatRestoreRowIJ_SeqBAIJ, 2740 MatGetColumnIJ_SeqBAIJ, 2741 MatRestoreColumnIJ_SeqBAIJ, 2742 /* 54*/ MatFDColoringCreate_SeqAIJ, 2743 0, 2744 0, 2745 0, 2746 MatSetValuesBlocked_SeqBAIJ, 2747 /* 59*/ MatGetSubMatrix_SeqBAIJ, 2748 MatDestroy_SeqBAIJ, 2749 MatView_SeqBAIJ, 2750 0, 2751 0, 2752 /* 64*/ 0, 2753 0, 2754 0, 2755 0, 2756 0, 2757 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2758 0, 2759 MatConvert_Basic, 2760 0, 2761 0, 2762 /* 74*/ 0, 2763 MatFDColoringApply_BAIJ, 2764 0, 2765 0, 2766 0, 2767 /* 79*/ 0, 2768 0, 2769 0, 2770 0, 2771 MatLoad_SeqBAIJ, 2772 /* 84*/ 0, 2773 0, 2774 0, 2775 0, 2776 0, 2777 /* 89*/ 0, 2778 0, 2779 0, 2780 0, 2781 0, 2782 /* 94*/ 0, 2783 0, 2784 0, 2785 0, 2786 0, 2787 /* 99*/ 0, 2788 0, 2789 0, 2790 0, 2791 0, 2792 /*104*/ 0, 2793 MatRealPart_SeqBAIJ, 2794 MatImaginaryPart_SeqBAIJ, 2795 0, 2796 0, 2797 /*109*/ 0, 2798 0, 2799 0, 2800 0, 2801 MatMissingDiagonal_SeqBAIJ, 2802 /*114*/ 0, 2803 0, 2804 0, 2805 0, 2806 0, 2807 /*119*/ 0, 2808 0, 2809 MatMultHermitianTranspose_SeqBAIJ, 2810 MatMultHermitianTransposeAdd_SeqBAIJ, 2811 0, 2812 /*124*/ 0, 2813 0, 2814 MatInvertBlockDiagonal_SeqBAIJ, 2815 0, 2816 0, 2817 /*129*/ 0, 2818 0, 2819 0, 2820 0, 2821 0, 2822 /*134*/ 0, 2823 0, 2824 0, 2825 0, 2826 0, 2827 /*139*/ 0, 2828 0, 2829 0 2830 }; 2831 2832 #undef __FUNCT__ 2833 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2834 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2835 { 2836 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2837 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2838 PetscErrorCode ierr; 2839 2840 PetscFunctionBegin; 2841 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2842 2843 /* allocate space for values if not already there */ 2844 if (!aij->saved_values) { 2845 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 2846 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2847 } 2848 2849 /* copy values over */ 2850 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2851 PetscFunctionReturn(0); 2852 } 2853 2854 #undef __FUNCT__ 2855 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2856 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2857 { 2858 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2859 PetscErrorCode ierr; 2860 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2861 2862 PetscFunctionBegin; 2863 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2864 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2865 2866 /* copy values over */ 2867 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2868 PetscFunctionReturn(0); 2869 } 2870 2871 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2872 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2873 2874 #undef __FUNCT__ 2875 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2876 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2877 { 2878 Mat_SeqBAIJ *b; 2879 PetscErrorCode ierr; 2880 PetscInt i,mbs,nbs,bs2; 2881 PetscBool flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2882 2883 PetscFunctionBegin; 2884 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2885 if (nz == MAT_SKIP_ALLOCATION) { 2886 skipallocation = PETSC_TRUE; 2887 nz = 0; 2888 } 2889 2890 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2891 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2892 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2893 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2894 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2895 2896 B->preallocated = PETSC_TRUE; 2897 2898 mbs = B->rmap->n/bs; 2899 nbs = B->cmap->n/bs; 2900 bs2 = bs*bs; 2901 2902 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2903 2904 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2905 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2906 if (nnz) { 2907 for (i=0; i<mbs; i++) { 2908 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2909 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2910 } 2911 } 2912 2913 b = (Mat_SeqBAIJ*)B->data; 2914 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2915 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,PETSC_FALSE,&flg,NULL);CHKERRQ(ierr); 2916 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2917 2918 if (!flg) { 2919 switch (bs) { 2920 case 1: 2921 B->ops->mult = MatMult_SeqBAIJ_1; 2922 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2923 break; 2924 case 2: 2925 B->ops->mult = MatMult_SeqBAIJ_2; 2926 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2927 break; 2928 case 3: 2929 B->ops->mult = MatMult_SeqBAIJ_3; 2930 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2931 break; 2932 case 4: 2933 B->ops->mult = MatMult_SeqBAIJ_4; 2934 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2935 break; 2936 case 5: 2937 B->ops->mult = MatMult_SeqBAIJ_5; 2938 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2939 break; 2940 case 6: 2941 B->ops->mult = MatMult_SeqBAIJ_6; 2942 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2943 break; 2944 case 7: 2945 B->ops->mult = MatMult_SeqBAIJ_7; 2946 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2947 break; 2948 case 15: 2949 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2950 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2951 break; 2952 default: 2953 B->ops->mult = MatMult_SeqBAIJ_N; 2954 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2955 break; 2956 } 2957 } 2958 B->ops->sor = MatSOR_SeqBAIJ; 2959 b->mbs = mbs; 2960 b->nbs = nbs; 2961 if (!skipallocation) { 2962 if (!b->imax) { 2963 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 2964 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2965 2966 b->free_imax_ilen = PETSC_TRUE; 2967 } 2968 /* b->ilen will count nonzeros in each block row so far. */ 2969 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2970 if (!nnz) { 2971 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2972 else if (nz < 0) nz = 1; 2973 for (i=0; i<mbs; i++) b->imax[i] = nz; 2974 nz = nz*mbs; 2975 } else { 2976 nz = 0; 2977 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2978 } 2979 2980 /* allocate the matrix space */ 2981 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2982 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 2983 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2984 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2985 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2986 2987 b->singlemalloc = PETSC_TRUE; 2988 b->i[0] = 0; 2989 for (i=1; i<mbs+1; i++) { 2990 b->i[i] = b->i[i-1] + b->imax[i-1]; 2991 } 2992 b->free_a = PETSC_TRUE; 2993 b->free_ij = PETSC_TRUE; 2994 #if defined(PETSC_THREADCOMM_ACTIVE) 2995 ierr = MatZeroEntries_SeqBAIJ(B);CHKERRQ(ierr); 2996 #endif 2997 } else { 2998 b->free_a = PETSC_FALSE; 2999 b->free_ij = PETSC_FALSE; 3000 } 3001 3002 b->bs2 = bs2; 3003 b->mbs = mbs; 3004 b->nz = 0; 3005 b->maxnz = nz; 3006 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3007 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 3008 PetscFunctionReturn(0); 3009 } 3010 3011 #undef __FUNCT__ 3012 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 3013 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3014 { 3015 PetscInt i,m,nz,nz_max=0,*nnz; 3016 PetscScalar *values=0; 3017 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 3018 PetscErrorCode ierr; 3019 3020 PetscFunctionBegin; 3021 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 3022 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3023 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3024 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3025 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3026 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 3027 m = B->rmap->n/bs; 3028 3029 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 3030 ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3031 for (i=0; i<m; i++) { 3032 nz = ii[i+1]- ii[i]; 3033 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 3034 nz_max = PetscMax(nz_max, nz); 3035 nnz[i] = nz; 3036 } 3037 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3038 ierr = PetscFree(nnz);CHKERRQ(ierr); 3039 3040 values = (PetscScalar*)V; 3041 if (!values) { 3042 ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3043 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3044 } 3045 for (i=0; i<m; i++) { 3046 PetscInt ncols = ii[i+1] - ii[i]; 3047 const PetscInt *icols = jj + ii[i]; 3048 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3049 if (!roworiented) { 3050 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3051 } else { 3052 PetscInt j; 3053 for (j=0; j<ncols; j++) { 3054 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 3055 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 3056 } 3057 } 3058 } 3059 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3060 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3061 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3062 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3063 PetscFunctionReturn(0); 3064 } 3065 3066 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3067 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*); 3068 #if defined(PETSC_HAVE_MUMPS) 3069 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3070 #endif 3071 extern PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,PetscBool*); 3072 3073 /*MC 3074 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3075 block sparse compressed row format. 3076 3077 Options Database Keys: 3078 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3079 3080 Level: beginner 3081 3082 .seealso: MatCreateSeqBAIJ() 3083 M*/ 3084 3085 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3086 3087 #undef __FUNCT__ 3088 #define __FUNCT__ "MatCreate_SeqBAIJ" 3089 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3090 { 3091 PetscErrorCode ierr; 3092 PetscMPIInt size; 3093 Mat_SeqBAIJ *b; 3094 3095 PetscFunctionBegin; 3096 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3097 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3098 3099 ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3100 B->data = (void*)b; 3101 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3102 3103 b->row = 0; 3104 b->col = 0; 3105 b->icol = 0; 3106 b->reallocs = 0; 3107 b->saved_values = 0; 3108 3109 b->roworiented = PETSC_TRUE; 3110 b->nonew = 0; 3111 b->diag = 0; 3112 b->solve_work = 0; 3113 b->mult_work = 0; 3114 B->spptr = 0; 3115 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3116 b->keepnonzeropattern = PETSC_FALSE; 3117 b->xtoy = 0; 3118 b->XtoY = 0; 3119 B->same_nonzero = PETSC_FALSE; 3120 3121 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactorAvailable_petsc_C",MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3122 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_petsc_C",MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3123 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_bstrm_C",MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr); 3124 #if defined(PETSC_HAVE_MUMPS) 3125 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C", MatGetFactor_baij_mumps);CHKERRQ(ierr); 3126 #endif 3127 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3128 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3129 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3130 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3131 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3132 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3133 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3134 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3135 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3136 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3137 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3138 PetscFunctionReturn(0); 3139 } 3140 3141 #undef __FUNCT__ 3142 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3143 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3144 { 3145 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3146 PetscErrorCode ierr; 3147 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3148 3149 PetscFunctionBegin; 3150 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3151 3152 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3153 c->imax = a->imax; 3154 c->ilen = a->ilen; 3155 c->free_imax_ilen = PETSC_FALSE; 3156 } else { 3157 ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3158 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3159 for (i=0; i<mbs; i++) { 3160 c->imax[i] = a->imax[i]; 3161 c->ilen[i] = a->ilen[i]; 3162 } 3163 c->free_imax_ilen = PETSC_TRUE; 3164 } 3165 3166 /* allocate the matrix space */ 3167 if (mallocmatspace) { 3168 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3169 ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr); 3170 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3171 ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr); 3172 3173 c->i = a->i; 3174 c->j = a->j; 3175 c->singlemalloc = PETSC_FALSE; 3176 c->free_a = PETSC_TRUE; 3177 c->free_ij = PETSC_FALSE; 3178 c->parent = A; 3179 C->preallocated = PETSC_TRUE; 3180 C->assembled = PETSC_TRUE; 3181 3182 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3183 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3184 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3185 } else { 3186 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3187 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3188 3189 c->singlemalloc = PETSC_TRUE; 3190 c->free_a = PETSC_TRUE; 3191 c->free_ij = PETSC_TRUE; 3192 3193 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3194 if (mbs > 0) { 3195 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3196 if (cpvalues == MAT_COPY_VALUES) { 3197 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3198 } else { 3199 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3200 } 3201 } 3202 C->preallocated = PETSC_TRUE; 3203 C->assembled = PETSC_TRUE; 3204 } 3205 } 3206 3207 c->roworiented = a->roworiented; 3208 c->nonew = a->nonew; 3209 3210 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3211 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3212 3213 c->bs2 = a->bs2; 3214 c->mbs = a->mbs; 3215 c->nbs = a->nbs; 3216 3217 if (a->diag) { 3218 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3219 c->diag = a->diag; 3220 c->free_diag = PETSC_FALSE; 3221 } else { 3222 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 3223 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3224 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3225 c->free_diag = PETSC_TRUE; 3226 } 3227 } else c->diag = 0; 3228 3229 c->nz = a->nz; 3230 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3231 c->solve_work = 0; 3232 c->mult_work = 0; 3233 3234 c->compressedrow.use = a->compressedrow.use; 3235 c->compressedrow.nrows = a->compressedrow.nrows; 3236 if (a->compressedrow.use) { 3237 i = a->compressedrow.nrows; 3238 ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr); 3239 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3240 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3241 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3242 } else { 3243 c->compressedrow.use = PETSC_FALSE; 3244 c->compressedrow.i = NULL; 3245 c->compressedrow.rindex = NULL; 3246 } 3247 C->same_nonzero = A->same_nonzero; 3248 3249 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3250 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3251 PetscFunctionReturn(0); 3252 } 3253 3254 #undef __FUNCT__ 3255 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3256 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3257 { 3258 PetscErrorCode ierr; 3259 3260 PetscFunctionBegin; 3261 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3262 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3263 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3264 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3265 PetscFunctionReturn(0); 3266 } 3267 3268 #undef __FUNCT__ 3269 #define __FUNCT__ "MatLoad_SeqBAIJ" 3270 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3271 { 3272 Mat_SeqBAIJ *a; 3273 PetscErrorCode ierr; 3274 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3275 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3276 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3277 PetscInt *masked,nmask,tmp,bs2,ishift; 3278 PetscMPIInt size; 3279 int fd; 3280 PetscScalar *aa; 3281 MPI_Comm comm; 3282 3283 PetscFunctionBegin; 3284 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3285 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3286 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3287 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3288 bs2 = bs*bs; 3289 3290 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3291 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3292 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3293 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3294 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3295 M = header[1]; N = header[2]; nz = header[3]; 3296 3297 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3298 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3299 3300 /* 3301 This code adds extra rows to make sure the number of rows is 3302 divisible by the blocksize 3303 */ 3304 mbs = M/bs; 3305 extra_rows = bs - M + bs*(mbs); 3306 if (extra_rows == bs) extra_rows = 0; 3307 else mbs++; 3308 if (extra_rows) { 3309 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3310 } 3311 3312 /* Set global sizes if not already set */ 3313 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3314 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3315 } else { /* Check if the matrix global sizes are correct */ 3316 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3317 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3318 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3319 } 3320 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3321 } 3322 3323 /* read in row lengths */ 3324 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3325 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3326 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3327 3328 /* read in column indices */ 3329 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3330 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3331 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3332 3333 /* loop over row lengths determining block row lengths */ 3334 ierr = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3335 ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3336 ierr = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr); 3337 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3338 rowcount = 0; 3339 nzcount = 0; 3340 for (i=0; i<mbs; i++) { 3341 nmask = 0; 3342 for (j=0; j<bs; j++) { 3343 kmax = rowlengths[rowcount]; 3344 for (k=0; k<kmax; k++) { 3345 tmp = jj[nzcount++]/bs; 3346 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3347 } 3348 rowcount++; 3349 } 3350 browlengths[i] += nmask; 3351 /* zero out the mask elements we set */ 3352 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3353 } 3354 3355 /* Do preallocation */ 3356 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3357 a = (Mat_SeqBAIJ*)newmat->data; 3358 3359 /* set matrix "i" values */ 3360 a->i[0] = 0; 3361 for (i=1; i<= mbs; i++) { 3362 a->i[i] = a->i[i-1] + browlengths[i-1]; 3363 a->ilen[i-1] = browlengths[i-1]; 3364 } 3365 a->nz = 0; 3366 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3367 3368 /* read in nonzero values */ 3369 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 3370 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3371 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3372 3373 /* set "a" and "j" values into matrix */ 3374 nzcount = 0; jcount = 0; 3375 for (i=0; i<mbs; i++) { 3376 nzcountb = nzcount; 3377 nmask = 0; 3378 for (j=0; j<bs; j++) { 3379 kmax = rowlengths[i*bs+j]; 3380 for (k=0; k<kmax; k++) { 3381 tmp = jj[nzcount++]/bs; 3382 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3383 } 3384 } 3385 /* sort the masked values */ 3386 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3387 3388 /* set "j" values into matrix */ 3389 maskcount = 1; 3390 for (j=0; j<nmask; j++) { 3391 a->j[jcount++] = masked[j]; 3392 mask[masked[j]] = maskcount++; 3393 } 3394 /* set "a" values into matrix */ 3395 ishift = bs2*a->i[i]; 3396 for (j=0; j<bs; j++) { 3397 kmax = rowlengths[i*bs+j]; 3398 for (k=0; k<kmax; k++) { 3399 tmp = jj[nzcountb]/bs; 3400 block = mask[tmp] - 1; 3401 point = jj[nzcountb] - bs*tmp; 3402 idx = ishift + bs2*block + j + bs*point; 3403 a->a[idx] = (MatScalar)aa[nzcountb++]; 3404 } 3405 } 3406 /* zero out the mask elements we set */ 3407 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3408 } 3409 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3410 3411 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3412 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3413 ierr = PetscFree(aa);CHKERRQ(ierr); 3414 ierr = PetscFree(jj);CHKERRQ(ierr); 3415 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3416 3417 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3418 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3419 PetscFunctionReturn(0); 3420 } 3421 3422 #undef __FUNCT__ 3423 #define __FUNCT__ "MatCreateSeqBAIJ" 3424 /*@C 3425 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3426 compressed row) format. For good matrix assembly performance the 3427 user should preallocate the matrix storage by setting the parameter nz 3428 (or the array nnz). By setting these parameters accurately, performance 3429 during matrix assembly can be increased by more than a factor of 50. 3430 3431 Collective on MPI_Comm 3432 3433 Input Parameters: 3434 + comm - MPI communicator, set to PETSC_COMM_SELF 3435 . bs - size of block 3436 . m - number of rows 3437 . n - number of columns 3438 . nz - number of nonzero blocks per block row (same for all rows) 3439 - nnz - array containing the number of nonzero blocks in the various block rows 3440 (possibly different for each block row) or NULL 3441 3442 Output Parameter: 3443 . A - the matrix 3444 3445 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3446 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3447 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3448 3449 Options Database Keys: 3450 . -mat_no_unroll - uses code that does not unroll the loops in the 3451 block calculations (much slower) 3452 . -mat_block_size - size of the blocks to use 3453 3454 Level: intermediate 3455 3456 Notes: 3457 The number of rows and columns must be divisible by blocksize. 3458 3459 If the nnz parameter is given then the nz parameter is ignored 3460 3461 A nonzero block is any block that as 1 or more nonzeros in it 3462 3463 The block AIJ format is fully compatible with standard Fortran 77 3464 storage. That is, the stored row and column indices can begin at 3465 either one (as in Fortran) or zero. See the users' manual for details. 3466 3467 Specify the preallocated storage with either nz or nnz (not both). 3468 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3469 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3470 matrices. 3471 3472 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3473 @*/ 3474 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3475 { 3476 PetscErrorCode ierr; 3477 3478 PetscFunctionBegin; 3479 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3480 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3481 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3482 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3483 PetscFunctionReturn(0); 3484 } 3485 3486 #undef __FUNCT__ 3487 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3488 /*@C 3489 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3490 per row in the matrix. For good matrix assembly performance the 3491 user should preallocate the matrix storage by setting the parameter nz 3492 (or the array nnz). By setting these parameters accurately, performance 3493 during matrix assembly can be increased by more than a factor of 50. 3494 3495 Collective on MPI_Comm 3496 3497 Input Parameters: 3498 + A - the matrix 3499 . bs - size of block 3500 . nz - number of block nonzeros per block row (same for all rows) 3501 - nnz - array containing the number of block nonzeros in the various block rows 3502 (possibly different for each block row) or NULL 3503 3504 Options Database Keys: 3505 . -mat_no_unroll - uses code that does not unroll the loops in the 3506 block calculations (much slower) 3507 . -mat_block_size - size of the blocks to use 3508 3509 Level: intermediate 3510 3511 Notes: 3512 If the nnz parameter is given then the nz parameter is ignored 3513 3514 You can call MatGetInfo() to get information on how effective the preallocation was; 3515 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3516 You can also run with the option -info and look for messages with the string 3517 malloc in them to see if additional memory allocation was needed. 3518 3519 The block AIJ format is fully compatible with standard Fortran 77 3520 storage. That is, the stored row and column indices can begin at 3521 either one (as in Fortran) or zero. See the users' manual for details. 3522 3523 Specify the preallocated storage with either nz or nnz (not both). 3524 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3525 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3526 3527 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3528 @*/ 3529 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3530 { 3531 PetscErrorCode ierr; 3532 3533 PetscFunctionBegin; 3534 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3535 PetscValidType(B,1); 3536 PetscValidLogicalCollectiveInt(B,bs,2); 3537 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3538 PetscFunctionReturn(0); 3539 } 3540 3541 #undef __FUNCT__ 3542 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3543 /*@C 3544 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3545 (the default sequential PETSc format). 3546 3547 Collective on MPI_Comm 3548 3549 Input Parameters: 3550 + A - the matrix 3551 . i - the indices into j for the start of each local row (starts with zero) 3552 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3553 - v - optional values in the matrix 3554 3555 Level: developer 3556 3557 Notes: 3558 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3559 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3560 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3561 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3562 block column and the second index is over columns within a block. 3563 3564 .keywords: matrix, aij, compressed row, sparse 3565 3566 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3567 @*/ 3568 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3569 { 3570 PetscErrorCode ierr; 3571 3572 PetscFunctionBegin; 3573 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3574 PetscValidType(B,1); 3575 PetscValidLogicalCollectiveInt(B,bs,2); 3576 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3577 PetscFunctionReturn(0); 3578 } 3579 3580 3581 #undef __FUNCT__ 3582 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3583 /*@ 3584 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3585 3586 Collective on MPI_Comm 3587 3588 Input Parameters: 3589 + comm - must be an MPI communicator of size 1 3590 . bs - size of block 3591 . m - number of rows 3592 . n - number of columns 3593 . i - row indices 3594 . j - column indices 3595 - a - matrix values 3596 3597 Output Parameter: 3598 . mat - the matrix 3599 3600 Level: advanced 3601 3602 Notes: 3603 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3604 once the matrix is destroyed 3605 3606 You cannot set new nonzero locations into this matrix, that will generate an error. 3607 3608 The i and j indices are 0 based 3609 3610 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3611 3612 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3613 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3614 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3615 with column-major ordering within blocks. 3616 3617 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3618 3619 @*/ 3620 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat) 3621 { 3622 PetscErrorCode ierr; 3623 PetscInt ii; 3624 Mat_SeqBAIJ *baij; 3625 3626 PetscFunctionBegin; 3627 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3628 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3629 3630 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3631 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3632 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3633 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3634 baij = (Mat_SeqBAIJ*)(*mat)->data; 3635 ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3636 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3637 3638 baij->i = i; 3639 baij->j = j; 3640 baij->a = a; 3641 3642 baij->singlemalloc = PETSC_FALSE; 3643 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3644 baij->free_a = PETSC_FALSE; 3645 baij->free_ij = PETSC_FALSE; 3646 3647 for (ii=0; ii<m; ii++) { 3648 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3649 #if defined(PETSC_USE_DEBUG) 3650 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3651 #endif 3652 } 3653 #if defined(PETSC_USE_DEBUG) 3654 for (ii=0; ii<baij->i[m]; ii++) { 3655 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3656 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3657 } 3658 #endif 3659 3660 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3661 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3662 PetscFunctionReturn(0); 3663 } 3664