1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc-private/kernels/blockinvert.h> 9 #include <petsc-private/kernels/blockmatmult.h> 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 19 PetscReal shift = 0.0; 20 21 PetscFunctionBegin; 22 if (a->idiagvalid) { 23 if (values) *values = a->idiag; 24 PetscFunctionReturn(0); 25 } 26 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 27 diag_offset = a->diag; 28 if (!a->idiag) { 29 ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr); 30 ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 31 } 32 diag = a->idiag; 33 mdiag = a->idiag+bs2*mbs; 34 if (values) *values = a->idiag; 35 /* factor and invert each block */ 36 switch (bs) { 37 case 1: 38 for (i=0; i<mbs; i++) { 39 odiag = v + 1*diag_offset[i]; 40 diag[0] = odiag[0]; 41 mdiag[0] = odiag[0]; 42 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 43 diag += 1; 44 mdiag += 1; 45 } 46 break; 47 case 2: 48 for (i=0; i<mbs; i++) { 49 odiag = v + 4*diag_offset[i]; 50 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 51 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 52 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 53 diag += 4; 54 mdiag += 4; 55 } 56 break; 57 case 3: 58 for (i=0; i<mbs; i++) { 59 odiag = v + 9*diag_offset[i]; 60 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 61 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 62 diag[8] = odiag[8]; 63 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 64 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 65 mdiag[8] = odiag[8]; 66 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 67 diag += 9; 68 mdiag += 9; 69 } 70 break; 71 case 4: 72 for (i=0; i<mbs; i++) { 73 odiag = v + 16*diag_offset[i]; 74 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 75 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 76 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 77 diag += 16; 78 mdiag += 16; 79 } 80 break; 81 case 5: 82 for (i=0; i<mbs; i++) { 83 odiag = v + 25*diag_offset[i]; 84 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 85 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 86 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 87 diag += 25; 88 mdiag += 25; 89 } 90 break; 91 case 6: 92 for (i=0; i<mbs; i++) { 93 odiag = v + 36*diag_offset[i]; 94 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 95 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 97 diag += 36; 98 mdiag += 36; 99 } 100 break; 101 case 7: 102 for (i=0; i<mbs; i++) { 103 odiag = v + 49*diag_offset[i]; 104 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 105 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 107 diag += 49; 108 mdiag += 49; 109 } 110 break; 111 default: 112 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 113 for (i=0; i<mbs; i++) { 114 odiag = v + bs2*diag_offset[i]; 115 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 117 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 118 diag += bs2; 119 mdiag += bs2; 120 } 121 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 122 } 123 a->idiagvalid = PETSC_TRUE; 124 PetscFunctionReturn(0); 125 } 126 127 #undef __FUNCT__ 128 #define __FUNCT__ "MatSOR_SeqBAIJ" 129 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 130 { 131 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 132 PetscScalar *x,*work,*w,*workt,*t; 133 const MatScalar *v,*aa = a->a, *idiag; 134 const PetscScalar *b,*xb; 135 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 136 PetscErrorCode ierr; 137 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 138 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 139 140 PetscFunctionBegin; 141 its = its*lits; 142 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 143 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 144 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 145 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 146 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 147 148 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 149 150 if (!m) PetscFunctionReturn(0); 151 diag = a->diag; 152 idiag = a->idiag; 153 k = PetscMax(A->rmap->n,A->cmap->n); 154 if (!a->mult_work) { 155 ierr = PetscMalloc1((2*k+1),&a->mult_work);CHKERRQ(ierr); 156 } 157 work = a->mult_work; 158 t = work + k+1; 159 if (!a->sor_work) { 160 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 161 } 162 w = a->sor_work; 163 164 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 165 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 166 167 if (flag & SOR_ZERO_INITIAL_GUESS) { 168 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 169 switch (bs) { 170 case 1: 171 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 172 t[0] = b[0]; 173 i2 = 1; 174 idiag += 1; 175 for (i=1; i<m; i++) { 176 v = aa + ai[i]; 177 vi = aj + ai[i]; 178 nz = diag[i] - ai[i]; 179 s[0] = b[i2]; 180 for (j=0; j<nz; j++) { 181 xw[0] = x[vi[j]]; 182 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 183 } 184 t[i2] = s[0]; 185 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 186 x[i2] = xw[0]; 187 idiag += 1; 188 i2 += 1; 189 } 190 break; 191 case 2: 192 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 193 t[0] = b[0]; t[1] = b[1]; 194 i2 = 2; 195 idiag += 4; 196 for (i=1; i<m; i++) { 197 v = aa + 4*ai[i]; 198 vi = aj + ai[i]; 199 nz = diag[i] - ai[i]; 200 s[0] = b[i2]; s[1] = b[i2+1]; 201 for (j=0; j<nz; j++) { 202 idx = 2*vi[j]; 203 it = 4*j; 204 xw[0] = x[idx]; xw[1] = x[1+idx]; 205 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 206 } 207 t[i2] = s[0]; t[i2+1] = s[1]; 208 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 209 x[i2] = xw[0]; x[i2+1] = xw[1]; 210 idiag += 4; 211 i2 += 2; 212 } 213 break; 214 case 3: 215 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 216 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 217 i2 = 3; 218 idiag += 9; 219 for (i=1; i<m; i++) { 220 v = aa + 9*ai[i]; 221 vi = aj + ai[i]; 222 nz = diag[i] - ai[i]; 223 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 224 while (nz--) { 225 idx = 3*(*vi++); 226 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 227 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 228 v += 9; 229 } 230 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 231 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 232 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 233 idiag += 9; 234 i2 += 3; 235 } 236 break; 237 case 4: 238 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 239 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 240 i2 = 4; 241 idiag += 16; 242 for (i=1; i<m; i++) { 243 v = aa + 16*ai[i]; 244 vi = aj + ai[i]; 245 nz = diag[i] - ai[i]; 246 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 247 while (nz--) { 248 idx = 4*(*vi++); 249 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 250 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 251 v += 16; 252 } 253 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 254 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 255 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 256 idiag += 16; 257 i2 += 4; 258 } 259 break; 260 case 5: 261 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 262 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 263 i2 = 5; 264 idiag += 25; 265 for (i=1; i<m; i++) { 266 v = aa + 25*ai[i]; 267 vi = aj + ai[i]; 268 nz = diag[i] - ai[i]; 269 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 270 while (nz--) { 271 idx = 5*(*vi++); 272 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 273 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 274 v += 25; 275 } 276 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 277 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 278 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 279 idiag += 25; 280 i2 += 5; 281 } 282 break; 283 case 6: 284 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 285 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 286 i2 = 6; 287 idiag += 36; 288 for (i=1; i<m; i++) { 289 v = aa + 36*ai[i]; 290 vi = aj + ai[i]; 291 nz = diag[i] - ai[i]; 292 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 293 while (nz--) { 294 idx = 6*(*vi++); 295 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 296 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 297 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 298 v += 36; 299 } 300 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 301 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 302 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 303 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 304 idiag += 36; 305 i2 += 6; 306 } 307 break; 308 case 7: 309 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 310 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 311 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 312 i2 = 7; 313 idiag += 49; 314 for (i=1; i<m; i++) { 315 v = aa + 49*ai[i]; 316 vi = aj + ai[i]; 317 nz = diag[i] - ai[i]; 318 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 319 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 320 while (nz--) { 321 idx = 7*(*vi++); 322 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 323 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 324 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 325 v += 49; 326 } 327 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 328 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 329 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 330 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 331 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 332 idiag += 49; 333 i2 += 7; 334 } 335 break; 336 default: 337 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 338 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 339 i2 = bs; 340 idiag += bs2; 341 for (i=1; i<m; i++) { 342 v = aa + bs2*ai[i]; 343 vi = aj + ai[i]; 344 nz = diag[i] - ai[i]; 345 346 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 347 /* copy all rows of x that are needed into contiguous space */ 348 workt = work; 349 for (j=0; j<nz; j++) { 350 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 351 workt += bs; 352 } 353 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 354 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 355 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 356 357 idiag += bs2; 358 i2 += bs; 359 } 360 break; 361 } 362 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 363 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 364 xb = t; 365 } 366 else xb = b; 367 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 368 idiag = a->idiag+bs2*(a->mbs-1); 369 i2 = bs * (m-1); 370 switch (bs) { 371 case 1: 372 s[0] = xb[i2]; 373 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 374 x[i2] = xw[0]; 375 i2 -= 1; 376 for (i=m-2; i>=0; i--) { 377 v = aa + (diag[i]+1); 378 vi = aj + diag[i] + 1; 379 nz = ai[i+1] - diag[i] - 1; 380 s[0] = xb[i2]; 381 for (j=0; j<nz; j++) { 382 xw[0] = x[vi[j]]; 383 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 384 } 385 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 386 x[i2] = xw[0]; 387 idiag -= 1; 388 i2 -= 1; 389 } 390 break; 391 case 2: 392 s[0] = xb[i2]; s[1] = xb[i2+1]; 393 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 394 x[i2] = xw[0]; x[i2+1] = xw[1]; 395 i2 -= 2; 396 idiag -= 4; 397 for (i=m-2; i>=0; i--) { 398 v = aa + 4*(diag[i] + 1); 399 vi = aj + diag[i] + 1; 400 nz = ai[i+1] - diag[i] - 1; 401 s[0] = xb[i2]; s[1] = xb[i2+1]; 402 for (j=0; j<nz; j++) { 403 idx = 2*vi[j]; 404 it = 4*j; 405 xw[0] = x[idx]; xw[1] = x[1+idx]; 406 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 407 } 408 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 409 x[i2] = xw[0]; x[i2+1] = xw[1]; 410 idiag -= 4; 411 i2 -= 2; 412 } 413 break; 414 case 3: 415 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 416 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 417 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 418 i2 -= 3; 419 idiag -= 9; 420 for (i=m-2; i>=0; i--) { 421 v = aa + 9*(diag[i]+1); 422 vi = aj + diag[i] + 1; 423 nz = ai[i+1] - diag[i] - 1; 424 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 425 while (nz--) { 426 idx = 3*(*vi++); 427 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 428 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 429 v += 9; 430 } 431 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 432 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 433 idiag -= 9; 434 i2 -= 3; 435 } 436 break; 437 case 4: 438 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 439 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 440 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 441 i2 -= 4; 442 idiag -= 16; 443 for (i=m-2; i>=0; i--) { 444 v = aa + 16*(diag[i]+1); 445 vi = aj + diag[i] + 1; 446 nz = ai[i+1] - diag[i] - 1; 447 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 448 while (nz--) { 449 idx = 4*(*vi++); 450 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 451 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 452 v += 16; 453 } 454 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 455 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 456 idiag -= 16; 457 i2 -= 4; 458 } 459 break; 460 case 5: 461 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 462 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 463 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 464 i2 -= 5; 465 idiag -= 25; 466 for (i=m-2; i>=0; i--) { 467 v = aa + 25*(diag[i]+1); 468 vi = aj + diag[i] + 1; 469 nz = ai[i+1] - diag[i] - 1; 470 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 471 while (nz--) { 472 idx = 5*(*vi++); 473 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 474 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 475 v += 25; 476 } 477 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 478 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 479 idiag -= 25; 480 i2 -= 5; 481 } 482 break; 483 case 6: 484 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 485 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 486 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 487 i2 -= 6; 488 idiag -= 36; 489 for (i=m-2; i>=0; i--) { 490 v = aa + 36*(diag[i]+1); 491 vi = aj + diag[i] + 1; 492 nz = ai[i+1] - diag[i] - 1; 493 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 494 while (nz--) { 495 idx = 6*(*vi++); 496 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 497 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 498 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 499 v += 36; 500 } 501 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 502 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 503 idiag -= 36; 504 i2 -= 6; 505 } 506 break; 507 case 7: 508 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 509 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 510 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 511 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 512 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 513 i2 -= 7; 514 idiag -= 49; 515 for (i=m-2; i>=0; i--) { 516 v = aa + 49*(diag[i]+1); 517 vi = aj + diag[i] + 1; 518 nz = ai[i+1] - diag[i] - 1; 519 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 520 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 521 while (nz--) { 522 idx = 7*(*vi++); 523 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 524 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 525 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 526 v += 49; 527 } 528 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 529 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 530 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 531 idiag -= 49; 532 i2 -= 7; 533 } 534 break; 535 default: 536 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 537 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 538 i2 -= bs; 539 idiag -= bs2; 540 for (i=m-2; i>=0; i--) { 541 v = aa + bs2*(diag[i]+1); 542 vi = aj + diag[i] + 1; 543 nz = ai[i+1] - diag[i] - 1; 544 545 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 546 /* copy all rows of x that are needed into contiguous space */ 547 workt = work; 548 for (j=0; j<nz; j++) { 549 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 550 workt += bs; 551 } 552 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 553 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 554 555 idiag -= bs2; 556 i2 -= bs; 557 } 558 break; 559 } 560 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 561 } 562 its--; 563 } 564 while (its--) { 565 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 566 idiag = a->idiag; 567 i2 = 0; 568 switch (bs) { 569 case 1: 570 for (i=0; i<m; i++) { 571 v = aa + ai[i]; 572 vi = aj + ai[i]; 573 nz = ai[i+1] - ai[i]; 574 s[0] = b[i2]; 575 for (j=0; j<nz; j++) { 576 xw[0] = x[vi[j]]; 577 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 578 } 579 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 580 x[i2] += xw[0]; 581 idiag += 1; 582 i2 += 1; 583 } 584 break; 585 case 2: 586 for (i=0; i<m; i++) { 587 v = aa + 4*ai[i]; 588 vi = aj + ai[i]; 589 nz = ai[i+1] - ai[i]; 590 s[0] = b[i2]; s[1] = b[i2+1]; 591 for (j=0; j<nz; j++) { 592 idx = 2*vi[j]; 593 it = 4*j; 594 xw[0] = x[idx]; xw[1] = x[1+idx]; 595 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 596 } 597 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 598 x[i2] += xw[0]; x[i2+1] += xw[1]; 599 idiag += 4; 600 i2 += 2; 601 } 602 break; 603 case 3: 604 for (i=0; i<m; i++) { 605 v = aa + 9*ai[i]; 606 vi = aj + ai[i]; 607 nz = ai[i+1] - ai[i]; 608 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 609 while (nz--) { 610 idx = 3*(*vi++); 611 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 612 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 613 v += 9; 614 } 615 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 616 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 617 idiag += 9; 618 i2 += 3; 619 } 620 break; 621 case 4: 622 for (i=0; i<m; i++) { 623 v = aa + 16*ai[i]; 624 vi = aj + ai[i]; 625 nz = ai[i+1] - ai[i]; 626 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 627 while (nz--) { 628 idx = 4*(*vi++); 629 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 630 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 631 v += 16; 632 } 633 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 634 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 635 idiag += 16; 636 i2 += 4; 637 } 638 break; 639 case 5: 640 for (i=0; i<m; i++) { 641 v = aa + 25*ai[i]; 642 vi = aj + ai[i]; 643 nz = ai[i+1] - ai[i]; 644 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 645 while (nz--) { 646 idx = 5*(*vi++); 647 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 648 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 649 v += 25; 650 } 651 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 652 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 653 idiag += 25; 654 i2 += 5; 655 } 656 break; 657 case 6: 658 for (i=0; i<m; i++) { 659 v = aa + 36*ai[i]; 660 vi = aj + ai[i]; 661 nz = ai[i+1] - ai[i]; 662 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 663 while (nz--) { 664 idx = 6*(*vi++); 665 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 666 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 667 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 668 v += 36; 669 } 670 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 671 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 672 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 673 idiag += 36; 674 i2 += 6; 675 } 676 break; 677 case 7: 678 for (i=0; i<m; i++) { 679 v = aa + 49*ai[i]; 680 vi = aj + ai[i]; 681 nz = ai[i+1] - ai[i]; 682 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 683 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 684 while (nz--) { 685 idx = 7*(*vi++); 686 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 687 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 688 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 689 v += 49; 690 } 691 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 692 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 693 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 694 idiag += 49; 695 i2 += 7; 696 } 697 break; 698 default: 699 for (i=0; i<m; i++) { 700 v = aa + bs2*ai[i]; 701 vi = aj + ai[i]; 702 nz = ai[i+1] - ai[i]; 703 704 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 705 /* copy all rows of x that are needed into contiguous space */ 706 workt = work; 707 for (j=0; j<nz; j++) { 708 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 709 workt += bs; 710 } 711 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 712 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 713 714 idiag += bs2; 715 i2 += bs; 716 } 717 break; 718 } 719 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 720 } 721 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 722 idiag = a->idiag+bs2*(a->mbs-1); 723 i2 = bs * (m-1); 724 switch (bs) { 725 case 1: 726 for (i=m-1; i>=0; i--) { 727 v = aa + ai[i]; 728 vi = aj + ai[i]; 729 nz = ai[i+1] - ai[i]; 730 s[0] = b[i2]; 731 for (j=0; j<nz; j++) { 732 xw[0] = x[vi[j]]; 733 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 734 } 735 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 736 x[i2] += xw[0]; 737 idiag -= 1; 738 i2 -= 1; 739 } 740 break; 741 case 2: 742 for (i=m-1; i>=0; i--) { 743 v = aa + 4*ai[i]; 744 vi = aj + ai[i]; 745 nz = ai[i+1] - ai[i]; 746 s[0] = b[i2]; s[1] = b[i2+1]; 747 for (j=0; j<nz; j++) { 748 idx = 2*vi[j]; 749 it = 4*j; 750 xw[0] = x[idx]; xw[1] = x[1+idx]; 751 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 752 } 753 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 754 x[i2] += xw[0]; x[i2+1] += xw[1]; 755 idiag -= 4; 756 i2 -= 2; 757 } 758 break; 759 case 3: 760 for (i=m-1; i>=0; i--) { 761 v = aa + 9*ai[i]; 762 vi = aj + ai[i]; 763 nz = ai[i+1] - ai[i]; 764 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 765 while (nz--) { 766 idx = 3*(*vi++); 767 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 768 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 769 v += 9; 770 } 771 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 772 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 773 idiag -= 9; 774 i2 -= 3; 775 } 776 break; 777 case 4: 778 for (i=m-1; i>=0; i--) { 779 v = aa + 16*ai[i]; 780 vi = aj + ai[i]; 781 nz = ai[i+1] - ai[i]; 782 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 783 while (nz--) { 784 idx = 4*(*vi++); 785 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 786 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 787 v += 16; 788 } 789 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 790 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 791 idiag -= 16; 792 i2 -= 4; 793 } 794 break; 795 case 5: 796 for (i=m-1; i>=0; i--) { 797 v = aa + 25*ai[i]; 798 vi = aj + ai[i]; 799 nz = ai[i+1] - ai[i]; 800 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 801 while (nz--) { 802 idx = 5*(*vi++); 803 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 804 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 805 v += 25; 806 } 807 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 808 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 809 idiag -= 25; 810 i2 -= 5; 811 } 812 break; 813 case 6: 814 for (i=m-1; i>=0; i--) { 815 v = aa + 36*ai[i]; 816 vi = aj + ai[i]; 817 nz = ai[i+1] - ai[i]; 818 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 819 while (nz--) { 820 idx = 6*(*vi++); 821 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 822 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 823 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 824 v += 36; 825 } 826 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 827 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 828 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 829 idiag -= 36; 830 i2 -= 6; 831 } 832 break; 833 case 7: 834 for (i=m-1; i>=0; i--) { 835 v = aa + 49*ai[i]; 836 vi = aj + ai[i]; 837 nz = ai[i+1] - ai[i]; 838 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 839 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 840 while (nz--) { 841 idx = 7*(*vi++); 842 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 843 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 844 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 845 v += 49; 846 } 847 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 848 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 849 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 850 idiag -= 49; 851 i2 -= 7; 852 } 853 break; 854 default: 855 for (i=m-1; i>=0; i--) { 856 v = aa + bs2*ai[i]; 857 vi = aj + ai[i]; 858 nz = ai[i+1] - ai[i]; 859 860 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 861 /* copy all rows of x that are needed into contiguous space */ 862 workt = work; 863 for (j=0; j<nz; j++) { 864 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 865 workt += bs; 866 } 867 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 868 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 869 870 idiag -= bs2; 871 i2 -= bs; 872 } 873 break; 874 } 875 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 876 } 877 } 878 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 879 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 880 PetscFunctionReturn(0); 881 } 882 883 884 /* 885 Special version for direct calls from Fortran (Used in PETSc-fun3d) 886 */ 887 #if defined(PETSC_HAVE_FORTRAN_CAPS) 888 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 889 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 890 #define matsetvaluesblocked4_ matsetvaluesblocked4 891 #endif 892 893 #undef __FUNCT__ 894 #define __FUNCT__ "matsetvaluesblocked4_" 895 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 896 { 897 Mat A = *AA; 898 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 899 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 900 PetscInt *ai =a->i,*ailen=a->ilen; 901 PetscInt *aj =a->j,stepval,lastcol = -1; 902 const PetscScalar *value = v; 903 MatScalar *ap,*aa = a->a,*bap; 904 905 PetscFunctionBegin; 906 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 907 stepval = (n-1)*4; 908 for (k=0; k<m; k++) { /* loop over added rows */ 909 row = im[k]; 910 rp = aj + ai[row]; 911 ap = aa + 16*ai[row]; 912 nrow = ailen[row]; 913 low = 0; 914 high = nrow; 915 for (l=0; l<n; l++) { /* loop over added columns */ 916 col = in[l]; 917 if (col <= lastcol) low = 0; 918 else high = nrow; 919 lastcol = col; 920 value = v + k*(stepval+4 + l)*4; 921 while (high-low > 7) { 922 t = (low+high)/2; 923 if (rp[t] > col) high = t; 924 else low = t; 925 } 926 for (i=low; i<high; i++) { 927 if (rp[i] > col) break; 928 if (rp[i] == col) { 929 bap = ap + 16*i; 930 for (ii=0; ii<4; ii++,value+=stepval) { 931 for (jj=ii; jj<16; jj+=4) { 932 bap[jj] += *value++; 933 } 934 } 935 goto noinsert2; 936 } 937 } 938 N = nrow++ - 1; 939 high++; /* added new column index thus must search to one higher than before */ 940 /* shift up all the later entries in this row */ 941 for (ii=N; ii>=i; ii--) { 942 rp[ii+1] = rp[ii]; 943 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 944 } 945 if (N >= i) { 946 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 947 } 948 rp[i] = col; 949 bap = ap + 16*i; 950 for (ii=0; ii<4; ii++,value+=stepval) { 951 for (jj=ii; jj<16; jj+=4) { 952 bap[jj] = *value++; 953 } 954 } 955 noinsert2:; 956 low = i; 957 } 958 ailen[row] = nrow; 959 } 960 PetscFunctionReturnVoid(); 961 } 962 963 #if defined(PETSC_HAVE_FORTRAN_CAPS) 964 #define matsetvalues4_ MATSETVALUES4 965 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 966 #define matsetvalues4_ matsetvalues4 967 #endif 968 969 #undef __FUNCT__ 970 #define __FUNCT__ "MatSetValues4_" 971 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 972 { 973 Mat A = *AA; 974 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 975 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 976 PetscInt *ai=a->i,*ailen=a->ilen; 977 PetscInt *aj=a->j,brow,bcol; 978 PetscInt ridx,cidx,lastcol = -1; 979 MatScalar *ap,value,*aa=a->a,*bap; 980 981 PetscFunctionBegin; 982 for (k=0; k<m; k++) { /* loop over added rows */ 983 row = im[k]; brow = row/4; 984 rp = aj + ai[brow]; 985 ap = aa + 16*ai[brow]; 986 nrow = ailen[brow]; 987 low = 0; 988 high = nrow; 989 for (l=0; l<n; l++) { /* loop over added columns */ 990 col = in[l]; bcol = col/4; 991 ridx = row % 4; cidx = col % 4; 992 value = v[l + k*n]; 993 if (col <= lastcol) low = 0; 994 else high = nrow; 995 lastcol = col; 996 while (high-low > 7) { 997 t = (low+high)/2; 998 if (rp[t] > bcol) high = t; 999 else low = t; 1000 } 1001 for (i=low; i<high; i++) { 1002 if (rp[i] > bcol) break; 1003 if (rp[i] == bcol) { 1004 bap = ap + 16*i + 4*cidx + ridx; 1005 *bap += value; 1006 goto noinsert1; 1007 } 1008 } 1009 N = nrow++ - 1; 1010 high++; /* added new column thus must search to one higher than before */ 1011 /* shift up all the later entries in this row */ 1012 for (ii=N; ii>=i; ii--) { 1013 rp[ii+1] = rp[ii]; 1014 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1015 } 1016 if (N>=i) { 1017 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1018 } 1019 rp[i] = bcol; 1020 ap[16*i + 4*cidx + ridx] = value; 1021 noinsert1:; 1022 low = i; 1023 } 1024 ailen[brow] = nrow; 1025 } 1026 PetscFunctionReturnVoid(); 1027 } 1028 1029 /* 1030 Checks for missing diagonals 1031 */ 1032 #undef __FUNCT__ 1033 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1034 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1035 { 1036 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1037 PetscErrorCode ierr; 1038 PetscInt *diag,*jj = a->j,i; 1039 1040 PetscFunctionBegin; 1041 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1042 *missing = PETSC_FALSE; 1043 if (A->rmap->n > 0 && !jj) { 1044 *missing = PETSC_TRUE; 1045 if (d) *d = 0; 1046 PetscInfo(A,"Matrix has no entries therefore is missing diagonal"); 1047 } else { 1048 diag = a->diag; 1049 for (i=0; i<a->mbs; i++) { 1050 if (jj[diag[i]] != i) { 1051 *missing = PETSC_TRUE; 1052 if (d) *d = i; 1053 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1054 break; 1055 } 1056 } 1057 } 1058 PetscFunctionReturn(0); 1059 } 1060 1061 #undef __FUNCT__ 1062 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1063 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1064 { 1065 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1066 PetscErrorCode ierr; 1067 PetscInt i,j,m = a->mbs; 1068 1069 PetscFunctionBegin; 1070 if (!a->diag) { 1071 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1072 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1073 a->free_diag = PETSC_TRUE; 1074 } 1075 for (i=0; i<m; i++) { 1076 a->diag[i] = a->i[i+1]; 1077 for (j=a->i[i]; j<a->i[i+1]; j++) { 1078 if (a->j[j] == i) { 1079 a->diag[i] = j; 1080 break; 1081 } 1082 } 1083 } 1084 PetscFunctionReturn(0); 1085 } 1086 1087 1088 #undef __FUNCT__ 1089 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1090 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1091 { 1092 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1093 PetscErrorCode ierr; 1094 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1095 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1096 1097 PetscFunctionBegin; 1098 *nn = n; 1099 if (!ia) PetscFunctionReturn(0); 1100 if (symmetric) { 1101 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1102 nz = tia[n]; 1103 } else { 1104 tia = a->i; tja = a->j; 1105 } 1106 1107 if (!blockcompressed && bs > 1) { 1108 (*nn) *= bs; 1109 /* malloc & create the natural set of indices */ 1110 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1111 if (n) { 1112 (*ia)[0] = 0; 1113 for (j=1; j<bs; j++) { 1114 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1115 } 1116 } 1117 1118 for (i=1; i<n; i++) { 1119 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1120 for (j=1; j<bs; j++) { 1121 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1122 } 1123 } 1124 if (n) { 1125 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1126 } 1127 1128 if (inja) { 1129 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1130 cnt = 0; 1131 for (i=0; i<n; i++) { 1132 for (j=0; j<bs; j++) { 1133 for (k=tia[i]; k<tia[i+1]; k++) { 1134 for (l=0; l<bs; l++) { 1135 (*ja)[cnt++] = bs*tja[k] + l; 1136 } 1137 } 1138 } 1139 } 1140 } 1141 1142 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1143 ierr = PetscFree(tia);CHKERRQ(ierr); 1144 ierr = PetscFree(tja);CHKERRQ(ierr); 1145 } 1146 } else if (oshift == 1) { 1147 if (symmetric) { 1148 nz = tia[A->rmap->n/bs]; 1149 /* add 1 to i and j indices */ 1150 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1151 *ia = tia; 1152 if (ja) { 1153 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1154 *ja = tja; 1155 } 1156 } else { 1157 nz = a->i[A->rmap->n/bs]; 1158 /* malloc space and add 1 to i and j indices */ 1159 ierr = PetscMalloc1((A->rmap->n/bs+1),ia);CHKERRQ(ierr); 1160 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1161 if (ja) { 1162 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1163 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1164 } 1165 } 1166 } else { 1167 *ia = tia; 1168 if (ja) *ja = tja; 1169 } 1170 PetscFunctionReturn(0); 1171 } 1172 1173 #undef __FUNCT__ 1174 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1175 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1176 { 1177 PetscErrorCode ierr; 1178 1179 PetscFunctionBegin; 1180 if (!ia) PetscFunctionReturn(0); 1181 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1182 ierr = PetscFree(*ia);CHKERRQ(ierr); 1183 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1184 } 1185 PetscFunctionReturn(0); 1186 } 1187 1188 #undef __FUNCT__ 1189 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1190 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1191 { 1192 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1193 PetscErrorCode ierr; 1194 1195 PetscFunctionBegin; 1196 #if defined(PETSC_USE_LOG) 1197 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1198 #endif 1199 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1200 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1201 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1202 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1203 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1204 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1205 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1206 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1207 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1208 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1209 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1210 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1211 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1212 1213 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1214 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1215 ierr = PetscFree(A->data);CHKERRQ(ierr); 1216 1217 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1220 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1222 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1223 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1224 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1225 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1226 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1227 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1228 PetscFunctionReturn(0); 1229 } 1230 1231 #undef __FUNCT__ 1232 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1233 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1234 { 1235 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1236 PetscErrorCode ierr; 1237 1238 PetscFunctionBegin; 1239 switch (op) { 1240 case MAT_ROW_ORIENTED: 1241 a->roworiented = flg; 1242 break; 1243 case MAT_KEEP_NONZERO_PATTERN: 1244 a->keepnonzeropattern = flg; 1245 break; 1246 case MAT_NEW_NONZERO_LOCATIONS: 1247 a->nonew = (flg ? 0 : 1); 1248 break; 1249 case MAT_NEW_NONZERO_LOCATION_ERR: 1250 a->nonew = (flg ? -1 : 0); 1251 break; 1252 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1253 a->nonew = (flg ? -2 : 0); 1254 break; 1255 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1256 a->nounused = (flg ? -1 : 0); 1257 break; 1258 case MAT_NEW_DIAGONALS: 1259 case MAT_IGNORE_OFF_PROC_ENTRIES: 1260 case MAT_USE_HASH_TABLE: 1261 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1262 break; 1263 case MAT_SPD: 1264 case MAT_SYMMETRIC: 1265 case MAT_STRUCTURALLY_SYMMETRIC: 1266 case MAT_HERMITIAN: 1267 case MAT_SYMMETRY_ETERNAL: 1268 /* These options are handled directly by MatSetOption() */ 1269 break; 1270 default: 1271 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1272 } 1273 PetscFunctionReturn(0); 1274 } 1275 1276 #undef __FUNCT__ 1277 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1278 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1279 { 1280 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1281 PetscErrorCode ierr; 1282 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 1283 MatScalar *aa,*aa_i; 1284 PetscScalar *v_i; 1285 1286 PetscFunctionBegin; 1287 bs = A->rmap->bs; 1288 ai = a->i; 1289 aj = a->j; 1290 aa = a->a; 1291 bs2 = a->bs2; 1292 1293 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1294 1295 bn = row/bs; /* Block number */ 1296 bp = row % bs; /* Block Position */ 1297 M = ai[bn+1] - ai[bn]; 1298 *nz = bs*M; 1299 1300 if (v) { 1301 *v = 0; 1302 if (*nz) { 1303 ierr = PetscMalloc1((*nz),v);CHKERRQ(ierr); 1304 for (i=0; i<M; i++) { /* for each block in the block row */ 1305 v_i = *v + i*bs; 1306 aa_i = aa + bs2*(ai[bn] + i); 1307 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1308 } 1309 } 1310 } 1311 1312 if (idx) { 1313 *idx = 0; 1314 if (*nz) { 1315 ierr = PetscMalloc1((*nz),idx);CHKERRQ(ierr); 1316 for (i=0; i<M; i++) { /* for each block in the block row */ 1317 idx_i = *idx + i*bs; 1318 itmp = bs*aj[ai[bn] + i]; 1319 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1320 } 1321 } 1322 } 1323 PetscFunctionReturn(0); 1324 } 1325 1326 #undef __FUNCT__ 1327 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1328 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1329 { 1330 PetscErrorCode ierr; 1331 1332 PetscFunctionBegin; 1333 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1334 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1335 PetscFunctionReturn(0); 1336 } 1337 1338 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1339 1340 #undef __FUNCT__ 1341 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1342 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1343 { 1344 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1345 Mat C; 1346 PetscErrorCode ierr; 1347 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1348 PetscInt *rows,*cols,bs2=a->bs2; 1349 MatScalar *array; 1350 1351 PetscFunctionBegin; 1352 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1353 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1354 ierr = PetscCalloc1((1+nbs),&col);CHKERRQ(ierr); 1355 1356 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1357 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1358 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1359 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1360 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr); 1361 ierr = PetscFree(col);CHKERRQ(ierr); 1362 } else { 1363 C = *B; 1364 } 1365 1366 array = a->a; 1367 ierr = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr); 1368 for (i=0; i<mbs; i++) { 1369 cols[0] = i*bs; 1370 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1371 len = ai[i+1] - ai[i]; 1372 for (j=0; j<len; j++) { 1373 rows[0] = (*aj++)*bs; 1374 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1375 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1376 array += bs2; 1377 } 1378 } 1379 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1380 1381 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1382 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1383 1384 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1385 *B = C; 1386 } else { 1387 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1388 } 1389 PetscFunctionReturn(0); 1390 } 1391 1392 #undef __FUNCT__ 1393 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1394 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1395 { 1396 PetscErrorCode ierr; 1397 Mat Btrans; 1398 1399 PetscFunctionBegin; 1400 *f = PETSC_FALSE; 1401 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1402 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1403 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1404 PetscFunctionReturn(0); 1405 } 1406 1407 #undef __FUNCT__ 1408 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1409 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1410 { 1411 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1412 PetscErrorCode ierr; 1413 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1414 int fd; 1415 PetscScalar *aa; 1416 FILE *file; 1417 1418 PetscFunctionBegin; 1419 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1420 ierr = PetscMalloc1((4+A->rmap->N),&col_lens);CHKERRQ(ierr); 1421 col_lens[0] = MAT_FILE_CLASSID; 1422 1423 col_lens[1] = A->rmap->N; 1424 col_lens[2] = A->cmap->n; 1425 col_lens[3] = a->nz*bs2; 1426 1427 /* store lengths of each row and write (including header) to file */ 1428 count = 0; 1429 for (i=0; i<a->mbs; i++) { 1430 for (j=0; j<bs; j++) { 1431 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1432 } 1433 } 1434 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1435 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1436 1437 /* store column indices (zero start index) */ 1438 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1439 count = 0; 1440 for (i=0; i<a->mbs; i++) { 1441 for (j=0; j<bs; j++) { 1442 for (k=a->i[i]; k<a->i[i+1]; k++) { 1443 for (l=0; l<bs; l++) { 1444 jj[count++] = bs*a->j[k] + l; 1445 } 1446 } 1447 } 1448 } 1449 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1450 ierr = PetscFree(jj);CHKERRQ(ierr); 1451 1452 /* store nonzero values */ 1453 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1454 count = 0; 1455 for (i=0; i<a->mbs; i++) { 1456 for (j=0; j<bs; j++) { 1457 for (k=a->i[i]; k<a->i[i+1]; k++) { 1458 for (l=0; l<bs; l++) { 1459 aa[count++] = a->a[bs2*k + l*bs + j]; 1460 } 1461 } 1462 } 1463 } 1464 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1465 ierr = PetscFree(aa);CHKERRQ(ierr); 1466 1467 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1468 if (file) { 1469 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1470 } 1471 PetscFunctionReturn(0); 1472 } 1473 1474 #undef __FUNCT__ 1475 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1476 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1477 { 1478 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1479 PetscErrorCode ierr; 1480 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1481 PetscViewerFormat format; 1482 1483 PetscFunctionBegin; 1484 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1485 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1486 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1487 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1488 Mat aij; 1489 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1490 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1491 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1492 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1493 PetscFunctionReturn(0); 1494 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1495 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1496 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer);CHKERRQ(ierr); 1497 for (i=0; i<a->mbs; i++) { 1498 for (j=0; j<bs; j++) { 1499 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1500 for (k=a->i[i]; k<a->i[i+1]; k++) { 1501 for (l=0; l<bs; l++) { 1502 #if defined(PETSC_USE_COMPLEX) 1503 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1504 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1505 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1506 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1507 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1508 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1509 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1510 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1511 } 1512 #else 1513 if (a->a[bs2*k + l*bs + j] != 0.0) { 1514 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1515 } 1516 #endif 1517 } 1518 } 1519 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1520 } 1521 } 1522 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1523 } else { 1524 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1525 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer);CHKERRQ(ierr); 1526 for (i=0; i<a->mbs; i++) { 1527 for (j=0; j<bs; j++) { 1528 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1529 for (k=a->i[i]; k<a->i[i+1]; k++) { 1530 for (l=0; l<bs; l++) { 1531 #if defined(PETSC_USE_COMPLEX) 1532 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1533 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1534 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1535 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1536 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1537 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1538 } else { 1539 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1540 } 1541 #else 1542 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1543 #endif 1544 } 1545 } 1546 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1547 } 1548 } 1549 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1550 } 1551 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1552 PetscFunctionReturn(0); 1553 } 1554 1555 #include <petscdraw.h> 1556 #undef __FUNCT__ 1557 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1558 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1559 { 1560 Mat A = (Mat) Aa; 1561 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1562 PetscErrorCode ierr; 1563 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1564 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1565 MatScalar *aa; 1566 PetscViewer viewer; 1567 PetscViewerFormat format; 1568 1569 PetscFunctionBegin; 1570 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1571 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1572 1573 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1574 1575 /* loop over matrix elements drawing boxes */ 1576 1577 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1578 color = PETSC_DRAW_BLUE; 1579 for (i=0,row=0; i<mbs; i++,row+=bs) { 1580 for (j=a->i[i]; j<a->i[i+1]; j++) { 1581 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1582 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1583 aa = a->a + j*bs2; 1584 for (k=0; k<bs; k++) { 1585 for (l=0; l<bs; l++) { 1586 if (PetscRealPart(*aa++) >= 0.) continue; 1587 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1588 } 1589 } 1590 } 1591 } 1592 color = PETSC_DRAW_CYAN; 1593 for (i=0,row=0; i<mbs; i++,row+=bs) { 1594 for (j=a->i[i]; j<a->i[i+1]; j++) { 1595 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1596 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1597 aa = a->a + j*bs2; 1598 for (k=0; k<bs; k++) { 1599 for (l=0; l<bs; l++) { 1600 if (PetscRealPart(*aa++) != 0.) continue; 1601 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1602 } 1603 } 1604 } 1605 } 1606 color = PETSC_DRAW_RED; 1607 for (i=0,row=0; i<mbs; i++,row+=bs) { 1608 for (j=a->i[i]; j<a->i[i+1]; j++) { 1609 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1610 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1611 aa = a->a + j*bs2; 1612 for (k=0; k<bs; k++) { 1613 for (l=0; l<bs; l++) { 1614 if (PetscRealPart(*aa++) <= 0.) continue; 1615 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1616 } 1617 } 1618 } 1619 } 1620 } else { 1621 /* use contour shading to indicate magnitude of values */ 1622 /* first determine max of all nonzero values */ 1623 PetscDraw popup; 1624 PetscReal scale,maxv = 0.0; 1625 1626 for (i=0; i<a->nz*a->bs2; i++) { 1627 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1628 } 1629 scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv; 1630 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1631 if (popup) { 1632 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1633 } 1634 for (i=0,row=0; i<mbs; i++,row+=bs) { 1635 for (j=a->i[i]; j<a->i[i+1]; j++) { 1636 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1637 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1638 aa = a->a + j*bs2; 1639 for (k=0; k<bs; k++) { 1640 for (l=0; l<bs; l++) { 1641 color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++)); 1642 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1643 } 1644 } 1645 } 1646 } 1647 } 1648 PetscFunctionReturn(0); 1649 } 1650 1651 #undef __FUNCT__ 1652 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1653 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1654 { 1655 PetscErrorCode ierr; 1656 PetscReal xl,yl,xr,yr,w,h; 1657 PetscDraw draw; 1658 PetscBool isnull; 1659 1660 PetscFunctionBegin; 1661 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1662 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1663 1664 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1665 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1666 xr += w; yr += h; xl = -w; yl = -h; 1667 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1668 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1669 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1670 PetscFunctionReturn(0); 1671 } 1672 1673 #undef __FUNCT__ 1674 #define __FUNCT__ "MatView_SeqBAIJ" 1675 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1676 { 1677 PetscErrorCode ierr; 1678 PetscBool iascii,isbinary,isdraw; 1679 1680 PetscFunctionBegin; 1681 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1682 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1683 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1684 if (iascii) { 1685 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1686 } else if (isbinary) { 1687 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1688 } else if (isdraw) { 1689 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1690 } else { 1691 Mat B; 1692 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1693 ierr = MatView(B,viewer);CHKERRQ(ierr); 1694 ierr = MatDestroy(&B);CHKERRQ(ierr); 1695 } 1696 PetscFunctionReturn(0); 1697 } 1698 1699 1700 #undef __FUNCT__ 1701 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1702 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1703 { 1704 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1705 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1706 PetscInt *ai = a->i,*ailen = a->ilen; 1707 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1708 MatScalar *ap,*aa = a->a; 1709 1710 PetscFunctionBegin; 1711 for (k=0; k<m; k++) { /* loop over rows */ 1712 row = im[k]; brow = row/bs; 1713 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1714 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1715 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1716 nrow = ailen[brow]; 1717 for (l=0; l<n; l++) { /* loop over columns */ 1718 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1719 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1720 col = in[l]; 1721 bcol = col/bs; 1722 cidx = col%bs; 1723 ridx = row%bs; 1724 high = nrow; 1725 low = 0; /* assume unsorted */ 1726 while (high-low > 5) { 1727 t = (low+high)/2; 1728 if (rp[t] > bcol) high = t; 1729 else low = t; 1730 } 1731 for (i=low; i<high; i++) { 1732 if (rp[i] > bcol) break; 1733 if (rp[i] == bcol) { 1734 *v++ = ap[bs2*i+bs*cidx+ridx]; 1735 goto finished; 1736 } 1737 } 1738 *v++ = 0.0; 1739 finished:; 1740 } 1741 } 1742 PetscFunctionReturn(0); 1743 } 1744 1745 #undef __FUNCT__ 1746 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1747 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1748 { 1749 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1750 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1751 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1752 PetscErrorCode ierr; 1753 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1754 PetscBool roworiented=a->roworiented; 1755 const PetscScalar *value = v; 1756 MatScalar *ap,*aa = a->a,*bap; 1757 1758 PetscFunctionBegin; 1759 if (roworiented) { 1760 stepval = (n-1)*bs; 1761 } else { 1762 stepval = (m-1)*bs; 1763 } 1764 for (k=0; k<m; k++) { /* loop over added rows */ 1765 row = im[k]; 1766 if (row < 0) continue; 1767 #if defined(PETSC_USE_DEBUG) 1768 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1769 #endif 1770 rp = aj + ai[row]; 1771 ap = aa + bs2*ai[row]; 1772 rmax = imax[row]; 1773 nrow = ailen[row]; 1774 low = 0; 1775 high = nrow; 1776 for (l=0; l<n; l++) { /* loop over added columns */ 1777 if (in[l] < 0) continue; 1778 #if defined(PETSC_USE_DEBUG) 1779 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1780 #endif 1781 col = in[l]; 1782 if (roworiented) { 1783 value = v + (k*(stepval+bs) + l)*bs; 1784 } else { 1785 value = v + (l*(stepval+bs) + k)*bs; 1786 } 1787 if (col <= lastcol) low = 0; 1788 else high = nrow; 1789 lastcol = col; 1790 while (high-low > 7) { 1791 t = (low+high)/2; 1792 if (rp[t] > col) high = t; 1793 else low = t; 1794 } 1795 for (i=low; i<high; i++) { 1796 if (rp[i] > col) break; 1797 if (rp[i] == col) { 1798 bap = ap + bs2*i; 1799 if (roworiented) { 1800 if (is == ADD_VALUES) { 1801 for (ii=0; ii<bs; ii++,value+=stepval) { 1802 for (jj=ii; jj<bs2; jj+=bs) { 1803 bap[jj] += *value++; 1804 } 1805 } 1806 } else { 1807 for (ii=0; ii<bs; ii++,value+=stepval) { 1808 for (jj=ii; jj<bs2; jj+=bs) { 1809 bap[jj] = *value++; 1810 } 1811 } 1812 } 1813 } else { 1814 if (is == ADD_VALUES) { 1815 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1816 for (jj=0; jj<bs; jj++) { 1817 bap[jj] += value[jj]; 1818 } 1819 bap += bs; 1820 } 1821 } else { 1822 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1823 for (jj=0; jj<bs; jj++) { 1824 bap[jj] = value[jj]; 1825 } 1826 bap += bs; 1827 } 1828 } 1829 } 1830 goto noinsert2; 1831 } 1832 } 1833 if (nonew == 1) goto noinsert2; 1834 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1835 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1836 N = nrow++ - 1; high++; 1837 /* shift up all the later entries in this row */ 1838 for (ii=N; ii>=i; ii--) { 1839 rp[ii+1] = rp[ii]; 1840 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1841 } 1842 if (N >= i) { 1843 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1844 } 1845 rp[i] = col; 1846 bap = ap + bs2*i; 1847 if (roworiented) { 1848 for (ii=0; ii<bs; ii++,value+=stepval) { 1849 for (jj=ii; jj<bs2; jj+=bs) { 1850 bap[jj] = *value++; 1851 } 1852 } 1853 } else { 1854 for (ii=0; ii<bs; ii++,value+=stepval) { 1855 for (jj=0; jj<bs; jj++) { 1856 *bap++ = *value++; 1857 } 1858 } 1859 } 1860 noinsert2:; 1861 low = i; 1862 } 1863 ailen[row] = nrow; 1864 } 1865 PetscFunctionReturn(0); 1866 } 1867 1868 #undef __FUNCT__ 1869 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1870 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1871 { 1872 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1873 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1874 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1875 PetscErrorCode ierr; 1876 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1877 MatScalar *aa = a->a,*ap; 1878 PetscReal ratio=0.6; 1879 1880 PetscFunctionBegin; 1881 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1882 1883 if (m) rmax = ailen[0]; 1884 for (i=1; i<mbs; i++) { 1885 /* move each row back by the amount of empty slots (fshift) before it*/ 1886 fshift += imax[i-1] - ailen[i-1]; 1887 rmax = PetscMax(rmax,ailen[i]); 1888 if (fshift) { 1889 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1890 N = ailen[i]; 1891 for (j=0; j<N; j++) { 1892 ip[j-fshift] = ip[j]; 1893 1894 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1895 } 1896 } 1897 ai[i] = ai[i-1] + ailen[i-1]; 1898 } 1899 if (mbs) { 1900 fshift += imax[mbs-1] - ailen[mbs-1]; 1901 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1902 } 1903 1904 /* reset ilen and imax for each row */ 1905 a->nonzerorowcnt = 0; 1906 for (i=0; i<mbs; i++) { 1907 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1908 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1909 } 1910 a->nz = ai[mbs]; 1911 1912 /* diagonals may have moved, so kill the diagonal pointers */ 1913 a->idiagvalid = PETSC_FALSE; 1914 if (fshift && a->diag) { 1915 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1916 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1917 a->diag = 0; 1918 } 1919 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1920 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1921 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1922 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1923 1924 A->info.mallocs += a->reallocs; 1925 a->reallocs = 0; 1926 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1927 1928 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1929 1930 A->same_nonzero = PETSC_TRUE; 1931 PetscFunctionReturn(0); 1932 } 1933 1934 /* 1935 This function returns an array of flags which indicate the locations of contiguous 1936 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1937 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1938 Assume: sizes should be long enough to hold all the values. 1939 */ 1940 #undef __FUNCT__ 1941 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1942 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1943 { 1944 PetscInt i,j,k,row; 1945 PetscBool flg; 1946 1947 PetscFunctionBegin; 1948 for (i=0,j=0; i<n; j++) { 1949 row = idx[i]; 1950 if (row%bs!=0) { /* Not the begining of a block */ 1951 sizes[j] = 1; 1952 i++; 1953 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1954 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1955 i++; 1956 } else { /* Begining of the block, so check if the complete block exists */ 1957 flg = PETSC_TRUE; 1958 for (k=1; k<bs; k++) { 1959 if (row+k != idx[i+k]) { /* break in the block */ 1960 flg = PETSC_FALSE; 1961 break; 1962 } 1963 } 1964 if (flg) { /* No break in the bs */ 1965 sizes[j] = bs; 1966 i += bs; 1967 } else { 1968 sizes[j] = 1; 1969 i++; 1970 } 1971 } 1972 } 1973 *bs_max = j; 1974 PetscFunctionReturn(0); 1975 } 1976 1977 #undef __FUNCT__ 1978 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1979 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 1980 { 1981 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1982 PetscErrorCode ierr; 1983 PetscInt i,j,k,count,*rows; 1984 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 1985 PetscScalar zero = 0.0; 1986 MatScalar *aa; 1987 const PetscScalar *xx; 1988 PetscScalar *bb; 1989 1990 PetscFunctionBegin; 1991 /* fix right hand side if needed */ 1992 if (x && b) { 1993 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1994 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1995 for (i=0; i<is_n; i++) { 1996 bb[is_idx[i]] = diag*xx[is_idx[i]]; 1997 } 1998 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1999 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2000 } 2001 2002 /* Make a copy of the IS and sort it */ 2003 /* allocate memory for rows,sizes */ 2004 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2005 2006 /* copy IS values to rows, and sort them */ 2007 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2008 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2009 2010 if (baij->keepnonzeropattern) { 2011 for (i=0; i<is_n; i++) sizes[i] = 1; 2012 bs_max = is_n; 2013 A->same_nonzero = PETSC_TRUE; 2014 } else { 2015 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2016 2017 A->same_nonzero = PETSC_FALSE; 2018 } 2019 2020 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2021 row = rows[j]; 2022 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2023 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2024 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2025 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2026 if (diag != (PetscScalar)0.0) { 2027 if (baij->ilen[row/bs] > 0) { 2028 baij->ilen[row/bs] = 1; 2029 baij->j[baij->i[row/bs]] = row/bs; 2030 2031 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2032 } 2033 /* Now insert all the diagonal values for this bs */ 2034 for (k=0; k<bs; k++) { 2035 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2036 } 2037 } else { /* (diag == 0.0) */ 2038 baij->ilen[row/bs] = 0; 2039 } /* end (diag == 0.0) */ 2040 } else { /* (sizes[i] != bs) */ 2041 #if defined(PETSC_USE_DEBUG) 2042 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2043 #endif 2044 for (k=0; k<count; k++) { 2045 aa[0] = zero; 2046 aa += bs; 2047 } 2048 if (diag != (PetscScalar)0.0) { 2049 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2050 } 2051 } 2052 } 2053 2054 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2055 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2056 PetscFunctionReturn(0); 2057 } 2058 2059 #undef __FUNCT__ 2060 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2061 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2062 { 2063 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2064 PetscErrorCode ierr; 2065 PetscInt i,j,k,count; 2066 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2067 PetscScalar zero = 0.0; 2068 MatScalar *aa; 2069 const PetscScalar *xx; 2070 PetscScalar *bb; 2071 PetscBool *zeroed,vecs = PETSC_FALSE; 2072 2073 PetscFunctionBegin; 2074 /* fix right hand side if needed */ 2075 if (x && b) { 2076 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2077 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2078 vecs = PETSC_TRUE; 2079 } 2080 A->same_nonzero = PETSC_TRUE; 2081 2082 /* zero the columns */ 2083 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2084 for (i=0; i<is_n; i++) { 2085 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2086 zeroed[is_idx[i]] = PETSC_TRUE; 2087 } 2088 for (i=0; i<A->rmap->N; i++) { 2089 if (!zeroed[i]) { 2090 row = i/bs; 2091 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2092 for (k=0; k<bs; k++) { 2093 col = bs*baij->j[j] + k; 2094 if (zeroed[col]) { 2095 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2096 if (vecs) bb[i] -= aa[0]*xx[col]; 2097 aa[0] = 0.0; 2098 } 2099 } 2100 } 2101 } else if (vecs) bb[i] = diag*xx[i]; 2102 } 2103 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2104 if (vecs) { 2105 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2106 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2107 } 2108 2109 /* zero the rows */ 2110 for (i=0; i<is_n; i++) { 2111 row = is_idx[i]; 2112 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2113 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2114 for (k=0; k<count; k++) { 2115 aa[0] = zero; 2116 aa += bs; 2117 } 2118 if (diag != (PetscScalar)0.0) { 2119 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2120 } 2121 } 2122 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2123 PetscFunctionReturn(0); 2124 } 2125 2126 #undef __FUNCT__ 2127 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2128 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2129 { 2130 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2131 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2132 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2133 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2134 PetscErrorCode ierr; 2135 PetscInt ridx,cidx,bs2=a->bs2; 2136 PetscBool roworiented=a->roworiented; 2137 MatScalar *ap,value,*aa=a->a,*bap; 2138 2139 PetscFunctionBegin; 2140 if (v) PetscValidScalarPointer(v,6); 2141 for (k=0; k<m; k++) { /* loop over added rows */ 2142 row = im[k]; 2143 brow = row/bs; 2144 if (row < 0) continue; 2145 #if defined(PETSC_USE_DEBUG) 2146 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2147 #endif 2148 rp = aj + ai[brow]; 2149 ap = aa + bs2*ai[brow]; 2150 rmax = imax[brow]; 2151 nrow = ailen[brow]; 2152 low = 0; 2153 high = nrow; 2154 for (l=0; l<n; l++) { /* loop over added columns */ 2155 if (in[l] < 0) continue; 2156 #if defined(PETSC_USE_DEBUG) 2157 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2158 #endif 2159 col = in[l]; bcol = col/bs; 2160 ridx = row % bs; cidx = col % bs; 2161 if (roworiented) { 2162 value = v[l + k*n]; 2163 } else { 2164 value = v[k + l*m]; 2165 } 2166 if (col <= lastcol) low = 0; else high = nrow; 2167 lastcol = col; 2168 while (high-low > 7) { 2169 t = (low+high)/2; 2170 if (rp[t] > bcol) high = t; 2171 else low = t; 2172 } 2173 for (i=low; i<high; i++) { 2174 if (rp[i] > bcol) break; 2175 if (rp[i] == bcol) { 2176 bap = ap + bs2*i + bs*cidx + ridx; 2177 if (is == ADD_VALUES) *bap += value; 2178 else *bap = value; 2179 goto noinsert1; 2180 } 2181 } 2182 if (nonew == 1) goto noinsert1; 2183 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2184 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2185 N = nrow++ - 1; high++; 2186 /* shift up all the later entries in this row */ 2187 for (ii=N; ii>=i; ii--) { 2188 rp[ii+1] = rp[ii]; 2189 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2190 } 2191 if (N>=i) { 2192 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2193 } 2194 rp[i] = bcol; 2195 ap[bs2*i + bs*cidx + ridx] = value; 2196 a->nz++; 2197 noinsert1:; 2198 low = i; 2199 } 2200 ailen[brow] = nrow; 2201 } 2202 A->same_nonzero = PETSC_FALSE; 2203 PetscFunctionReturn(0); 2204 } 2205 2206 #undef __FUNCT__ 2207 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2208 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2209 { 2210 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2211 Mat outA; 2212 PetscErrorCode ierr; 2213 PetscBool row_identity,col_identity; 2214 2215 PetscFunctionBegin; 2216 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2217 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2218 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2219 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2220 2221 outA = inA; 2222 inA->factortype = MAT_FACTOR_LU; 2223 2224 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2225 2226 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2227 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2228 a->row = row; 2229 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2230 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2231 a->col = col; 2232 2233 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2234 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2235 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2236 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2237 2238 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2239 if (!a->solve_work) { 2240 ierr = PetscMalloc1((inA->rmap->N+inA->rmap->bs),&a->solve_work);CHKERRQ(ierr); 2241 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2242 } 2243 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2244 PetscFunctionReturn(0); 2245 } 2246 2247 #undef __FUNCT__ 2248 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2249 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2250 { 2251 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2252 PetscInt i,nz,mbs; 2253 2254 PetscFunctionBegin; 2255 nz = baij->maxnz; 2256 mbs = baij->mbs; 2257 for (i=0; i<nz; i++) { 2258 baij->j[i] = indices[i]; 2259 } 2260 baij->nz = nz; 2261 for (i=0; i<mbs; i++) { 2262 baij->ilen[i] = baij->imax[i]; 2263 } 2264 PetscFunctionReturn(0); 2265 } 2266 2267 #undef __FUNCT__ 2268 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2269 /*@ 2270 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2271 in the matrix. 2272 2273 Input Parameters: 2274 + mat - the SeqBAIJ matrix 2275 - indices - the column indices 2276 2277 Level: advanced 2278 2279 Notes: 2280 This can be called if you have precomputed the nonzero structure of the 2281 matrix and want to provide it to the matrix object to improve the performance 2282 of the MatSetValues() operation. 2283 2284 You MUST have set the correct numbers of nonzeros per row in the call to 2285 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2286 2287 MUST be called before any calls to MatSetValues(); 2288 2289 @*/ 2290 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2291 { 2292 PetscErrorCode ierr; 2293 2294 PetscFunctionBegin; 2295 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2296 PetscValidPointer(indices,2); 2297 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2298 PetscFunctionReturn(0); 2299 } 2300 2301 #undef __FUNCT__ 2302 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2303 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2304 { 2305 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2306 PetscErrorCode ierr; 2307 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2308 PetscReal atmp; 2309 PetscScalar *x,zero = 0.0; 2310 MatScalar *aa; 2311 PetscInt ncols,brow,krow,kcol; 2312 2313 PetscFunctionBegin; 2314 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2315 bs = A->rmap->bs; 2316 aa = a->a; 2317 ai = a->i; 2318 aj = a->j; 2319 mbs = a->mbs; 2320 2321 ierr = VecSet(v,zero);CHKERRQ(ierr); 2322 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2323 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2324 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2325 for (i=0; i<mbs; i++) { 2326 ncols = ai[1] - ai[0]; ai++; 2327 brow = bs*i; 2328 for (j=0; j<ncols; j++) { 2329 for (kcol=0; kcol<bs; kcol++) { 2330 for (krow=0; krow<bs; krow++) { 2331 atmp = PetscAbsScalar(*aa);aa++; 2332 row = brow + krow; /* row index */ 2333 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2334 } 2335 } 2336 aj++; 2337 } 2338 } 2339 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2340 PetscFunctionReturn(0); 2341 } 2342 2343 #undef __FUNCT__ 2344 #define __FUNCT__ "MatCopy_SeqBAIJ" 2345 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2346 { 2347 PetscErrorCode ierr; 2348 2349 PetscFunctionBegin; 2350 /* If the two matrices have the same copy implementation, use fast copy. */ 2351 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2352 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2353 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2354 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2355 2356 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2357 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2358 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2359 } else { 2360 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2361 } 2362 PetscFunctionReturn(0); 2363 } 2364 2365 #undef __FUNCT__ 2366 #define __FUNCT__ "MatSetUp_SeqBAIJ" 2367 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2368 { 2369 PetscErrorCode ierr; 2370 2371 PetscFunctionBegin; 2372 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2373 PetscFunctionReturn(0); 2374 } 2375 2376 #undef __FUNCT__ 2377 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ" 2378 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2379 { 2380 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2381 2382 PetscFunctionBegin; 2383 *array = a->a; 2384 PetscFunctionReturn(0); 2385 } 2386 2387 #undef __FUNCT__ 2388 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ" 2389 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2390 { 2391 PetscFunctionBegin; 2392 PetscFunctionReturn(0); 2393 } 2394 2395 #undef __FUNCT__ 2396 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2397 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2398 { 2399 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2400 PetscErrorCode ierr; 2401 PetscInt i,bs=Y->rmap->bs,j,bs2=bs*bs; 2402 PetscBLASInt one=1; 2403 2404 PetscFunctionBegin; 2405 if (str == SAME_NONZERO_PATTERN) { 2406 PetscScalar alpha = a; 2407 PetscBLASInt bnz; 2408 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2409 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2410 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2411 if (y->xtoy && y->XtoY != X) { 2412 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2413 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2414 } 2415 if (!y->xtoy) { /* get xtoy */ 2416 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,NULL, y->i,y->j,NULL, &y->xtoy);CHKERRQ(ierr); 2417 y->XtoY = X; 2418 ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2419 } 2420 for (i=0; i<x->nz; i++) { 2421 j = 0; 2422 while (j < bs2) { 2423 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2424 j++; 2425 } 2426 } 2427 ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(double)((PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr); 2428 } else { 2429 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2430 } 2431 PetscFunctionReturn(0); 2432 } 2433 2434 #undef __FUNCT__ 2435 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2436 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2437 { 2438 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2439 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2440 MatScalar *aa = a->a; 2441 2442 PetscFunctionBegin; 2443 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2444 PetscFunctionReturn(0); 2445 } 2446 2447 #undef __FUNCT__ 2448 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2449 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2450 { 2451 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2452 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2453 MatScalar *aa = a->a; 2454 2455 PetscFunctionBegin; 2456 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2457 PetscFunctionReturn(0); 2458 } 2459 2460 #undef __FUNCT__ 2461 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2462 /* 2463 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2464 */ 2465 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2466 { 2467 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2468 PetscErrorCode ierr; 2469 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2470 PetscInt nz = a->i[m],row,*jj,mr,col; 2471 2472 PetscFunctionBegin; 2473 *nn = n; 2474 if (!ia) PetscFunctionReturn(0); 2475 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2476 else { 2477 ierr = PetscCalloc1((n+1),&collengths);CHKERRQ(ierr); 2478 ierr = PetscMalloc1((n+1),&cia);CHKERRQ(ierr); 2479 ierr = PetscMalloc1((nz+1),&cja);CHKERRQ(ierr); 2480 jj = a->j; 2481 for (i=0; i<nz; i++) { 2482 collengths[jj[i]]++; 2483 } 2484 cia[0] = oshift; 2485 for (i=0; i<n; i++) { 2486 cia[i+1] = cia[i] + collengths[i]; 2487 } 2488 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2489 jj = a->j; 2490 for (row=0; row<m; row++) { 2491 mr = a->i[row+1] - a->i[row]; 2492 for (i=0; i<mr; i++) { 2493 col = *jj++; 2494 2495 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2496 } 2497 } 2498 ierr = PetscFree(collengths);CHKERRQ(ierr); 2499 *ia = cia; *ja = cja; 2500 } 2501 PetscFunctionReturn(0); 2502 } 2503 2504 #undef __FUNCT__ 2505 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2506 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2507 { 2508 PetscErrorCode ierr; 2509 2510 PetscFunctionBegin; 2511 if (!ia) PetscFunctionReturn(0); 2512 ierr = PetscFree(*ia);CHKERRQ(ierr); 2513 ierr = PetscFree(*ja);CHKERRQ(ierr); 2514 PetscFunctionReturn(0); 2515 } 2516 2517 /* 2518 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2519 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2520 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2521 */ 2522 #undef __FUNCT__ 2523 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color" 2524 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2525 { 2526 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2527 PetscErrorCode ierr; 2528 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2529 PetscInt nz = a->i[m],row,*jj,mr,col; 2530 PetscInt *cspidx; 2531 2532 PetscFunctionBegin; 2533 *nn = n; 2534 if (!ia) PetscFunctionReturn(0); 2535 2536 ierr = PetscCalloc1((n+1),&collengths);CHKERRQ(ierr); 2537 ierr = PetscMalloc1((n+1),&cia);CHKERRQ(ierr); 2538 ierr = PetscMalloc1((nz+1),&cja);CHKERRQ(ierr); 2539 ierr = PetscMalloc1((nz+1),&cspidx);CHKERRQ(ierr); 2540 jj = a->j; 2541 for (i=0; i<nz; i++) { 2542 collengths[jj[i]]++; 2543 } 2544 cia[0] = oshift; 2545 for (i=0; i<n; i++) { 2546 cia[i+1] = cia[i] + collengths[i]; 2547 } 2548 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2549 jj = a->j; 2550 for (row=0; row<m; row++) { 2551 mr = a->i[row+1] - a->i[row]; 2552 for (i=0; i<mr; i++) { 2553 col = *jj++; 2554 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2555 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2556 } 2557 } 2558 ierr = PetscFree(collengths);CHKERRQ(ierr); 2559 *ia = cia; *ja = cja; 2560 *spidx = cspidx; 2561 PetscFunctionReturn(0); 2562 } 2563 2564 #undef __FUNCT__ 2565 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color" 2566 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2567 { 2568 PetscErrorCode ierr; 2569 2570 PetscFunctionBegin; 2571 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2572 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2573 PetscFunctionReturn(0); 2574 } 2575 2576 /* -------------------------------------------------------------------*/ 2577 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2578 MatGetRow_SeqBAIJ, 2579 MatRestoreRow_SeqBAIJ, 2580 MatMult_SeqBAIJ_N, 2581 /* 4*/ MatMultAdd_SeqBAIJ_N, 2582 MatMultTranspose_SeqBAIJ, 2583 MatMultTransposeAdd_SeqBAIJ, 2584 0, 2585 0, 2586 0, 2587 /* 10*/ 0, 2588 MatLUFactor_SeqBAIJ, 2589 0, 2590 0, 2591 MatTranspose_SeqBAIJ, 2592 /* 15*/ MatGetInfo_SeqBAIJ, 2593 MatEqual_SeqBAIJ, 2594 MatGetDiagonal_SeqBAIJ, 2595 MatDiagonalScale_SeqBAIJ, 2596 MatNorm_SeqBAIJ, 2597 /* 20*/ 0, 2598 MatAssemblyEnd_SeqBAIJ, 2599 MatSetOption_SeqBAIJ, 2600 MatZeroEntries_SeqBAIJ, 2601 /* 24*/ MatZeroRows_SeqBAIJ, 2602 0, 2603 0, 2604 0, 2605 0, 2606 /* 29*/ MatSetUp_SeqBAIJ, 2607 0, 2608 0, 2609 0, 2610 0, 2611 /* 34*/ MatDuplicate_SeqBAIJ, 2612 0, 2613 0, 2614 MatILUFactor_SeqBAIJ, 2615 0, 2616 /* 39*/ MatAXPY_SeqBAIJ, 2617 MatGetSubMatrices_SeqBAIJ, 2618 MatIncreaseOverlap_SeqBAIJ, 2619 MatGetValues_SeqBAIJ, 2620 MatCopy_SeqBAIJ, 2621 /* 44*/ 0, 2622 MatScale_SeqBAIJ, 2623 0, 2624 0, 2625 MatZeroRowsColumns_SeqBAIJ, 2626 /* 49*/ 0, 2627 MatGetRowIJ_SeqBAIJ, 2628 MatRestoreRowIJ_SeqBAIJ, 2629 MatGetColumnIJ_SeqBAIJ, 2630 MatRestoreColumnIJ_SeqBAIJ, 2631 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2632 0, 2633 0, 2634 0, 2635 MatSetValuesBlocked_SeqBAIJ, 2636 /* 59*/ MatGetSubMatrix_SeqBAIJ, 2637 MatDestroy_SeqBAIJ, 2638 MatView_SeqBAIJ, 2639 0, 2640 0, 2641 /* 64*/ 0, 2642 0, 2643 0, 2644 0, 2645 0, 2646 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2647 0, 2648 MatConvert_Basic, 2649 0, 2650 0, 2651 /* 74*/ 0, 2652 MatFDColoringApply_BAIJ, 2653 0, 2654 0, 2655 0, 2656 /* 79*/ 0, 2657 0, 2658 0, 2659 0, 2660 MatLoad_SeqBAIJ, 2661 /* 84*/ 0, 2662 0, 2663 0, 2664 0, 2665 0, 2666 /* 89*/ 0, 2667 0, 2668 0, 2669 0, 2670 0, 2671 /* 94*/ 0, 2672 0, 2673 0, 2674 0, 2675 0, 2676 /* 99*/ 0, 2677 0, 2678 0, 2679 0, 2680 0, 2681 /*104*/ 0, 2682 MatRealPart_SeqBAIJ, 2683 MatImaginaryPart_SeqBAIJ, 2684 0, 2685 0, 2686 /*109*/ 0, 2687 0, 2688 0, 2689 0, 2690 MatMissingDiagonal_SeqBAIJ, 2691 /*114*/ 0, 2692 0, 2693 0, 2694 0, 2695 0, 2696 /*119*/ 0, 2697 0, 2698 MatMultHermitianTranspose_SeqBAIJ, 2699 MatMultHermitianTransposeAdd_SeqBAIJ, 2700 0, 2701 /*124*/ 0, 2702 0, 2703 MatInvertBlockDiagonal_SeqBAIJ, 2704 0, 2705 0, 2706 /*129*/ 0, 2707 0, 2708 0, 2709 0, 2710 0, 2711 /*134*/ 0, 2712 0, 2713 0, 2714 0, 2715 0, 2716 /*139*/ 0, 2717 0, 2718 0, 2719 MatFDColoringSetUp_SeqXAIJ 2720 }; 2721 2722 #undef __FUNCT__ 2723 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2724 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2725 { 2726 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2727 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2728 PetscErrorCode ierr; 2729 2730 PetscFunctionBegin; 2731 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2732 2733 /* allocate space for values if not already there */ 2734 if (!aij->saved_values) { 2735 ierr = PetscMalloc1((nz+1),&aij->saved_values);CHKERRQ(ierr); 2736 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2737 } 2738 2739 /* copy values over */ 2740 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2741 PetscFunctionReturn(0); 2742 } 2743 2744 #undef __FUNCT__ 2745 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2746 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2747 { 2748 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2749 PetscErrorCode ierr; 2750 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2751 2752 PetscFunctionBegin; 2753 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2754 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2755 2756 /* copy values over */ 2757 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2758 PetscFunctionReturn(0); 2759 } 2760 2761 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2762 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2763 2764 #undef __FUNCT__ 2765 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2766 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2767 { 2768 Mat_SeqBAIJ *b; 2769 PetscErrorCode ierr; 2770 PetscInt i,mbs,nbs,bs2; 2771 PetscBool flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2772 2773 PetscFunctionBegin; 2774 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2775 if (nz == MAT_SKIP_ALLOCATION) { 2776 skipallocation = PETSC_TRUE; 2777 nz = 0; 2778 } 2779 2780 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2781 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2782 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2783 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2784 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2785 2786 B->preallocated = PETSC_TRUE; 2787 2788 mbs = B->rmap->n/bs; 2789 nbs = B->cmap->n/bs; 2790 bs2 = bs*bs; 2791 2792 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2793 2794 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2795 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2796 if (nnz) { 2797 for (i=0; i<mbs; i++) { 2798 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2799 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2800 } 2801 } 2802 2803 b = (Mat_SeqBAIJ*)B->data; 2804 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2805 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,PETSC_FALSE,&flg,NULL);CHKERRQ(ierr); 2806 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2807 2808 if (!flg) { 2809 switch (bs) { 2810 case 1: 2811 B->ops->mult = MatMult_SeqBAIJ_1; 2812 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2813 break; 2814 case 2: 2815 B->ops->mult = MatMult_SeqBAIJ_2; 2816 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2817 break; 2818 case 3: 2819 B->ops->mult = MatMult_SeqBAIJ_3; 2820 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2821 break; 2822 case 4: 2823 B->ops->mult = MatMult_SeqBAIJ_4; 2824 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2825 break; 2826 case 5: 2827 B->ops->mult = MatMult_SeqBAIJ_5; 2828 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2829 break; 2830 case 6: 2831 B->ops->mult = MatMult_SeqBAIJ_6; 2832 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2833 break; 2834 case 7: 2835 B->ops->mult = MatMult_SeqBAIJ_7; 2836 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2837 break; 2838 case 15: 2839 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2840 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2841 break; 2842 default: 2843 B->ops->mult = MatMult_SeqBAIJ_N; 2844 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2845 break; 2846 } 2847 } 2848 B->ops->sor = MatSOR_SeqBAIJ; 2849 b->mbs = mbs; 2850 b->nbs = nbs; 2851 if (!skipallocation) { 2852 if (!b->imax) { 2853 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2854 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2855 2856 b->free_imax_ilen = PETSC_TRUE; 2857 } 2858 /* b->ilen will count nonzeros in each block row so far. */ 2859 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2860 if (!nnz) { 2861 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2862 else if (nz < 0) nz = 1; 2863 for (i=0; i<mbs; i++) b->imax[i] = nz; 2864 nz = nz*mbs; 2865 } else { 2866 nz = 0; 2867 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2868 } 2869 2870 /* allocate the matrix space */ 2871 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2872 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2873 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2874 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2875 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2876 2877 b->singlemalloc = PETSC_TRUE; 2878 b->i[0] = 0; 2879 for (i=1; i<mbs+1; i++) { 2880 b->i[i] = b->i[i-1] + b->imax[i-1]; 2881 } 2882 b->free_a = PETSC_TRUE; 2883 b->free_ij = PETSC_TRUE; 2884 #if defined(PETSC_THREADCOMM_ACTIVE) 2885 ierr = MatZeroEntries_SeqBAIJ(B);CHKERRQ(ierr); 2886 #endif 2887 } else { 2888 b->free_a = PETSC_FALSE; 2889 b->free_ij = PETSC_FALSE; 2890 } 2891 2892 b->bs2 = bs2; 2893 b->mbs = mbs; 2894 b->nz = 0; 2895 b->maxnz = nz; 2896 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2897 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2898 PetscFunctionReturn(0); 2899 } 2900 2901 #undef __FUNCT__ 2902 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 2903 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2904 { 2905 PetscInt i,m,nz,nz_max=0,*nnz; 2906 PetscScalar *values=0; 2907 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2908 PetscErrorCode ierr; 2909 2910 PetscFunctionBegin; 2911 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2912 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2913 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2914 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2915 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2916 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2917 m = B->rmap->n/bs; 2918 2919 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2920 ierr = PetscMalloc1((m+1), &nnz);CHKERRQ(ierr); 2921 for (i=0; i<m; i++) { 2922 nz = ii[i+1]- ii[i]; 2923 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2924 nz_max = PetscMax(nz_max, nz); 2925 nnz[i] = nz; 2926 } 2927 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2928 ierr = PetscFree(nnz);CHKERRQ(ierr); 2929 2930 values = (PetscScalar*)V; 2931 if (!values) { 2932 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2933 } 2934 for (i=0; i<m; i++) { 2935 PetscInt ncols = ii[i+1] - ii[i]; 2936 const PetscInt *icols = jj + ii[i]; 2937 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2938 if (!roworiented) { 2939 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2940 } else { 2941 PetscInt j; 2942 for (j=0; j<ncols; j++) { 2943 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2944 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2945 } 2946 } 2947 } 2948 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2949 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2950 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2951 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2952 PetscFunctionReturn(0); 2953 } 2954 2955 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 2956 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*); 2957 #if defined(PETSC_HAVE_MUMPS) 2958 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 2959 #endif 2960 extern PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,PetscBool*); 2961 2962 /*MC 2963 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2964 block sparse compressed row format. 2965 2966 Options Database Keys: 2967 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2968 2969 Level: beginner 2970 2971 .seealso: MatCreateSeqBAIJ() 2972 M*/ 2973 2974 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 2975 2976 #undef __FUNCT__ 2977 #define __FUNCT__ "MatCreate_SeqBAIJ" 2978 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 2979 { 2980 PetscErrorCode ierr; 2981 PetscMPIInt size; 2982 Mat_SeqBAIJ *b; 2983 2984 PetscFunctionBegin; 2985 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2986 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 2987 2988 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2989 B->data = (void*)b; 2990 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2991 2992 b->row = 0; 2993 b->col = 0; 2994 b->icol = 0; 2995 b->reallocs = 0; 2996 b->saved_values = 0; 2997 2998 b->roworiented = PETSC_TRUE; 2999 b->nonew = 0; 3000 b->diag = 0; 3001 b->solve_work = 0; 3002 b->mult_work = 0; 3003 B->spptr = 0; 3004 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3005 b->keepnonzeropattern = PETSC_FALSE; 3006 b->xtoy = 0; 3007 b->XtoY = 0; 3008 B->same_nonzero = PETSC_FALSE; 3009 3010 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactorAvailable_petsc_C",MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3011 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_petsc_C",MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3012 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_bstrm_C",MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr); 3013 #if defined(PETSC_HAVE_MUMPS) 3014 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C", MatGetFactor_baij_mumps);CHKERRQ(ierr); 3015 #endif 3016 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3017 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3018 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3019 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3020 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3021 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3022 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3023 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3024 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3025 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3026 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3027 PetscFunctionReturn(0); 3028 } 3029 3030 #undef __FUNCT__ 3031 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3032 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3033 { 3034 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3035 PetscErrorCode ierr; 3036 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3037 3038 PetscFunctionBegin; 3039 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3040 3041 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3042 c->imax = a->imax; 3043 c->ilen = a->ilen; 3044 c->free_imax_ilen = PETSC_FALSE; 3045 } else { 3046 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3047 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3048 for (i=0; i<mbs; i++) { 3049 c->imax[i] = a->imax[i]; 3050 c->ilen[i] = a->ilen[i]; 3051 } 3052 c->free_imax_ilen = PETSC_TRUE; 3053 } 3054 3055 /* allocate the matrix space */ 3056 if (mallocmatspace) { 3057 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3058 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3059 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3060 3061 c->i = a->i; 3062 c->j = a->j; 3063 c->singlemalloc = PETSC_FALSE; 3064 c->free_a = PETSC_TRUE; 3065 c->free_ij = PETSC_FALSE; 3066 c->parent = A; 3067 C->preallocated = PETSC_TRUE; 3068 C->assembled = PETSC_TRUE; 3069 3070 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3071 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3072 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3073 } else { 3074 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3075 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3076 3077 c->singlemalloc = PETSC_TRUE; 3078 c->free_a = PETSC_TRUE; 3079 c->free_ij = PETSC_TRUE; 3080 3081 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3082 if (mbs > 0) { 3083 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3084 if (cpvalues == MAT_COPY_VALUES) { 3085 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3086 } else { 3087 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3088 } 3089 } 3090 C->preallocated = PETSC_TRUE; 3091 C->assembled = PETSC_TRUE; 3092 } 3093 } 3094 3095 c->roworiented = a->roworiented; 3096 c->nonew = a->nonew; 3097 3098 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3099 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3100 3101 c->bs2 = a->bs2; 3102 c->mbs = a->mbs; 3103 c->nbs = a->nbs; 3104 3105 if (a->diag) { 3106 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3107 c->diag = a->diag; 3108 c->free_diag = PETSC_FALSE; 3109 } else { 3110 ierr = PetscMalloc1((mbs+1),&c->diag);CHKERRQ(ierr); 3111 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3112 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3113 c->free_diag = PETSC_TRUE; 3114 } 3115 } else c->diag = 0; 3116 3117 c->nz = a->nz; 3118 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3119 c->solve_work = 0; 3120 c->mult_work = 0; 3121 3122 c->compressedrow.use = a->compressedrow.use; 3123 c->compressedrow.nrows = a->compressedrow.nrows; 3124 if (a->compressedrow.use) { 3125 i = a->compressedrow.nrows; 3126 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3127 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3128 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3129 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3130 } else { 3131 c->compressedrow.use = PETSC_FALSE; 3132 c->compressedrow.i = NULL; 3133 c->compressedrow.rindex = NULL; 3134 } 3135 C->same_nonzero = A->same_nonzero; 3136 3137 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3138 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3139 PetscFunctionReturn(0); 3140 } 3141 3142 #undef __FUNCT__ 3143 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3144 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3145 { 3146 PetscErrorCode ierr; 3147 3148 PetscFunctionBegin; 3149 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3150 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3151 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3152 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3153 PetscFunctionReturn(0); 3154 } 3155 3156 #undef __FUNCT__ 3157 #define __FUNCT__ "MatLoad_SeqBAIJ" 3158 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3159 { 3160 Mat_SeqBAIJ *a; 3161 PetscErrorCode ierr; 3162 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3163 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3164 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3165 PetscInt *masked,nmask,tmp,bs2,ishift; 3166 PetscMPIInt size; 3167 int fd; 3168 PetscScalar *aa; 3169 MPI_Comm comm; 3170 3171 PetscFunctionBegin; 3172 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3173 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3174 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3175 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3176 bs2 = bs*bs; 3177 3178 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3179 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3180 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3181 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3182 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3183 M = header[1]; N = header[2]; nz = header[3]; 3184 3185 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3186 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3187 3188 /* 3189 This code adds extra rows to make sure the number of rows is 3190 divisible by the blocksize 3191 */ 3192 mbs = M/bs; 3193 extra_rows = bs - M + bs*(mbs); 3194 if (extra_rows == bs) extra_rows = 0; 3195 else mbs++; 3196 if (extra_rows) { 3197 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3198 } 3199 3200 /* Set global sizes if not already set */ 3201 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3202 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3203 } else { /* Check if the matrix global sizes are correct */ 3204 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3205 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3206 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3207 } 3208 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3209 } 3210 3211 /* read in row lengths */ 3212 ierr = PetscMalloc1((M+extra_rows),&rowlengths);CHKERRQ(ierr); 3213 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3214 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3215 3216 /* read in column indices */ 3217 ierr = PetscMalloc1((nz+extra_rows),&jj);CHKERRQ(ierr); 3218 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3219 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3220 3221 /* loop over row lengths determining block row lengths */ 3222 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3223 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3224 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3225 rowcount = 0; 3226 nzcount = 0; 3227 for (i=0; i<mbs; i++) { 3228 nmask = 0; 3229 for (j=0; j<bs; j++) { 3230 kmax = rowlengths[rowcount]; 3231 for (k=0; k<kmax; k++) { 3232 tmp = jj[nzcount++]/bs; 3233 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3234 } 3235 rowcount++; 3236 } 3237 browlengths[i] += nmask; 3238 /* zero out the mask elements we set */ 3239 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3240 } 3241 3242 /* Do preallocation */ 3243 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3244 a = (Mat_SeqBAIJ*)newmat->data; 3245 3246 /* set matrix "i" values */ 3247 a->i[0] = 0; 3248 for (i=1; i<= mbs; i++) { 3249 a->i[i] = a->i[i-1] + browlengths[i-1]; 3250 a->ilen[i-1] = browlengths[i-1]; 3251 } 3252 a->nz = 0; 3253 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3254 3255 /* read in nonzero values */ 3256 ierr = PetscMalloc1((nz+extra_rows),&aa);CHKERRQ(ierr); 3257 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3258 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3259 3260 /* set "a" and "j" values into matrix */ 3261 nzcount = 0; jcount = 0; 3262 for (i=0; i<mbs; i++) { 3263 nzcountb = nzcount; 3264 nmask = 0; 3265 for (j=0; j<bs; j++) { 3266 kmax = rowlengths[i*bs+j]; 3267 for (k=0; k<kmax; k++) { 3268 tmp = jj[nzcount++]/bs; 3269 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3270 } 3271 } 3272 /* sort the masked values */ 3273 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3274 3275 /* set "j" values into matrix */ 3276 maskcount = 1; 3277 for (j=0; j<nmask; j++) { 3278 a->j[jcount++] = masked[j]; 3279 mask[masked[j]] = maskcount++; 3280 } 3281 /* set "a" values into matrix */ 3282 ishift = bs2*a->i[i]; 3283 for (j=0; j<bs; j++) { 3284 kmax = rowlengths[i*bs+j]; 3285 for (k=0; k<kmax; k++) { 3286 tmp = jj[nzcountb]/bs; 3287 block = mask[tmp] - 1; 3288 point = jj[nzcountb] - bs*tmp; 3289 idx = ishift + bs2*block + j + bs*point; 3290 a->a[idx] = (MatScalar)aa[nzcountb++]; 3291 } 3292 } 3293 /* zero out the mask elements we set */ 3294 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3295 } 3296 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3297 3298 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3299 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3300 ierr = PetscFree(aa);CHKERRQ(ierr); 3301 ierr = PetscFree(jj);CHKERRQ(ierr); 3302 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3303 3304 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3305 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3306 PetscFunctionReturn(0); 3307 } 3308 3309 #undef __FUNCT__ 3310 #define __FUNCT__ "MatCreateSeqBAIJ" 3311 /*@C 3312 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3313 compressed row) format. For good matrix assembly performance the 3314 user should preallocate the matrix storage by setting the parameter nz 3315 (or the array nnz). By setting these parameters accurately, performance 3316 during matrix assembly can be increased by more than a factor of 50. 3317 3318 Collective on MPI_Comm 3319 3320 Input Parameters: 3321 + comm - MPI communicator, set to PETSC_COMM_SELF 3322 . bs - size of block 3323 . m - number of rows 3324 . n - number of columns 3325 . nz - number of nonzero blocks per block row (same for all rows) 3326 - nnz - array containing the number of nonzero blocks in the various block rows 3327 (possibly different for each block row) or NULL 3328 3329 Output Parameter: 3330 . A - the matrix 3331 3332 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3333 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3334 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3335 3336 Options Database Keys: 3337 . -mat_no_unroll - uses code that does not unroll the loops in the 3338 block calculations (much slower) 3339 . -mat_block_size - size of the blocks to use 3340 3341 Level: intermediate 3342 3343 Notes: 3344 The number of rows and columns must be divisible by blocksize. 3345 3346 If the nnz parameter is given then the nz parameter is ignored 3347 3348 A nonzero block is any block that as 1 or more nonzeros in it 3349 3350 The block AIJ format is fully compatible with standard Fortran 77 3351 storage. That is, the stored row and column indices can begin at 3352 either one (as in Fortran) or zero. See the users' manual for details. 3353 3354 Specify the preallocated storage with either nz or nnz (not both). 3355 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3356 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3357 matrices. 3358 3359 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3360 @*/ 3361 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3362 { 3363 PetscErrorCode ierr; 3364 3365 PetscFunctionBegin; 3366 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3367 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3368 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3369 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3370 PetscFunctionReturn(0); 3371 } 3372 3373 #undef __FUNCT__ 3374 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3375 /*@C 3376 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3377 per row in the matrix. For good matrix assembly performance the 3378 user should preallocate the matrix storage by setting the parameter nz 3379 (or the array nnz). By setting these parameters accurately, performance 3380 during matrix assembly can be increased by more than a factor of 50. 3381 3382 Collective on MPI_Comm 3383 3384 Input Parameters: 3385 + A - the matrix 3386 . bs - size of block 3387 . nz - number of block nonzeros per block row (same for all rows) 3388 - nnz - array containing the number of block nonzeros in the various block rows 3389 (possibly different for each block row) or NULL 3390 3391 Options Database Keys: 3392 . -mat_no_unroll - uses code that does not unroll the loops in the 3393 block calculations (much slower) 3394 . -mat_block_size - size of the blocks to use 3395 3396 Level: intermediate 3397 3398 Notes: 3399 If the nnz parameter is given then the nz parameter is ignored 3400 3401 You can call MatGetInfo() to get information on how effective the preallocation was; 3402 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3403 You can also run with the option -info and look for messages with the string 3404 malloc in them to see if additional memory allocation was needed. 3405 3406 The block AIJ format is fully compatible with standard Fortran 77 3407 storage. That is, the stored row and column indices can begin at 3408 either one (as in Fortran) or zero. See the users' manual for details. 3409 3410 Specify the preallocated storage with either nz or nnz (not both). 3411 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3412 allocation. See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3413 3414 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3415 @*/ 3416 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3417 { 3418 PetscErrorCode ierr; 3419 3420 PetscFunctionBegin; 3421 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3422 PetscValidType(B,1); 3423 PetscValidLogicalCollectiveInt(B,bs,2); 3424 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3425 PetscFunctionReturn(0); 3426 } 3427 3428 #undef __FUNCT__ 3429 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3430 /*@C 3431 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3432 (the default sequential PETSc format). 3433 3434 Collective on MPI_Comm 3435 3436 Input Parameters: 3437 + A - the matrix 3438 . i - the indices into j for the start of each local row (starts with zero) 3439 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3440 - v - optional values in the matrix 3441 3442 Level: developer 3443 3444 Notes: 3445 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3446 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3447 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3448 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3449 block column and the second index is over columns within a block. 3450 3451 .keywords: matrix, aij, compressed row, sparse 3452 3453 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3454 @*/ 3455 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3456 { 3457 PetscErrorCode ierr; 3458 3459 PetscFunctionBegin; 3460 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3461 PetscValidType(B,1); 3462 PetscValidLogicalCollectiveInt(B,bs,2); 3463 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3464 PetscFunctionReturn(0); 3465 } 3466 3467 3468 #undef __FUNCT__ 3469 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3470 /*@ 3471 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3472 3473 Collective on MPI_Comm 3474 3475 Input Parameters: 3476 + comm - must be an MPI communicator of size 1 3477 . bs - size of block 3478 . m - number of rows 3479 . n - number of columns 3480 . i - row indices 3481 . j - column indices 3482 - a - matrix values 3483 3484 Output Parameter: 3485 . mat - the matrix 3486 3487 Level: advanced 3488 3489 Notes: 3490 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3491 once the matrix is destroyed 3492 3493 You cannot set new nonzero locations into this matrix, that will generate an error. 3494 3495 The i and j indices are 0 based 3496 3497 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3498 3499 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3500 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3501 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3502 with column-major ordering within blocks. 3503 3504 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3505 3506 @*/ 3507 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat) 3508 { 3509 PetscErrorCode ierr; 3510 PetscInt ii; 3511 Mat_SeqBAIJ *baij; 3512 3513 PetscFunctionBegin; 3514 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3515 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3516 3517 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3518 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3519 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3520 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3521 baij = (Mat_SeqBAIJ*)(*mat)->data; 3522 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3523 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3524 3525 baij->i = i; 3526 baij->j = j; 3527 baij->a = a; 3528 3529 baij->singlemalloc = PETSC_FALSE; 3530 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3531 baij->free_a = PETSC_FALSE; 3532 baij->free_ij = PETSC_FALSE; 3533 3534 for (ii=0; ii<m; ii++) { 3535 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3536 #if defined(PETSC_USE_DEBUG) 3537 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3538 #endif 3539 } 3540 #if defined(PETSC_USE_DEBUG) 3541 for (ii=0; ii<baij->i[m]; ii++) { 3542 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3543 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3544 } 3545 #endif 3546 3547 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3548 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3549 PetscFunctionReturn(0); 3550 } 3551