1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc-private/kernels/blockinvert.h> 9 #include <petsc-private/kernels/blockmatmult.h> 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 19 PetscReal shift = 0.0; 20 21 PetscFunctionBegin; 22 if (a->idiagvalid) { 23 if (values) *values = a->idiag; 24 PetscFunctionReturn(0); 25 } 26 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 27 diag_offset = a->diag; 28 if (!a->idiag) { 29 ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr); 30 ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 31 } 32 diag = a->idiag; 33 mdiag = a->idiag+bs2*mbs; 34 if (values) *values = a->idiag; 35 /* factor and invert each block */ 36 switch (bs) { 37 case 1: 38 for (i=0; i<mbs; i++) { 39 odiag = v + 1*diag_offset[i]; 40 diag[0] = odiag[0]; 41 mdiag[0] = odiag[0]; 42 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 43 diag += 1; 44 mdiag += 1; 45 } 46 break; 47 case 2: 48 for (i=0; i<mbs; i++) { 49 odiag = v + 4*diag_offset[i]; 50 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 51 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 52 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 53 diag += 4; 54 mdiag += 4; 55 } 56 break; 57 case 3: 58 for (i=0; i<mbs; i++) { 59 odiag = v + 9*diag_offset[i]; 60 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 61 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 62 diag[8] = odiag[8]; 63 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 64 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 65 mdiag[8] = odiag[8]; 66 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 67 diag += 9; 68 mdiag += 9; 69 } 70 break; 71 case 4: 72 for (i=0; i<mbs; i++) { 73 odiag = v + 16*diag_offset[i]; 74 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 75 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 76 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 77 diag += 16; 78 mdiag += 16; 79 } 80 break; 81 case 5: 82 for (i=0; i<mbs; i++) { 83 odiag = v + 25*diag_offset[i]; 84 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 85 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 86 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 87 diag += 25; 88 mdiag += 25; 89 } 90 break; 91 case 6: 92 for (i=0; i<mbs; i++) { 93 odiag = v + 36*diag_offset[i]; 94 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 95 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 97 diag += 36; 98 mdiag += 36; 99 } 100 break; 101 case 7: 102 for (i=0; i<mbs; i++) { 103 odiag = v + 49*diag_offset[i]; 104 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 105 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 107 diag += 49; 108 mdiag += 49; 109 } 110 break; 111 default: 112 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 113 for (i=0; i<mbs; i++) { 114 odiag = v + bs2*diag_offset[i]; 115 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 117 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 118 diag += bs2; 119 mdiag += bs2; 120 } 121 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 122 } 123 a->idiagvalid = PETSC_TRUE; 124 PetscFunctionReturn(0); 125 } 126 127 #undef __FUNCT__ 128 #define __FUNCT__ "MatSOR_SeqBAIJ" 129 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 130 { 131 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 132 PetscScalar *x,*work,*w,*workt,*t; 133 const MatScalar *v,*aa = a->a, *idiag; 134 const PetscScalar *b,*xb; 135 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 136 PetscErrorCode ierr; 137 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 138 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 139 140 PetscFunctionBegin; 141 its = its*lits; 142 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 143 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 144 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 145 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 146 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 147 148 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 149 150 if (!m) PetscFunctionReturn(0); 151 diag = a->diag; 152 idiag = a->idiag; 153 k = PetscMax(A->rmap->n,A->cmap->n); 154 if (!a->mult_work) { 155 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 156 } 157 if (!a->sor_workt) { 158 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 159 } 160 if (!a->sor_work) { 161 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 162 } 163 work = a->mult_work; 164 t = a->sor_workt; 165 w = a->sor_work; 166 167 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 168 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 169 170 if (flag & SOR_ZERO_INITIAL_GUESS) { 171 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 172 switch (bs) { 173 case 1: 174 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 175 t[0] = b[0]; 176 i2 = 1; 177 idiag += 1; 178 for (i=1; i<m; i++) { 179 v = aa + ai[i]; 180 vi = aj + ai[i]; 181 nz = diag[i] - ai[i]; 182 s[0] = b[i2]; 183 for (j=0; j<nz; j++) { 184 xw[0] = x[vi[j]]; 185 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 186 } 187 t[i2] = s[0]; 188 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 189 x[i2] = xw[0]; 190 idiag += 1; 191 i2 += 1; 192 } 193 break; 194 case 2: 195 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 196 t[0] = b[0]; t[1] = b[1]; 197 i2 = 2; 198 idiag += 4; 199 for (i=1; i<m; i++) { 200 v = aa + 4*ai[i]; 201 vi = aj + ai[i]; 202 nz = diag[i] - ai[i]; 203 s[0] = b[i2]; s[1] = b[i2+1]; 204 for (j=0; j<nz; j++) { 205 idx = 2*vi[j]; 206 it = 4*j; 207 xw[0] = x[idx]; xw[1] = x[1+idx]; 208 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 209 } 210 t[i2] = s[0]; t[i2+1] = s[1]; 211 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 212 x[i2] = xw[0]; x[i2+1] = xw[1]; 213 idiag += 4; 214 i2 += 2; 215 } 216 break; 217 case 3: 218 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 219 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 220 i2 = 3; 221 idiag += 9; 222 for (i=1; i<m; i++) { 223 v = aa + 9*ai[i]; 224 vi = aj + ai[i]; 225 nz = diag[i] - ai[i]; 226 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 227 while (nz--) { 228 idx = 3*(*vi++); 229 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 230 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 231 v += 9; 232 } 233 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 234 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 235 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 236 idiag += 9; 237 i2 += 3; 238 } 239 break; 240 case 4: 241 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 242 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 243 i2 = 4; 244 idiag += 16; 245 for (i=1; i<m; i++) { 246 v = aa + 16*ai[i]; 247 vi = aj + ai[i]; 248 nz = diag[i] - ai[i]; 249 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 250 while (nz--) { 251 idx = 4*(*vi++); 252 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 253 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 254 v += 16; 255 } 256 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 257 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 258 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 259 idiag += 16; 260 i2 += 4; 261 } 262 break; 263 case 5: 264 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 265 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 266 i2 = 5; 267 idiag += 25; 268 for (i=1; i<m; i++) { 269 v = aa + 25*ai[i]; 270 vi = aj + ai[i]; 271 nz = diag[i] - ai[i]; 272 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 273 while (nz--) { 274 idx = 5*(*vi++); 275 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 276 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 277 v += 25; 278 } 279 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 280 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 281 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 282 idiag += 25; 283 i2 += 5; 284 } 285 break; 286 case 6: 287 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 288 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 289 i2 = 6; 290 idiag += 36; 291 for (i=1; i<m; i++) { 292 v = aa + 36*ai[i]; 293 vi = aj + ai[i]; 294 nz = diag[i] - ai[i]; 295 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 296 while (nz--) { 297 idx = 6*(*vi++); 298 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 299 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 300 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 301 v += 36; 302 } 303 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 304 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 305 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 306 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 307 idiag += 36; 308 i2 += 6; 309 } 310 break; 311 case 7: 312 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 313 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 314 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 315 i2 = 7; 316 idiag += 49; 317 for (i=1; i<m; i++) { 318 v = aa + 49*ai[i]; 319 vi = aj + ai[i]; 320 nz = diag[i] - ai[i]; 321 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 322 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 323 while (nz--) { 324 idx = 7*(*vi++); 325 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 326 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 327 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 328 v += 49; 329 } 330 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 331 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 332 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 333 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 334 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 335 idiag += 49; 336 i2 += 7; 337 } 338 break; 339 default: 340 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 341 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 342 i2 = bs; 343 idiag += bs2; 344 for (i=1; i<m; i++) { 345 v = aa + bs2*ai[i]; 346 vi = aj + ai[i]; 347 nz = diag[i] - ai[i]; 348 349 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 350 /* copy all rows of x that are needed into contiguous space */ 351 workt = work; 352 for (j=0; j<nz; j++) { 353 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 354 workt += bs; 355 } 356 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 357 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 358 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 359 360 idiag += bs2; 361 i2 += bs; 362 } 363 break; 364 } 365 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 366 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 367 xb = t; 368 } 369 else xb = b; 370 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 371 idiag = a->idiag+bs2*(a->mbs-1); 372 i2 = bs * (m-1); 373 switch (bs) { 374 case 1: 375 s[0] = xb[i2]; 376 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 377 x[i2] = xw[0]; 378 i2 -= 1; 379 for (i=m-2; i>=0; i--) { 380 v = aa + (diag[i]+1); 381 vi = aj + diag[i] + 1; 382 nz = ai[i+1] - diag[i] - 1; 383 s[0] = xb[i2]; 384 for (j=0; j<nz; j++) { 385 xw[0] = x[vi[j]]; 386 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 387 } 388 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 389 x[i2] = xw[0]; 390 idiag -= 1; 391 i2 -= 1; 392 } 393 break; 394 case 2: 395 s[0] = xb[i2]; s[1] = xb[i2+1]; 396 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 397 x[i2] = xw[0]; x[i2+1] = xw[1]; 398 i2 -= 2; 399 idiag -= 4; 400 for (i=m-2; i>=0; i--) { 401 v = aa + 4*(diag[i] + 1); 402 vi = aj + diag[i] + 1; 403 nz = ai[i+1] - diag[i] - 1; 404 s[0] = xb[i2]; s[1] = xb[i2+1]; 405 for (j=0; j<nz; j++) { 406 idx = 2*vi[j]; 407 it = 4*j; 408 xw[0] = x[idx]; xw[1] = x[1+idx]; 409 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 410 } 411 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 412 x[i2] = xw[0]; x[i2+1] = xw[1]; 413 idiag -= 4; 414 i2 -= 2; 415 } 416 break; 417 case 3: 418 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 419 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 420 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 421 i2 -= 3; 422 idiag -= 9; 423 for (i=m-2; i>=0; i--) { 424 v = aa + 9*(diag[i]+1); 425 vi = aj + diag[i] + 1; 426 nz = ai[i+1] - diag[i] - 1; 427 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 428 while (nz--) { 429 idx = 3*(*vi++); 430 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 431 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 432 v += 9; 433 } 434 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 435 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 436 idiag -= 9; 437 i2 -= 3; 438 } 439 break; 440 case 4: 441 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 442 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 443 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 444 i2 -= 4; 445 idiag -= 16; 446 for (i=m-2; i>=0; i--) { 447 v = aa + 16*(diag[i]+1); 448 vi = aj + diag[i] + 1; 449 nz = ai[i+1] - diag[i] - 1; 450 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 451 while (nz--) { 452 idx = 4*(*vi++); 453 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 454 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 455 v += 16; 456 } 457 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 458 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 459 idiag -= 16; 460 i2 -= 4; 461 } 462 break; 463 case 5: 464 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 465 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 466 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 467 i2 -= 5; 468 idiag -= 25; 469 for (i=m-2; i>=0; i--) { 470 v = aa + 25*(diag[i]+1); 471 vi = aj + diag[i] + 1; 472 nz = ai[i+1] - diag[i] - 1; 473 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 474 while (nz--) { 475 idx = 5*(*vi++); 476 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 477 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 478 v += 25; 479 } 480 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 481 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 482 idiag -= 25; 483 i2 -= 5; 484 } 485 break; 486 case 6: 487 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 488 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 489 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 490 i2 -= 6; 491 idiag -= 36; 492 for (i=m-2; i>=0; i--) { 493 v = aa + 36*(diag[i]+1); 494 vi = aj + diag[i] + 1; 495 nz = ai[i+1] - diag[i] - 1; 496 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 497 while (nz--) { 498 idx = 6*(*vi++); 499 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 500 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 501 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 502 v += 36; 503 } 504 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 505 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 506 idiag -= 36; 507 i2 -= 6; 508 } 509 break; 510 case 7: 511 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 512 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 513 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 514 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 515 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 516 i2 -= 7; 517 idiag -= 49; 518 for (i=m-2; i>=0; i--) { 519 v = aa + 49*(diag[i]+1); 520 vi = aj + diag[i] + 1; 521 nz = ai[i+1] - diag[i] - 1; 522 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 523 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 524 while (nz--) { 525 idx = 7*(*vi++); 526 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 527 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 528 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 529 v += 49; 530 } 531 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 532 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 533 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 534 idiag -= 49; 535 i2 -= 7; 536 } 537 break; 538 default: 539 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 540 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 541 i2 -= bs; 542 idiag -= bs2; 543 for (i=m-2; i>=0; i--) { 544 v = aa + bs2*(diag[i]+1); 545 vi = aj + diag[i] + 1; 546 nz = ai[i+1] - diag[i] - 1; 547 548 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 549 /* copy all rows of x that are needed into contiguous space */ 550 workt = work; 551 for (j=0; j<nz; j++) { 552 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 553 workt += bs; 554 } 555 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 556 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 557 558 idiag -= bs2; 559 i2 -= bs; 560 } 561 break; 562 } 563 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 564 } 565 its--; 566 } 567 while (its--) { 568 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 569 idiag = a->idiag; 570 i2 = 0; 571 switch (bs) { 572 case 1: 573 for (i=0; i<m; i++) { 574 v = aa + ai[i]; 575 vi = aj + ai[i]; 576 nz = ai[i+1] - ai[i]; 577 s[0] = b[i2]; 578 for (j=0; j<nz; j++) { 579 xw[0] = x[vi[j]]; 580 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 581 } 582 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 583 x[i2] += xw[0]; 584 idiag += 1; 585 i2 += 1; 586 } 587 break; 588 case 2: 589 for (i=0; i<m; i++) { 590 v = aa + 4*ai[i]; 591 vi = aj + ai[i]; 592 nz = ai[i+1] - ai[i]; 593 s[0] = b[i2]; s[1] = b[i2+1]; 594 for (j=0; j<nz; j++) { 595 idx = 2*vi[j]; 596 it = 4*j; 597 xw[0] = x[idx]; xw[1] = x[1+idx]; 598 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 599 } 600 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 601 x[i2] += xw[0]; x[i2+1] += xw[1]; 602 idiag += 4; 603 i2 += 2; 604 } 605 break; 606 case 3: 607 for (i=0; i<m; i++) { 608 v = aa + 9*ai[i]; 609 vi = aj + ai[i]; 610 nz = ai[i+1] - ai[i]; 611 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 612 while (nz--) { 613 idx = 3*(*vi++); 614 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 615 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 616 v += 9; 617 } 618 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 619 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 620 idiag += 9; 621 i2 += 3; 622 } 623 break; 624 case 4: 625 for (i=0; i<m; i++) { 626 v = aa + 16*ai[i]; 627 vi = aj + ai[i]; 628 nz = ai[i+1] - ai[i]; 629 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 630 while (nz--) { 631 idx = 4*(*vi++); 632 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 633 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 634 v += 16; 635 } 636 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 637 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 638 idiag += 16; 639 i2 += 4; 640 } 641 break; 642 case 5: 643 for (i=0; i<m; i++) { 644 v = aa + 25*ai[i]; 645 vi = aj + ai[i]; 646 nz = ai[i+1] - ai[i]; 647 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 648 while (nz--) { 649 idx = 5*(*vi++); 650 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 651 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 652 v += 25; 653 } 654 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 655 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 656 idiag += 25; 657 i2 += 5; 658 } 659 break; 660 case 6: 661 for (i=0; i<m; i++) { 662 v = aa + 36*ai[i]; 663 vi = aj + ai[i]; 664 nz = ai[i+1] - ai[i]; 665 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 666 while (nz--) { 667 idx = 6*(*vi++); 668 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 669 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 670 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 671 v += 36; 672 } 673 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 674 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 675 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 676 idiag += 36; 677 i2 += 6; 678 } 679 break; 680 case 7: 681 for (i=0; i<m; i++) { 682 v = aa + 49*ai[i]; 683 vi = aj + ai[i]; 684 nz = ai[i+1] - ai[i]; 685 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 686 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 687 while (nz--) { 688 idx = 7*(*vi++); 689 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 690 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 691 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 692 v += 49; 693 } 694 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 695 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 696 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 697 idiag += 49; 698 i2 += 7; 699 } 700 break; 701 default: 702 for (i=0; i<m; i++) { 703 v = aa + bs2*ai[i]; 704 vi = aj + ai[i]; 705 nz = ai[i+1] - ai[i]; 706 707 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 708 /* copy all rows of x that are needed into contiguous space */ 709 workt = work; 710 for (j=0; j<nz; j++) { 711 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 712 workt += bs; 713 } 714 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 715 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 716 717 idiag += bs2; 718 i2 += bs; 719 } 720 break; 721 } 722 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 723 } 724 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 725 idiag = a->idiag+bs2*(a->mbs-1); 726 i2 = bs * (m-1); 727 switch (bs) { 728 case 1: 729 for (i=m-1; i>=0; i--) { 730 v = aa + ai[i]; 731 vi = aj + ai[i]; 732 nz = ai[i+1] - ai[i]; 733 s[0] = b[i2]; 734 for (j=0; j<nz; j++) { 735 xw[0] = x[vi[j]]; 736 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 737 } 738 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 739 x[i2] += xw[0]; 740 idiag -= 1; 741 i2 -= 1; 742 } 743 break; 744 case 2: 745 for (i=m-1; i>=0; i--) { 746 v = aa + 4*ai[i]; 747 vi = aj + ai[i]; 748 nz = ai[i+1] - ai[i]; 749 s[0] = b[i2]; s[1] = b[i2+1]; 750 for (j=0; j<nz; j++) { 751 idx = 2*vi[j]; 752 it = 4*j; 753 xw[0] = x[idx]; xw[1] = x[1+idx]; 754 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 755 } 756 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 757 x[i2] += xw[0]; x[i2+1] += xw[1]; 758 idiag -= 4; 759 i2 -= 2; 760 } 761 break; 762 case 3: 763 for (i=m-1; i>=0; i--) { 764 v = aa + 9*ai[i]; 765 vi = aj + ai[i]; 766 nz = ai[i+1] - ai[i]; 767 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 768 while (nz--) { 769 idx = 3*(*vi++); 770 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 771 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 772 v += 9; 773 } 774 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 775 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 776 idiag -= 9; 777 i2 -= 3; 778 } 779 break; 780 case 4: 781 for (i=m-1; i>=0; i--) { 782 v = aa + 16*ai[i]; 783 vi = aj + ai[i]; 784 nz = ai[i+1] - ai[i]; 785 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 786 while (nz--) { 787 idx = 4*(*vi++); 788 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 789 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 790 v += 16; 791 } 792 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 793 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 794 idiag -= 16; 795 i2 -= 4; 796 } 797 break; 798 case 5: 799 for (i=m-1; i>=0; i--) { 800 v = aa + 25*ai[i]; 801 vi = aj + ai[i]; 802 nz = ai[i+1] - ai[i]; 803 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 804 while (nz--) { 805 idx = 5*(*vi++); 806 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 807 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 808 v += 25; 809 } 810 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 811 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 812 idiag -= 25; 813 i2 -= 5; 814 } 815 break; 816 case 6: 817 for (i=m-1; i>=0; i--) { 818 v = aa + 36*ai[i]; 819 vi = aj + ai[i]; 820 nz = ai[i+1] - ai[i]; 821 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 822 while (nz--) { 823 idx = 6*(*vi++); 824 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 825 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 826 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 827 v += 36; 828 } 829 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 830 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 831 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 832 idiag -= 36; 833 i2 -= 6; 834 } 835 break; 836 case 7: 837 for (i=m-1; i>=0; i--) { 838 v = aa + 49*ai[i]; 839 vi = aj + ai[i]; 840 nz = ai[i+1] - ai[i]; 841 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 842 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 843 while (nz--) { 844 idx = 7*(*vi++); 845 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 846 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 847 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 848 v += 49; 849 } 850 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 851 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 852 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 853 idiag -= 49; 854 i2 -= 7; 855 } 856 break; 857 default: 858 for (i=m-1; i>=0; i--) { 859 v = aa + bs2*ai[i]; 860 vi = aj + ai[i]; 861 nz = ai[i+1] - ai[i]; 862 863 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 864 /* copy all rows of x that are needed into contiguous space */ 865 workt = work; 866 for (j=0; j<nz; j++) { 867 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 868 workt += bs; 869 } 870 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 871 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 872 873 idiag -= bs2; 874 i2 -= bs; 875 } 876 break; 877 } 878 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 879 } 880 } 881 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 882 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 883 PetscFunctionReturn(0); 884 } 885 886 887 /* 888 Special version for direct calls from Fortran (Used in PETSc-fun3d) 889 */ 890 #if defined(PETSC_HAVE_FORTRAN_CAPS) 891 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 892 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 893 #define matsetvaluesblocked4_ matsetvaluesblocked4 894 #endif 895 896 #undef __FUNCT__ 897 #define __FUNCT__ "matsetvaluesblocked4_" 898 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 899 { 900 Mat A = *AA; 901 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 902 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 903 PetscInt *ai =a->i,*ailen=a->ilen; 904 PetscInt *aj =a->j,stepval,lastcol = -1; 905 const PetscScalar *value = v; 906 MatScalar *ap,*aa = a->a,*bap; 907 908 PetscFunctionBegin; 909 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 910 stepval = (n-1)*4; 911 for (k=0; k<m; k++) { /* loop over added rows */ 912 row = im[k]; 913 rp = aj + ai[row]; 914 ap = aa + 16*ai[row]; 915 nrow = ailen[row]; 916 low = 0; 917 high = nrow; 918 for (l=0; l<n; l++) { /* loop over added columns */ 919 col = in[l]; 920 if (col <= lastcol) low = 0; 921 else high = nrow; 922 lastcol = col; 923 value = v + k*(stepval+4 + l)*4; 924 while (high-low > 7) { 925 t = (low+high)/2; 926 if (rp[t] > col) high = t; 927 else low = t; 928 } 929 for (i=low; i<high; i++) { 930 if (rp[i] > col) break; 931 if (rp[i] == col) { 932 bap = ap + 16*i; 933 for (ii=0; ii<4; ii++,value+=stepval) { 934 for (jj=ii; jj<16; jj+=4) { 935 bap[jj] += *value++; 936 } 937 } 938 goto noinsert2; 939 } 940 } 941 N = nrow++ - 1; 942 high++; /* added new column index thus must search to one higher than before */ 943 /* shift up all the later entries in this row */ 944 for (ii=N; ii>=i; ii--) { 945 rp[ii+1] = rp[ii]; 946 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 947 } 948 if (N >= i) { 949 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 950 } 951 rp[i] = col; 952 bap = ap + 16*i; 953 for (ii=0; ii<4; ii++,value+=stepval) { 954 for (jj=ii; jj<16; jj+=4) { 955 bap[jj] = *value++; 956 } 957 } 958 noinsert2:; 959 low = i; 960 } 961 ailen[row] = nrow; 962 } 963 PetscFunctionReturnVoid(); 964 } 965 966 #if defined(PETSC_HAVE_FORTRAN_CAPS) 967 #define matsetvalues4_ MATSETVALUES4 968 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 969 #define matsetvalues4_ matsetvalues4 970 #endif 971 972 #undef __FUNCT__ 973 #define __FUNCT__ "MatSetValues4_" 974 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 975 { 976 Mat A = *AA; 977 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 978 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 979 PetscInt *ai=a->i,*ailen=a->ilen; 980 PetscInt *aj=a->j,brow,bcol; 981 PetscInt ridx,cidx,lastcol = -1; 982 MatScalar *ap,value,*aa=a->a,*bap; 983 984 PetscFunctionBegin; 985 for (k=0; k<m; k++) { /* loop over added rows */ 986 row = im[k]; brow = row/4; 987 rp = aj + ai[brow]; 988 ap = aa + 16*ai[brow]; 989 nrow = ailen[brow]; 990 low = 0; 991 high = nrow; 992 for (l=0; l<n; l++) { /* loop over added columns */ 993 col = in[l]; bcol = col/4; 994 ridx = row % 4; cidx = col % 4; 995 value = v[l + k*n]; 996 if (col <= lastcol) low = 0; 997 else high = nrow; 998 lastcol = col; 999 while (high-low > 7) { 1000 t = (low+high)/2; 1001 if (rp[t] > bcol) high = t; 1002 else low = t; 1003 } 1004 for (i=low; i<high; i++) { 1005 if (rp[i] > bcol) break; 1006 if (rp[i] == bcol) { 1007 bap = ap + 16*i + 4*cidx + ridx; 1008 *bap += value; 1009 goto noinsert1; 1010 } 1011 } 1012 N = nrow++ - 1; 1013 high++; /* added new column thus must search to one higher than before */ 1014 /* shift up all the later entries in this row */ 1015 for (ii=N; ii>=i; ii--) { 1016 rp[ii+1] = rp[ii]; 1017 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1018 } 1019 if (N>=i) { 1020 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1021 } 1022 rp[i] = bcol; 1023 ap[16*i + 4*cidx + ridx] = value; 1024 noinsert1:; 1025 low = i; 1026 } 1027 ailen[brow] = nrow; 1028 } 1029 PetscFunctionReturnVoid(); 1030 } 1031 1032 /* 1033 Checks for missing diagonals 1034 */ 1035 #undef __FUNCT__ 1036 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1037 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1038 { 1039 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1040 PetscErrorCode ierr; 1041 PetscInt *diag,*ii = a->i,i; 1042 1043 PetscFunctionBegin; 1044 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1045 *missing = PETSC_FALSE; 1046 if (A->rmap->n > 0 && !ii) { 1047 *missing = PETSC_TRUE; 1048 if (d) *d = 0; 1049 PetscInfo(A,"Matrix has no entries therefore is missing diagonal"); 1050 } else { 1051 diag = a->diag; 1052 for (i=0; i<a->mbs; i++) { 1053 if (diag[i] >= ii[i+1]) { 1054 *missing = PETSC_TRUE; 1055 if (d) *d = i; 1056 PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 1057 break; 1058 } 1059 } 1060 } 1061 PetscFunctionReturn(0); 1062 } 1063 1064 #undef __FUNCT__ 1065 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1066 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1067 { 1068 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1069 PetscErrorCode ierr; 1070 PetscInt i,j,m = a->mbs; 1071 1072 PetscFunctionBegin; 1073 if (!a->diag) { 1074 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1075 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1076 a->free_diag = PETSC_TRUE; 1077 } 1078 for (i=0; i<m; i++) { 1079 a->diag[i] = a->i[i+1]; 1080 for (j=a->i[i]; j<a->i[i+1]; j++) { 1081 if (a->j[j] == i) { 1082 a->diag[i] = j; 1083 break; 1084 } 1085 } 1086 } 1087 PetscFunctionReturn(0); 1088 } 1089 1090 1091 #undef __FUNCT__ 1092 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1093 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1094 { 1095 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1096 PetscErrorCode ierr; 1097 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1098 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1099 1100 PetscFunctionBegin; 1101 *nn = n; 1102 if (!ia) PetscFunctionReturn(0); 1103 if (symmetric) { 1104 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1105 nz = tia[n]; 1106 } else { 1107 tia = a->i; tja = a->j; 1108 } 1109 1110 if (!blockcompressed && bs > 1) { 1111 (*nn) *= bs; 1112 /* malloc & create the natural set of indices */ 1113 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1114 if (n) { 1115 (*ia)[0] = 0; 1116 for (j=1; j<bs; j++) { 1117 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1118 } 1119 } 1120 1121 for (i=1; i<n; i++) { 1122 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1123 for (j=1; j<bs; j++) { 1124 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1125 } 1126 } 1127 if (n) { 1128 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1129 } 1130 1131 if (inja) { 1132 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1133 cnt = 0; 1134 for (i=0; i<n; i++) { 1135 for (j=0; j<bs; j++) { 1136 for (k=tia[i]; k<tia[i+1]; k++) { 1137 for (l=0; l<bs; l++) { 1138 (*ja)[cnt++] = bs*tja[k] + l; 1139 } 1140 } 1141 } 1142 } 1143 } 1144 1145 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1146 ierr = PetscFree(tia);CHKERRQ(ierr); 1147 ierr = PetscFree(tja);CHKERRQ(ierr); 1148 } 1149 } else if (oshift == 1) { 1150 if (symmetric) { 1151 nz = tia[A->rmap->n/bs]; 1152 /* add 1 to i and j indices */ 1153 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1154 *ia = tia; 1155 if (ja) { 1156 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1157 *ja = tja; 1158 } 1159 } else { 1160 nz = a->i[A->rmap->n/bs]; 1161 /* malloc space and add 1 to i and j indices */ 1162 ierr = PetscMalloc1((A->rmap->n/bs+1),ia);CHKERRQ(ierr); 1163 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1164 if (ja) { 1165 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1166 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1167 } 1168 } 1169 } else { 1170 *ia = tia; 1171 if (ja) *ja = tja; 1172 } 1173 PetscFunctionReturn(0); 1174 } 1175 1176 #undef __FUNCT__ 1177 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1178 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1179 { 1180 PetscErrorCode ierr; 1181 1182 PetscFunctionBegin; 1183 if (!ia) PetscFunctionReturn(0); 1184 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1185 ierr = PetscFree(*ia);CHKERRQ(ierr); 1186 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1187 } 1188 PetscFunctionReturn(0); 1189 } 1190 1191 #undef __FUNCT__ 1192 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1193 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1194 { 1195 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1196 PetscErrorCode ierr; 1197 1198 PetscFunctionBegin; 1199 #if defined(PETSC_USE_LOG) 1200 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1201 #endif 1202 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1203 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1204 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1205 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1206 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1207 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1208 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1209 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1210 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1211 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1212 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1213 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1214 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 1215 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1216 1217 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1218 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1219 ierr = PetscFree(A->data);CHKERRQ(ierr); 1220 1221 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1222 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1223 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1224 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1225 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1226 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1227 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1228 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1229 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1230 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1231 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1232 PetscFunctionReturn(0); 1233 } 1234 1235 #undef __FUNCT__ 1236 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1237 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1238 { 1239 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1240 PetscErrorCode ierr; 1241 1242 PetscFunctionBegin; 1243 switch (op) { 1244 case MAT_ROW_ORIENTED: 1245 a->roworiented = flg; 1246 break; 1247 case MAT_KEEP_NONZERO_PATTERN: 1248 a->keepnonzeropattern = flg; 1249 break; 1250 case MAT_NEW_NONZERO_LOCATIONS: 1251 a->nonew = (flg ? 0 : 1); 1252 break; 1253 case MAT_NEW_NONZERO_LOCATION_ERR: 1254 a->nonew = (flg ? -1 : 0); 1255 break; 1256 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1257 a->nonew = (flg ? -2 : 0); 1258 break; 1259 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1260 a->nounused = (flg ? -1 : 0); 1261 break; 1262 case MAT_NEW_DIAGONALS: 1263 case MAT_IGNORE_OFF_PROC_ENTRIES: 1264 case MAT_USE_HASH_TABLE: 1265 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1266 break; 1267 case MAT_SPD: 1268 case MAT_SYMMETRIC: 1269 case MAT_STRUCTURALLY_SYMMETRIC: 1270 case MAT_HERMITIAN: 1271 case MAT_SYMMETRY_ETERNAL: 1272 /* These options are handled directly by MatSetOption() */ 1273 break; 1274 default: 1275 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1276 } 1277 PetscFunctionReturn(0); 1278 } 1279 1280 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1281 #undef __FUNCT__ 1282 #define __FUNCT__ "MatGetRow_SeqBAIJ_private" 1283 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1284 { 1285 PetscErrorCode ierr; 1286 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1287 MatScalar *aa_i; 1288 PetscScalar *v_i; 1289 1290 PetscFunctionBegin; 1291 bs = A->rmap->bs; 1292 bs2 = bs*bs; 1293 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1294 1295 bn = row/bs; /* Block number */ 1296 bp = row % bs; /* Block Position */ 1297 M = ai[bn+1] - ai[bn]; 1298 *nz = bs*M; 1299 1300 if (v) { 1301 *v = 0; 1302 if (*nz) { 1303 ierr = PetscMalloc1((*nz),v);CHKERRQ(ierr); 1304 for (i=0; i<M; i++) { /* for each block in the block row */ 1305 v_i = *v + i*bs; 1306 aa_i = aa + bs2*(ai[bn] + i); 1307 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1308 } 1309 } 1310 } 1311 1312 if (idx) { 1313 *idx = 0; 1314 if (*nz) { 1315 ierr = PetscMalloc1((*nz),idx);CHKERRQ(ierr); 1316 for (i=0; i<M; i++) { /* for each block in the block row */ 1317 idx_i = *idx + i*bs; 1318 itmp = bs*aj[ai[bn] + i]; 1319 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1320 } 1321 } 1322 } 1323 PetscFunctionReturn(0); 1324 } 1325 1326 #undef __FUNCT__ 1327 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1328 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1329 { 1330 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1331 PetscErrorCode ierr; 1332 1333 PetscFunctionBegin; 1334 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1335 PetscFunctionReturn(0); 1336 } 1337 1338 #undef __FUNCT__ 1339 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1340 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1341 { 1342 PetscErrorCode ierr; 1343 1344 PetscFunctionBegin; 1345 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1346 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1347 PetscFunctionReturn(0); 1348 } 1349 1350 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1351 1352 #undef __FUNCT__ 1353 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1354 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1355 { 1356 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1357 Mat C; 1358 PetscErrorCode ierr; 1359 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1360 PetscInt *rows,*cols,bs2=a->bs2; 1361 MatScalar *array; 1362 1363 PetscFunctionBegin; 1364 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1365 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1366 ierr = PetscCalloc1((1+nbs),&col);CHKERRQ(ierr); 1367 1368 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1369 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1370 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1371 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1372 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr); 1373 ierr = PetscFree(col);CHKERRQ(ierr); 1374 } else { 1375 C = *B; 1376 } 1377 1378 array = a->a; 1379 ierr = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr); 1380 for (i=0; i<mbs; i++) { 1381 cols[0] = i*bs; 1382 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1383 len = ai[i+1] - ai[i]; 1384 for (j=0; j<len; j++) { 1385 rows[0] = (*aj++)*bs; 1386 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1387 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1388 array += bs2; 1389 } 1390 } 1391 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1392 1393 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1394 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1395 1396 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1397 *B = C; 1398 } else { 1399 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 1400 } 1401 PetscFunctionReturn(0); 1402 } 1403 1404 #undef __FUNCT__ 1405 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1406 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1407 { 1408 PetscErrorCode ierr; 1409 Mat Btrans; 1410 1411 PetscFunctionBegin; 1412 *f = PETSC_FALSE; 1413 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1414 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1415 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1416 PetscFunctionReturn(0); 1417 } 1418 1419 #undef __FUNCT__ 1420 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1421 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1422 { 1423 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1424 PetscErrorCode ierr; 1425 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1426 int fd; 1427 PetscScalar *aa; 1428 FILE *file; 1429 1430 PetscFunctionBegin; 1431 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1432 ierr = PetscMalloc1((4+A->rmap->N),&col_lens);CHKERRQ(ierr); 1433 col_lens[0] = MAT_FILE_CLASSID; 1434 1435 col_lens[1] = A->rmap->N; 1436 col_lens[2] = A->cmap->n; 1437 col_lens[3] = a->nz*bs2; 1438 1439 /* store lengths of each row and write (including header) to file */ 1440 count = 0; 1441 for (i=0; i<a->mbs; i++) { 1442 for (j=0; j<bs; j++) { 1443 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1444 } 1445 } 1446 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1447 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1448 1449 /* store column indices (zero start index) */ 1450 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1451 count = 0; 1452 for (i=0; i<a->mbs; i++) { 1453 for (j=0; j<bs; j++) { 1454 for (k=a->i[i]; k<a->i[i+1]; k++) { 1455 for (l=0; l<bs; l++) { 1456 jj[count++] = bs*a->j[k] + l; 1457 } 1458 } 1459 } 1460 } 1461 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1462 ierr = PetscFree(jj);CHKERRQ(ierr); 1463 1464 /* store nonzero values */ 1465 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1466 count = 0; 1467 for (i=0; i<a->mbs; i++) { 1468 for (j=0; j<bs; j++) { 1469 for (k=a->i[i]; k<a->i[i+1]; k++) { 1470 for (l=0; l<bs; l++) { 1471 aa[count++] = a->a[bs2*k + l*bs + j]; 1472 } 1473 } 1474 } 1475 } 1476 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1477 ierr = PetscFree(aa);CHKERRQ(ierr); 1478 1479 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1480 if (file) { 1481 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1482 } 1483 PetscFunctionReturn(0); 1484 } 1485 1486 #undef __FUNCT__ 1487 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1488 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1489 { 1490 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1491 PetscErrorCode ierr; 1492 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1493 PetscViewerFormat format; 1494 1495 PetscFunctionBegin; 1496 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1497 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1498 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1499 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1500 Mat aij; 1501 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1502 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1503 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1504 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1505 PetscFunctionReturn(0); 1506 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1507 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1508 for (i=0; i<a->mbs; i++) { 1509 for (j=0; j<bs; j++) { 1510 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1511 for (k=a->i[i]; k<a->i[i+1]; k++) { 1512 for (l=0; l<bs; l++) { 1513 #if defined(PETSC_USE_COMPLEX) 1514 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1515 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1516 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1517 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1518 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1519 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1520 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1521 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1522 } 1523 #else 1524 if (a->a[bs2*k + l*bs + j] != 0.0) { 1525 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1526 } 1527 #endif 1528 } 1529 } 1530 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1531 } 1532 } 1533 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1534 } else { 1535 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1536 for (i=0; i<a->mbs; i++) { 1537 for (j=0; j<bs; j++) { 1538 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1539 for (k=a->i[i]; k<a->i[i+1]; k++) { 1540 for (l=0; l<bs; l++) { 1541 #if defined(PETSC_USE_COMPLEX) 1542 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1543 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1544 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1545 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1546 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1547 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1548 } else { 1549 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1550 } 1551 #else 1552 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1553 #endif 1554 } 1555 } 1556 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1557 } 1558 } 1559 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1560 } 1561 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1562 PetscFunctionReturn(0); 1563 } 1564 1565 #include <petscdraw.h> 1566 #undef __FUNCT__ 1567 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1568 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1569 { 1570 Mat A = (Mat) Aa; 1571 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1572 PetscErrorCode ierr; 1573 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1574 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1575 MatScalar *aa; 1576 PetscViewer viewer; 1577 PetscViewerFormat format; 1578 1579 PetscFunctionBegin; 1580 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1581 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1582 1583 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1584 1585 /* loop over matrix elements drawing boxes */ 1586 1587 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1588 color = PETSC_DRAW_BLUE; 1589 for (i=0,row=0; i<mbs; i++,row+=bs) { 1590 for (j=a->i[i]; j<a->i[i+1]; j++) { 1591 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1592 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1593 aa = a->a + j*bs2; 1594 for (k=0; k<bs; k++) { 1595 for (l=0; l<bs; l++) { 1596 if (PetscRealPart(*aa++) >= 0.) continue; 1597 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1598 } 1599 } 1600 } 1601 } 1602 color = PETSC_DRAW_CYAN; 1603 for (i=0,row=0; i<mbs; i++,row+=bs) { 1604 for (j=a->i[i]; j<a->i[i+1]; j++) { 1605 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1606 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1607 aa = a->a + j*bs2; 1608 for (k=0; k<bs; k++) { 1609 for (l=0; l<bs; l++) { 1610 if (PetscRealPart(*aa++) != 0.) continue; 1611 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1612 } 1613 } 1614 } 1615 } 1616 color = PETSC_DRAW_RED; 1617 for (i=0,row=0; i<mbs; i++,row+=bs) { 1618 for (j=a->i[i]; j<a->i[i+1]; j++) { 1619 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1620 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1621 aa = a->a + j*bs2; 1622 for (k=0; k<bs; k++) { 1623 for (l=0; l<bs; l++) { 1624 if (PetscRealPart(*aa++) <= 0.) continue; 1625 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1626 } 1627 } 1628 } 1629 } 1630 } else { 1631 /* use contour shading to indicate magnitude of values */ 1632 /* first determine max of all nonzero values */ 1633 PetscDraw popup; 1634 PetscReal scale,maxv = 0.0; 1635 1636 for (i=0; i<a->nz*a->bs2; i++) { 1637 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1638 } 1639 scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv; 1640 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1641 if (popup) { 1642 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1643 } 1644 for (i=0,row=0; i<mbs; i++,row+=bs) { 1645 for (j=a->i[i]; j<a->i[i+1]; j++) { 1646 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1647 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1648 aa = a->a + j*bs2; 1649 for (k=0; k<bs; k++) { 1650 for (l=0; l<bs; l++) { 1651 color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++)); 1652 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1653 } 1654 } 1655 } 1656 } 1657 } 1658 PetscFunctionReturn(0); 1659 } 1660 1661 #undef __FUNCT__ 1662 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1663 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1664 { 1665 PetscErrorCode ierr; 1666 PetscReal xl,yl,xr,yr,w,h; 1667 PetscDraw draw; 1668 PetscBool isnull; 1669 1670 PetscFunctionBegin; 1671 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1672 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1673 1674 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1675 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1676 xr += w; yr += h; xl = -w; yl = -h; 1677 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1678 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1679 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1680 PetscFunctionReturn(0); 1681 } 1682 1683 #undef __FUNCT__ 1684 #define __FUNCT__ "MatView_SeqBAIJ" 1685 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1686 { 1687 PetscErrorCode ierr; 1688 PetscBool iascii,isbinary,isdraw; 1689 1690 PetscFunctionBegin; 1691 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1692 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1693 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1694 if (iascii) { 1695 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1696 } else if (isbinary) { 1697 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1698 } else if (isdraw) { 1699 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1700 } else { 1701 Mat B; 1702 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1703 ierr = MatView(B,viewer);CHKERRQ(ierr); 1704 ierr = MatDestroy(&B);CHKERRQ(ierr); 1705 } 1706 PetscFunctionReturn(0); 1707 } 1708 1709 1710 #undef __FUNCT__ 1711 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1712 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1713 { 1714 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1715 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1716 PetscInt *ai = a->i,*ailen = a->ilen; 1717 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1718 MatScalar *ap,*aa = a->a; 1719 1720 PetscFunctionBegin; 1721 for (k=0; k<m; k++) { /* loop over rows */ 1722 row = im[k]; brow = row/bs; 1723 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1724 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1725 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1726 nrow = ailen[brow]; 1727 for (l=0; l<n; l++) { /* loop over columns */ 1728 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1729 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1730 col = in[l]; 1731 bcol = col/bs; 1732 cidx = col%bs; 1733 ridx = row%bs; 1734 high = nrow; 1735 low = 0; /* assume unsorted */ 1736 while (high-low > 5) { 1737 t = (low+high)/2; 1738 if (rp[t] > bcol) high = t; 1739 else low = t; 1740 } 1741 for (i=low; i<high; i++) { 1742 if (rp[i] > bcol) break; 1743 if (rp[i] == bcol) { 1744 *v++ = ap[bs2*i+bs*cidx+ridx]; 1745 goto finished; 1746 } 1747 } 1748 *v++ = 0.0; 1749 finished:; 1750 } 1751 } 1752 PetscFunctionReturn(0); 1753 } 1754 1755 #undef __FUNCT__ 1756 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1757 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1758 { 1759 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1760 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1761 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1762 PetscErrorCode ierr; 1763 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1764 PetscBool roworiented=a->roworiented; 1765 const PetscScalar *value = v; 1766 MatScalar *ap,*aa = a->a,*bap; 1767 1768 PetscFunctionBegin; 1769 if (roworiented) { 1770 stepval = (n-1)*bs; 1771 } else { 1772 stepval = (m-1)*bs; 1773 } 1774 for (k=0; k<m; k++) { /* loop over added rows */ 1775 row = im[k]; 1776 if (row < 0) continue; 1777 #if defined(PETSC_USE_DEBUG) 1778 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 1779 #endif 1780 rp = aj + ai[row]; 1781 ap = aa + bs2*ai[row]; 1782 rmax = imax[row]; 1783 nrow = ailen[row]; 1784 low = 0; 1785 high = nrow; 1786 for (l=0; l<n; l++) { /* loop over added columns */ 1787 if (in[l] < 0) continue; 1788 #if defined(PETSC_USE_DEBUG) 1789 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 1790 #endif 1791 col = in[l]; 1792 if (roworiented) { 1793 value = v + (k*(stepval+bs) + l)*bs; 1794 } else { 1795 value = v + (l*(stepval+bs) + k)*bs; 1796 } 1797 if (col <= lastcol) low = 0; 1798 else high = nrow; 1799 lastcol = col; 1800 while (high-low > 7) { 1801 t = (low+high)/2; 1802 if (rp[t] > col) high = t; 1803 else low = t; 1804 } 1805 for (i=low; i<high; i++) { 1806 if (rp[i] > col) break; 1807 if (rp[i] == col) { 1808 bap = ap + bs2*i; 1809 if (roworiented) { 1810 if (is == ADD_VALUES) { 1811 for (ii=0; ii<bs; ii++,value+=stepval) { 1812 for (jj=ii; jj<bs2; jj+=bs) { 1813 bap[jj] += *value++; 1814 } 1815 } 1816 } else { 1817 for (ii=0; ii<bs; ii++,value+=stepval) { 1818 for (jj=ii; jj<bs2; jj+=bs) { 1819 bap[jj] = *value++; 1820 } 1821 } 1822 } 1823 } else { 1824 if (is == ADD_VALUES) { 1825 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1826 for (jj=0; jj<bs; jj++) { 1827 bap[jj] += value[jj]; 1828 } 1829 bap += bs; 1830 } 1831 } else { 1832 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1833 for (jj=0; jj<bs; jj++) { 1834 bap[jj] = value[jj]; 1835 } 1836 bap += bs; 1837 } 1838 } 1839 } 1840 goto noinsert2; 1841 } 1842 } 1843 if (nonew == 1) goto noinsert2; 1844 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1845 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1846 N = nrow++ - 1; high++; 1847 /* shift up all the later entries in this row */ 1848 for (ii=N; ii>=i; ii--) { 1849 rp[ii+1] = rp[ii]; 1850 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1851 } 1852 if (N >= i) { 1853 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1854 } 1855 rp[i] = col; 1856 bap = ap + bs2*i; 1857 if (roworiented) { 1858 for (ii=0; ii<bs; ii++,value+=stepval) { 1859 for (jj=ii; jj<bs2; jj+=bs) { 1860 bap[jj] = *value++; 1861 } 1862 } 1863 } else { 1864 for (ii=0; ii<bs; ii++,value+=stepval) { 1865 for (jj=0; jj<bs; jj++) { 1866 *bap++ = *value++; 1867 } 1868 } 1869 } 1870 noinsert2:; 1871 low = i; 1872 } 1873 ailen[row] = nrow; 1874 } 1875 PetscFunctionReturn(0); 1876 } 1877 1878 #undef __FUNCT__ 1879 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1880 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1881 { 1882 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1883 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1884 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1885 PetscErrorCode ierr; 1886 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1887 MatScalar *aa = a->a,*ap; 1888 PetscReal ratio=0.6; 1889 1890 PetscFunctionBegin; 1891 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1892 1893 if (m) rmax = ailen[0]; 1894 for (i=1; i<mbs; i++) { 1895 /* move each row back by the amount of empty slots (fshift) before it*/ 1896 fshift += imax[i-1] - ailen[i-1]; 1897 rmax = PetscMax(rmax,ailen[i]); 1898 if (fshift) { 1899 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1900 N = ailen[i]; 1901 for (j=0; j<N; j++) { 1902 ip[j-fshift] = ip[j]; 1903 1904 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1905 } 1906 } 1907 ai[i] = ai[i-1] + ailen[i-1]; 1908 } 1909 if (mbs) { 1910 fshift += imax[mbs-1] - ailen[mbs-1]; 1911 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1912 } 1913 1914 /* reset ilen and imax for each row */ 1915 a->nonzerorowcnt = 0; 1916 for (i=0; i<mbs; i++) { 1917 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1918 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1919 } 1920 a->nz = ai[mbs]; 1921 1922 /* diagonals may have moved, so kill the diagonal pointers */ 1923 a->idiagvalid = PETSC_FALSE; 1924 if (fshift && a->diag) { 1925 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1926 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1927 a->diag = 0; 1928 } 1929 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1930 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1931 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1932 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1933 1934 A->info.mallocs += a->reallocs; 1935 a->reallocs = 0; 1936 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1937 1938 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1939 PetscFunctionReturn(0); 1940 } 1941 1942 /* 1943 This function returns an array of flags which indicate the locations of contiguous 1944 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1945 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1946 Assume: sizes should be long enough to hold all the values. 1947 */ 1948 #undef __FUNCT__ 1949 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1950 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1951 { 1952 PetscInt i,j,k,row; 1953 PetscBool flg; 1954 1955 PetscFunctionBegin; 1956 for (i=0,j=0; i<n; j++) { 1957 row = idx[i]; 1958 if (row%bs!=0) { /* Not the begining of a block */ 1959 sizes[j] = 1; 1960 i++; 1961 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1962 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1963 i++; 1964 } else { /* Begining of the block, so check if the complete block exists */ 1965 flg = PETSC_TRUE; 1966 for (k=1; k<bs; k++) { 1967 if (row+k != idx[i+k]) { /* break in the block */ 1968 flg = PETSC_FALSE; 1969 break; 1970 } 1971 } 1972 if (flg) { /* No break in the bs */ 1973 sizes[j] = bs; 1974 i += bs; 1975 } else { 1976 sizes[j] = 1; 1977 i++; 1978 } 1979 } 1980 } 1981 *bs_max = j; 1982 PetscFunctionReturn(0); 1983 } 1984 1985 #undef __FUNCT__ 1986 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1987 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 1988 { 1989 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1990 PetscErrorCode ierr; 1991 PetscInt i,j,k,count,*rows; 1992 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 1993 PetscScalar zero = 0.0; 1994 MatScalar *aa; 1995 const PetscScalar *xx; 1996 PetscScalar *bb; 1997 1998 PetscFunctionBegin; 1999 /* fix right hand side if needed */ 2000 if (x && b) { 2001 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2002 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2003 for (i=0; i<is_n; i++) { 2004 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2005 } 2006 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2007 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2008 } 2009 2010 /* Make a copy of the IS and sort it */ 2011 /* allocate memory for rows,sizes */ 2012 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2013 2014 /* copy IS values to rows, and sort them */ 2015 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2016 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2017 2018 if (baij->keepnonzeropattern) { 2019 for (i=0; i<is_n; i++) sizes[i] = 1; 2020 bs_max = is_n; 2021 } else { 2022 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2023 A->nonzerostate++; 2024 } 2025 2026 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2027 row = rows[j]; 2028 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2029 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2030 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2031 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2032 if (diag != (PetscScalar)0.0) { 2033 if (baij->ilen[row/bs] > 0) { 2034 baij->ilen[row/bs] = 1; 2035 baij->j[baij->i[row/bs]] = row/bs; 2036 2037 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2038 } 2039 /* Now insert all the diagonal values for this bs */ 2040 for (k=0; k<bs; k++) { 2041 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2042 } 2043 } else { /* (diag == 0.0) */ 2044 baij->ilen[row/bs] = 0; 2045 } /* end (diag == 0.0) */ 2046 } else { /* (sizes[i] != bs) */ 2047 #if defined(PETSC_USE_DEBUG) 2048 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2049 #endif 2050 for (k=0; k<count; k++) { 2051 aa[0] = zero; 2052 aa += bs; 2053 } 2054 if (diag != (PetscScalar)0.0) { 2055 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2056 } 2057 } 2058 } 2059 2060 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2061 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2062 PetscFunctionReturn(0); 2063 } 2064 2065 #undef __FUNCT__ 2066 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2067 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2068 { 2069 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2070 PetscErrorCode ierr; 2071 PetscInt i,j,k,count; 2072 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2073 PetscScalar zero = 0.0; 2074 MatScalar *aa; 2075 const PetscScalar *xx; 2076 PetscScalar *bb; 2077 PetscBool *zeroed,vecs = PETSC_FALSE; 2078 2079 PetscFunctionBegin; 2080 /* fix right hand side if needed */ 2081 if (x && b) { 2082 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2083 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2084 vecs = PETSC_TRUE; 2085 } 2086 2087 /* zero the columns */ 2088 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2089 for (i=0; i<is_n; i++) { 2090 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2091 zeroed[is_idx[i]] = PETSC_TRUE; 2092 } 2093 for (i=0; i<A->rmap->N; i++) { 2094 if (!zeroed[i]) { 2095 row = i/bs; 2096 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2097 for (k=0; k<bs; k++) { 2098 col = bs*baij->j[j] + k; 2099 if (zeroed[col]) { 2100 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2101 if (vecs) bb[i] -= aa[0]*xx[col]; 2102 aa[0] = 0.0; 2103 } 2104 } 2105 } 2106 } else if (vecs) bb[i] = diag*xx[i]; 2107 } 2108 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2109 if (vecs) { 2110 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2111 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2112 } 2113 2114 /* zero the rows */ 2115 for (i=0; i<is_n; i++) { 2116 row = is_idx[i]; 2117 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2118 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2119 for (k=0; k<count; k++) { 2120 aa[0] = zero; 2121 aa += bs; 2122 } 2123 if (diag != (PetscScalar)0.0) { 2124 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2125 } 2126 } 2127 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2128 PetscFunctionReturn(0); 2129 } 2130 2131 #undef __FUNCT__ 2132 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2133 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2134 { 2135 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2136 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2137 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2138 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2139 PetscErrorCode ierr; 2140 PetscInt ridx,cidx,bs2=a->bs2; 2141 PetscBool roworiented=a->roworiented; 2142 MatScalar *ap,value,*aa=a->a,*bap; 2143 2144 PetscFunctionBegin; 2145 for (k=0; k<m; k++) { /* loop over added rows */ 2146 row = im[k]; 2147 brow = row/bs; 2148 if (row < 0) continue; 2149 #if defined(PETSC_USE_DEBUG) 2150 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2151 #endif 2152 rp = aj + ai[brow]; 2153 ap = aa + bs2*ai[brow]; 2154 rmax = imax[brow]; 2155 nrow = ailen[brow]; 2156 low = 0; 2157 high = nrow; 2158 for (l=0; l<n; l++) { /* loop over added columns */ 2159 if (in[l] < 0) continue; 2160 #if defined(PETSC_USE_DEBUG) 2161 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2162 #endif 2163 col = in[l]; bcol = col/bs; 2164 ridx = row % bs; cidx = col % bs; 2165 if (roworiented) { 2166 value = v[l + k*n]; 2167 } else { 2168 value = v[k + l*m]; 2169 } 2170 if (col <= lastcol) low = 0; else high = nrow; 2171 lastcol = col; 2172 while (high-low > 7) { 2173 t = (low+high)/2; 2174 if (rp[t] > bcol) high = t; 2175 else low = t; 2176 } 2177 for (i=low; i<high; i++) { 2178 if (rp[i] > bcol) break; 2179 if (rp[i] == bcol) { 2180 bap = ap + bs2*i + bs*cidx + ridx; 2181 if (is == ADD_VALUES) *bap += value; 2182 else *bap = value; 2183 goto noinsert1; 2184 } 2185 } 2186 if (nonew == 1) goto noinsert1; 2187 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2188 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2189 N = nrow++ - 1; high++; 2190 /* shift up all the later entries in this row */ 2191 for (ii=N; ii>=i; ii--) { 2192 rp[ii+1] = rp[ii]; 2193 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2194 } 2195 if (N>=i) { 2196 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2197 } 2198 rp[i] = bcol; 2199 ap[bs2*i + bs*cidx + ridx] = value; 2200 a->nz++; 2201 A->nonzerostate++; 2202 noinsert1:; 2203 low = i; 2204 } 2205 ailen[brow] = nrow; 2206 } 2207 PetscFunctionReturn(0); 2208 } 2209 2210 #undef __FUNCT__ 2211 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2212 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2213 { 2214 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2215 Mat outA; 2216 PetscErrorCode ierr; 2217 PetscBool row_identity,col_identity; 2218 2219 PetscFunctionBegin; 2220 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2221 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2222 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2223 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2224 2225 outA = inA; 2226 inA->factortype = MAT_FACTOR_LU; 2227 2228 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2229 2230 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2231 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2232 a->row = row; 2233 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2234 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2235 a->col = col; 2236 2237 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2238 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2239 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2240 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2241 2242 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2243 if (!a->solve_work) { 2244 ierr = PetscMalloc1((inA->rmap->N+inA->rmap->bs),&a->solve_work);CHKERRQ(ierr); 2245 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2246 } 2247 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2248 PetscFunctionReturn(0); 2249 } 2250 2251 #undef __FUNCT__ 2252 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2253 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2254 { 2255 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2256 PetscInt i,nz,mbs; 2257 2258 PetscFunctionBegin; 2259 nz = baij->maxnz; 2260 mbs = baij->mbs; 2261 for (i=0; i<nz; i++) { 2262 baij->j[i] = indices[i]; 2263 } 2264 baij->nz = nz; 2265 for (i=0; i<mbs; i++) { 2266 baij->ilen[i] = baij->imax[i]; 2267 } 2268 PetscFunctionReturn(0); 2269 } 2270 2271 #undef __FUNCT__ 2272 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2273 /*@ 2274 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2275 in the matrix. 2276 2277 Input Parameters: 2278 + mat - the SeqBAIJ matrix 2279 - indices - the column indices 2280 2281 Level: advanced 2282 2283 Notes: 2284 This can be called if you have precomputed the nonzero structure of the 2285 matrix and want to provide it to the matrix object to improve the performance 2286 of the MatSetValues() operation. 2287 2288 You MUST have set the correct numbers of nonzeros per row in the call to 2289 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2290 2291 MUST be called before any calls to MatSetValues(); 2292 2293 @*/ 2294 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2295 { 2296 PetscErrorCode ierr; 2297 2298 PetscFunctionBegin; 2299 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2300 PetscValidPointer(indices,2); 2301 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2302 PetscFunctionReturn(0); 2303 } 2304 2305 #undef __FUNCT__ 2306 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2307 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2308 { 2309 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2310 PetscErrorCode ierr; 2311 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2312 PetscReal atmp; 2313 PetscScalar *x,zero = 0.0; 2314 MatScalar *aa; 2315 PetscInt ncols,brow,krow,kcol; 2316 2317 PetscFunctionBegin; 2318 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2319 bs = A->rmap->bs; 2320 aa = a->a; 2321 ai = a->i; 2322 aj = a->j; 2323 mbs = a->mbs; 2324 2325 ierr = VecSet(v,zero);CHKERRQ(ierr); 2326 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2327 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2328 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2329 for (i=0; i<mbs; i++) { 2330 ncols = ai[1] - ai[0]; ai++; 2331 brow = bs*i; 2332 for (j=0; j<ncols; j++) { 2333 for (kcol=0; kcol<bs; kcol++) { 2334 for (krow=0; krow<bs; krow++) { 2335 atmp = PetscAbsScalar(*aa);aa++; 2336 row = brow + krow; /* row index */ 2337 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2338 } 2339 } 2340 aj++; 2341 } 2342 } 2343 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2344 PetscFunctionReturn(0); 2345 } 2346 2347 #undef __FUNCT__ 2348 #define __FUNCT__ "MatCopy_SeqBAIJ" 2349 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2350 { 2351 PetscErrorCode ierr; 2352 2353 PetscFunctionBegin; 2354 /* If the two matrices have the same copy implementation, use fast copy. */ 2355 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2356 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2357 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2358 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2359 2360 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2361 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2362 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2363 } else { 2364 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2365 } 2366 PetscFunctionReturn(0); 2367 } 2368 2369 #undef __FUNCT__ 2370 #define __FUNCT__ "MatSetUp_SeqBAIJ" 2371 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2372 { 2373 PetscErrorCode ierr; 2374 2375 PetscFunctionBegin; 2376 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2377 PetscFunctionReturn(0); 2378 } 2379 2380 #undef __FUNCT__ 2381 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ" 2382 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2383 { 2384 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2385 2386 PetscFunctionBegin; 2387 *array = a->a; 2388 PetscFunctionReturn(0); 2389 } 2390 2391 #undef __FUNCT__ 2392 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ" 2393 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2394 { 2395 PetscFunctionBegin; 2396 PetscFunctionReturn(0); 2397 } 2398 2399 #undef __FUNCT__ 2400 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ" 2401 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2402 { 2403 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2404 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2405 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2406 PetscErrorCode ierr; 2407 2408 PetscFunctionBegin; 2409 /* Set the number of nonzeros in the new matrix */ 2410 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2411 PetscFunctionReturn(0); 2412 } 2413 2414 #undef __FUNCT__ 2415 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2416 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2417 { 2418 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2419 PetscErrorCode ierr; 2420 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2421 PetscBLASInt one=1; 2422 2423 PetscFunctionBegin; 2424 if (str == SAME_NONZERO_PATTERN) { 2425 PetscScalar alpha = a; 2426 PetscBLASInt bnz; 2427 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2428 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2429 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2430 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2431 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2432 } else { 2433 Mat B; 2434 PetscInt *nnz; 2435 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2436 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2437 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2438 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2439 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2440 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2441 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2442 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2443 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2444 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2445 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2446 ierr = PetscFree(nnz);CHKERRQ(ierr); 2447 } 2448 PetscFunctionReturn(0); 2449 } 2450 2451 #undef __FUNCT__ 2452 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2453 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2454 { 2455 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2456 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2457 MatScalar *aa = a->a; 2458 2459 PetscFunctionBegin; 2460 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2461 PetscFunctionReturn(0); 2462 } 2463 2464 #undef __FUNCT__ 2465 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2466 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2467 { 2468 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2469 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2470 MatScalar *aa = a->a; 2471 2472 PetscFunctionBegin; 2473 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2474 PetscFunctionReturn(0); 2475 } 2476 2477 #undef __FUNCT__ 2478 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2479 /* 2480 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2481 */ 2482 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2483 { 2484 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2485 PetscErrorCode ierr; 2486 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2487 PetscInt nz = a->i[m],row,*jj,mr,col; 2488 2489 PetscFunctionBegin; 2490 *nn = n; 2491 if (!ia) PetscFunctionReturn(0); 2492 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2493 else { 2494 ierr = PetscCalloc1((n+1),&collengths);CHKERRQ(ierr); 2495 ierr = PetscMalloc1((n+1),&cia);CHKERRQ(ierr); 2496 ierr = PetscMalloc1((nz+1),&cja);CHKERRQ(ierr); 2497 jj = a->j; 2498 for (i=0; i<nz; i++) { 2499 collengths[jj[i]]++; 2500 } 2501 cia[0] = oshift; 2502 for (i=0; i<n; i++) { 2503 cia[i+1] = cia[i] + collengths[i]; 2504 } 2505 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2506 jj = a->j; 2507 for (row=0; row<m; row++) { 2508 mr = a->i[row+1] - a->i[row]; 2509 for (i=0; i<mr; i++) { 2510 col = *jj++; 2511 2512 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2513 } 2514 } 2515 ierr = PetscFree(collengths);CHKERRQ(ierr); 2516 *ia = cia; *ja = cja; 2517 } 2518 PetscFunctionReturn(0); 2519 } 2520 2521 #undef __FUNCT__ 2522 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2523 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2524 { 2525 PetscErrorCode ierr; 2526 2527 PetscFunctionBegin; 2528 if (!ia) PetscFunctionReturn(0); 2529 ierr = PetscFree(*ia);CHKERRQ(ierr); 2530 ierr = PetscFree(*ja);CHKERRQ(ierr); 2531 PetscFunctionReturn(0); 2532 } 2533 2534 /* 2535 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2536 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2537 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2538 */ 2539 #undef __FUNCT__ 2540 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color" 2541 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2542 { 2543 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2544 PetscErrorCode ierr; 2545 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2546 PetscInt nz = a->i[m],row,*jj,mr,col; 2547 PetscInt *cspidx; 2548 2549 PetscFunctionBegin; 2550 *nn = n; 2551 if (!ia) PetscFunctionReturn(0); 2552 2553 ierr = PetscCalloc1((n+1),&collengths);CHKERRQ(ierr); 2554 ierr = PetscMalloc1((n+1),&cia);CHKERRQ(ierr); 2555 ierr = PetscMalloc1((nz+1),&cja);CHKERRQ(ierr); 2556 ierr = PetscMalloc1((nz+1),&cspidx);CHKERRQ(ierr); 2557 jj = a->j; 2558 for (i=0; i<nz; i++) { 2559 collengths[jj[i]]++; 2560 } 2561 cia[0] = oshift; 2562 for (i=0; i<n; i++) { 2563 cia[i+1] = cia[i] + collengths[i]; 2564 } 2565 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2566 jj = a->j; 2567 for (row=0; row<m; row++) { 2568 mr = a->i[row+1] - a->i[row]; 2569 for (i=0; i<mr; i++) { 2570 col = *jj++; 2571 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2572 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2573 } 2574 } 2575 ierr = PetscFree(collengths);CHKERRQ(ierr); 2576 *ia = cia; *ja = cja; 2577 *spidx = cspidx; 2578 PetscFunctionReturn(0); 2579 } 2580 2581 #undef __FUNCT__ 2582 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color" 2583 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2584 { 2585 PetscErrorCode ierr; 2586 2587 PetscFunctionBegin; 2588 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2589 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2590 PetscFunctionReturn(0); 2591 } 2592 2593 /* -------------------------------------------------------------------*/ 2594 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2595 MatGetRow_SeqBAIJ, 2596 MatRestoreRow_SeqBAIJ, 2597 MatMult_SeqBAIJ_N, 2598 /* 4*/ MatMultAdd_SeqBAIJ_N, 2599 MatMultTranspose_SeqBAIJ, 2600 MatMultTransposeAdd_SeqBAIJ, 2601 0, 2602 0, 2603 0, 2604 /* 10*/ 0, 2605 MatLUFactor_SeqBAIJ, 2606 0, 2607 0, 2608 MatTranspose_SeqBAIJ, 2609 /* 15*/ MatGetInfo_SeqBAIJ, 2610 MatEqual_SeqBAIJ, 2611 MatGetDiagonal_SeqBAIJ, 2612 MatDiagonalScale_SeqBAIJ, 2613 MatNorm_SeqBAIJ, 2614 /* 20*/ 0, 2615 MatAssemblyEnd_SeqBAIJ, 2616 MatSetOption_SeqBAIJ, 2617 MatZeroEntries_SeqBAIJ, 2618 /* 24*/ MatZeroRows_SeqBAIJ, 2619 0, 2620 0, 2621 0, 2622 0, 2623 /* 29*/ MatSetUp_SeqBAIJ, 2624 0, 2625 0, 2626 0, 2627 0, 2628 /* 34*/ MatDuplicate_SeqBAIJ, 2629 0, 2630 0, 2631 MatILUFactor_SeqBAIJ, 2632 0, 2633 /* 39*/ MatAXPY_SeqBAIJ, 2634 MatGetSubMatrices_SeqBAIJ, 2635 MatIncreaseOverlap_SeqBAIJ, 2636 MatGetValues_SeqBAIJ, 2637 MatCopy_SeqBAIJ, 2638 /* 44*/ 0, 2639 MatScale_SeqBAIJ, 2640 0, 2641 0, 2642 MatZeroRowsColumns_SeqBAIJ, 2643 /* 49*/ 0, 2644 MatGetRowIJ_SeqBAIJ, 2645 MatRestoreRowIJ_SeqBAIJ, 2646 MatGetColumnIJ_SeqBAIJ, 2647 MatRestoreColumnIJ_SeqBAIJ, 2648 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2649 0, 2650 0, 2651 0, 2652 MatSetValuesBlocked_SeqBAIJ, 2653 /* 59*/ MatGetSubMatrix_SeqBAIJ, 2654 MatDestroy_SeqBAIJ, 2655 MatView_SeqBAIJ, 2656 0, 2657 0, 2658 /* 64*/ 0, 2659 0, 2660 0, 2661 0, 2662 0, 2663 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2664 0, 2665 MatConvert_Basic, 2666 0, 2667 0, 2668 /* 74*/ 0, 2669 MatFDColoringApply_BAIJ, 2670 0, 2671 0, 2672 0, 2673 /* 79*/ 0, 2674 0, 2675 0, 2676 0, 2677 MatLoad_SeqBAIJ, 2678 /* 84*/ 0, 2679 0, 2680 0, 2681 0, 2682 0, 2683 /* 89*/ 0, 2684 0, 2685 0, 2686 0, 2687 0, 2688 /* 94*/ 0, 2689 0, 2690 0, 2691 0, 2692 0, 2693 /* 99*/ 0, 2694 0, 2695 0, 2696 0, 2697 0, 2698 /*104*/ 0, 2699 MatRealPart_SeqBAIJ, 2700 MatImaginaryPart_SeqBAIJ, 2701 0, 2702 0, 2703 /*109*/ 0, 2704 0, 2705 0, 2706 0, 2707 MatMissingDiagonal_SeqBAIJ, 2708 /*114*/ 0, 2709 0, 2710 0, 2711 0, 2712 0, 2713 /*119*/ 0, 2714 0, 2715 MatMultHermitianTranspose_SeqBAIJ, 2716 MatMultHermitianTransposeAdd_SeqBAIJ, 2717 0, 2718 /*124*/ 0, 2719 0, 2720 MatInvertBlockDiagonal_SeqBAIJ, 2721 0, 2722 0, 2723 /*129*/ 0, 2724 0, 2725 0, 2726 0, 2727 0, 2728 /*134*/ 0, 2729 0, 2730 0, 2731 0, 2732 0, 2733 /*139*/ 0, 2734 0, 2735 0, 2736 MatFDColoringSetUp_SeqXAIJ 2737 }; 2738 2739 #undef __FUNCT__ 2740 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2741 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2742 { 2743 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2744 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2745 PetscErrorCode ierr; 2746 2747 PetscFunctionBegin; 2748 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2749 2750 /* allocate space for values if not already there */ 2751 if (!aij->saved_values) { 2752 ierr = PetscMalloc1((nz+1),&aij->saved_values);CHKERRQ(ierr); 2753 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2754 } 2755 2756 /* copy values over */ 2757 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2758 PetscFunctionReturn(0); 2759 } 2760 2761 #undef __FUNCT__ 2762 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2763 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2764 { 2765 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2766 PetscErrorCode ierr; 2767 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2768 2769 PetscFunctionBegin; 2770 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2771 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2772 2773 /* copy values over */ 2774 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2775 PetscFunctionReturn(0); 2776 } 2777 2778 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2779 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2780 2781 #undef __FUNCT__ 2782 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2783 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2784 { 2785 Mat_SeqBAIJ *b; 2786 PetscErrorCode ierr; 2787 PetscInt i,mbs,nbs,bs2; 2788 PetscBool flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2789 2790 PetscFunctionBegin; 2791 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2792 if (nz == MAT_SKIP_ALLOCATION) { 2793 skipallocation = PETSC_TRUE; 2794 nz = 0; 2795 } 2796 2797 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2798 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2799 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2800 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2801 2802 B->preallocated = PETSC_TRUE; 2803 2804 mbs = B->rmap->n/bs; 2805 nbs = B->cmap->n/bs; 2806 bs2 = bs*bs; 2807 2808 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2809 2810 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2811 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2812 if (nnz) { 2813 for (i=0; i<mbs; i++) { 2814 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2815 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2816 } 2817 } 2818 2819 b = (Mat_SeqBAIJ*)B->data; 2820 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2821 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,PETSC_FALSE,&flg,NULL);CHKERRQ(ierr); 2822 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2823 2824 if (!flg) { 2825 switch (bs) { 2826 case 1: 2827 B->ops->mult = MatMult_SeqBAIJ_1; 2828 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2829 break; 2830 case 2: 2831 B->ops->mult = MatMult_SeqBAIJ_2; 2832 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2833 break; 2834 case 3: 2835 B->ops->mult = MatMult_SeqBAIJ_3; 2836 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2837 break; 2838 case 4: 2839 B->ops->mult = MatMult_SeqBAIJ_4; 2840 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2841 break; 2842 case 5: 2843 B->ops->mult = MatMult_SeqBAIJ_5; 2844 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2845 break; 2846 case 6: 2847 B->ops->mult = MatMult_SeqBAIJ_6; 2848 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2849 break; 2850 case 7: 2851 B->ops->mult = MatMult_SeqBAIJ_7; 2852 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2853 break; 2854 case 15: 2855 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2856 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2857 break; 2858 default: 2859 B->ops->mult = MatMult_SeqBAIJ_N; 2860 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2861 break; 2862 } 2863 } 2864 B->ops->sor = MatSOR_SeqBAIJ; 2865 b->mbs = mbs; 2866 b->nbs = nbs; 2867 if (!skipallocation) { 2868 if (!b->imax) { 2869 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2870 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2871 2872 b->free_imax_ilen = PETSC_TRUE; 2873 } 2874 /* b->ilen will count nonzeros in each block row so far. */ 2875 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2876 if (!nnz) { 2877 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2878 else if (nz < 0) nz = 1; 2879 for (i=0; i<mbs; i++) b->imax[i] = nz; 2880 nz = nz*mbs; 2881 } else { 2882 nz = 0; 2883 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2884 } 2885 2886 /* allocate the matrix space */ 2887 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2888 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2889 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2890 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2891 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2892 2893 b->singlemalloc = PETSC_TRUE; 2894 b->i[0] = 0; 2895 for (i=1; i<mbs+1; i++) { 2896 b->i[i] = b->i[i-1] + b->imax[i-1]; 2897 } 2898 b->free_a = PETSC_TRUE; 2899 b->free_ij = PETSC_TRUE; 2900 #if defined(PETSC_THREADCOMM_ACTIVE) 2901 ierr = MatZeroEntries_SeqBAIJ(B);CHKERRQ(ierr); 2902 #endif 2903 } else { 2904 b->free_a = PETSC_FALSE; 2905 b->free_ij = PETSC_FALSE; 2906 } 2907 2908 b->bs2 = bs2; 2909 b->mbs = mbs; 2910 b->nz = 0; 2911 b->maxnz = nz; 2912 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2913 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2914 PetscFunctionReturn(0); 2915 } 2916 2917 #undef __FUNCT__ 2918 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 2919 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2920 { 2921 PetscInt i,m,nz,nz_max=0,*nnz; 2922 PetscScalar *values=0; 2923 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2924 PetscErrorCode ierr; 2925 2926 PetscFunctionBegin; 2927 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2928 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2929 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2930 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2931 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2932 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2933 m = B->rmap->n/bs; 2934 2935 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2936 ierr = PetscMalloc1((m+1), &nnz);CHKERRQ(ierr); 2937 for (i=0; i<m; i++) { 2938 nz = ii[i+1]- ii[i]; 2939 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2940 nz_max = PetscMax(nz_max, nz); 2941 nnz[i] = nz; 2942 } 2943 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2944 ierr = PetscFree(nnz);CHKERRQ(ierr); 2945 2946 values = (PetscScalar*)V; 2947 if (!values) { 2948 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2949 } 2950 for (i=0; i<m; i++) { 2951 PetscInt ncols = ii[i+1] - ii[i]; 2952 const PetscInt *icols = jj + ii[i]; 2953 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2954 if (!roworiented) { 2955 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2956 } else { 2957 PetscInt j; 2958 for (j=0; j<ncols; j++) { 2959 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2960 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2961 } 2962 } 2963 } 2964 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2965 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2966 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2967 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2968 PetscFunctionReturn(0); 2969 } 2970 2971 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 2972 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*); 2973 #if defined(PETSC_HAVE_MUMPS) 2974 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 2975 #endif 2976 extern PetscErrorCode MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,PetscBool*); 2977 2978 /*MC 2979 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2980 block sparse compressed row format. 2981 2982 Options Database Keys: 2983 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 2984 2985 Level: beginner 2986 2987 .seealso: MatCreateSeqBAIJ() 2988 M*/ 2989 2990 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 2991 2992 #undef __FUNCT__ 2993 #define __FUNCT__ "MatCreate_SeqBAIJ" 2994 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 2995 { 2996 PetscErrorCode ierr; 2997 PetscMPIInt size; 2998 Mat_SeqBAIJ *b; 2999 3000 PetscFunctionBegin; 3001 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3002 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3003 3004 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3005 B->data = (void*)b; 3006 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3007 3008 b->row = 0; 3009 b->col = 0; 3010 b->icol = 0; 3011 b->reallocs = 0; 3012 b->saved_values = 0; 3013 3014 b->roworiented = PETSC_TRUE; 3015 b->nonew = 0; 3016 b->diag = 0; 3017 B->spptr = 0; 3018 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3019 b->keepnonzeropattern = PETSC_FALSE; 3020 b->xtoy = 0; 3021 b->XtoY = 0; 3022 3023 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactorAvailable_petsc_C",MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3024 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_petsc_C",MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 3025 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_bstrm_C",MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr); 3026 #if defined(PETSC_HAVE_MUMPS) 3027 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C", MatGetFactor_baij_mumps);CHKERRQ(ierr); 3028 #endif 3029 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3030 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3031 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3032 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3033 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3034 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3035 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3036 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3037 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3038 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3039 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3040 PetscFunctionReturn(0); 3041 } 3042 3043 #undef __FUNCT__ 3044 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3045 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3046 { 3047 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3048 PetscErrorCode ierr; 3049 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3050 3051 PetscFunctionBegin; 3052 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3053 3054 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3055 c->imax = a->imax; 3056 c->ilen = a->ilen; 3057 c->free_imax_ilen = PETSC_FALSE; 3058 } else { 3059 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3060 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3061 for (i=0; i<mbs; i++) { 3062 c->imax[i] = a->imax[i]; 3063 c->ilen[i] = a->ilen[i]; 3064 } 3065 c->free_imax_ilen = PETSC_TRUE; 3066 } 3067 3068 /* allocate the matrix space */ 3069 if (mallocmatspace) { 3070 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3071 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3072 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3073 3074 c->i = a->i; 3075 c->j = a->j; 3076 c->singlemalloc = PETSC_FALSE; 3077 c->free_a = PETSC_TRUE; 3078 c->free_ij = PETSC_FALSE; 3079 c->parent = A; 3080 C->preallocated = PETSC_TRUE; 3081 C->assembled = PETSC_TRUE; 3082 3083 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3084 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3085 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3086 } else { 3087 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3088 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3089 3090 c->singlemalloc = PETSC_TRUE; 3091 c->free_a = PETSC_TRUE; 3092 c->free_ij = PETSC_TRUE; 3093 3094 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3095 if (mbs > 0) { 3096 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3097 if (cpvalues == MAT_COPY_VALUES) { 3098 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3099 } else { 3100 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3101 } 3102 } 3103 C->preallocated = PETSC_TRUE; 3104 C->assembled = PETSC_TRUE; 3105 } 3106 } 3107 3108 c->roworiented = a->roworiented; 3109 c->nonew = a->nonew; 3110 3111 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3112 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3113 3114 c->bs2 = a->bs2; 3115 c->mbs = a->mbs; 3116 c->nbs = a->nbs; 3117 3118 if (a->diag) { 3119 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3120 c->diag = a->diag; 3121 c->free_diag = PETSC_FALSE; 3122 } else { 3123 ierr = PetscMalloc1((mbs+1),&c->diag);CHKERRQ(ierr); 3124 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3125 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3126 c->free_diag = PETSC_TRUE; 3127 } 3128 } else c->diag = 0; 3129 3130 c->nz = a->nz; 3131 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3132 c->solve_work = NULL; 3133 c->mult_work = NULL; 3134 c->sor_workt = NULL; 3135 c->sor_work = NULL; 3136 3137 c->compressedrow.use = a->compressedrow.use; 3138 c->compressedrow.nrows = a->compressedrow.nrows; 3139 if (a->compressedrow.use) { 3140 i = a->compressedrow.nrows; 3141 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3142 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3143 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3144 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3145 } else { 3146 c->compressedrow.use = PETSC_FALSE; 3147 c->compressedrow.i = NULL; 3148 c->compressedrow.rindex = NULL; 3149 } 3150 C->nonzerostate = A->nonzerostate; 3151 3152 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3153 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3154 PetscFunctionReturn(0); 3155 } 3156 3157 #undef __FUNCT__ 3158 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3159 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3160 { 3161 PetscErrorCode ierr; 3162 3163 PetscFunctionBegin; 3164 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3165 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3166 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3167 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3168 PetscFunctionReturn(0); 3169 } 3170 3171 #undef __FUNCT__ 3172 #define __FUNCT__ "MatLoad_SeqBAIJ" 3173 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3174 { 3175 Mat_SeqBAIJ *a; 3176 PetscErrorCode ierr; 3177 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs; 3178 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3179 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3180 PetscInt *masked,nmask,tmp,bs2,ishift; 3181 PetscMPIInt size; 3182 int fd; 3183 PetscScalar *aa; 3184 MPI_Comm comm; 3185 3186 PetscFunctionBegin; 3187 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3188 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3189 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3190 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3191 if (bs < 0) bs = 1; 3192 bs2 = bs*bs; 3193 3194 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3195 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3196 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3197 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3198 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3199 M = header[1]; N = header[2]; nz = header[3]; 3200 3201 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3202 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3203 3204 /* 3205 This code adds extra rows to make sure the number of rows is 3206 divisible by the blocksize 3207 */ 3208 mbs = M/bs; 3209 extra_rows = bs - M + bs*(mbs); 3210 if (extra_rows == bs) extra_rows = 0; 3211 else mbs++; 3212 if (extra_rows) { 3213 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3214 } 3215 3216 /* Set global sizes if not already set */ 3217 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3218 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3219 } else { /* Check if the matrix global sizes are correct */ 3220 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3221 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3222 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3223 } 3224 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3225 } 3226 3227 /* read in row lengths */ 3228 ierr = PetscMalloc1((M+extra_rows),&rowlengths);CHKERRQ(ierr); 3229 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3230 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3231 3232 /* read in column indices */ 3233 ierr = PetscMalloc1((nz+extra_rows),&jj);CHKERRQ(ierr); 3234 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3235 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3236 3237 /* loop over row lengths determining block row lengths */ 3238 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3239 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3240 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3241 rowcount = 0; 3242 nzcount = 0; 3243 for (i=0; i<mbs; i++) { 3244 nmask = 0; 3245 for (j=0; j<bs; j++) { 3246 kmax = rowlengths[rowcount]; 3247 for (k=0; k<kmax; k++) { 3248 tmp = jj[nzcount++]/bs; 3249 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3250 } 3251 rowcount++; 3252 } 3253 browlengths[i] += nmask; 3254 /* zero out the mask elements we set */ 3255 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3256 } 3257 3258 /* Do preallocation */ 3259 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3260 a = (Mat_SeqBAIJ*)newmat->data; 3261 3262 /* set matrix "i" values */ 3263 a->i[0] = 0; 3264 for (i=1; i<= mbs; i++) { 3265 a->i[i] = a->i[i-1] + browlengths[i-1]; 3266 a->ilen[i-1] = browlengths[i-1]; 3267 } 3268 a->nz = 0; 3269 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3270 3271 /* read in nonzero values */ 3272 ierr = PetscMalloc1((nz+extra_rows),&aa);CHKERRQ(ierr); 3273 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3274 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3275 3276 /* set "a" and "j" values into matrix */ 3277 nzcount = 0; jcount = 0; 3278 for (i=0; i<mbs; i++) { 3279 nzcountb = nzcount; 3280 nmask = 0; 3281 for (j=0; j<bs; j++) { 3282 kmax = rowlengths[i*bs+j]; 3283 for (k=0; k<kmax; k++) { 3284 tmp = jj[nzcount++]/bs; 3285 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3286 } 3287 } 3288 /* sort the masked values */ 3289 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3290 3291 /* set "j" values into matrix */ 3292 maskcount = 1; 3293 for (j=0; j<nmask; j++) { 3294 a->j[jcount++] = masked[j]; 3295 mask[masked[j]] = maskcount++; 3296 } 3297 /* set "a" values into matrix */ 3298 ishift = bs2*a->i[i]; 3299 for (j=0; j<bs; j++) { 3300 kmax = rowlengths[i*bs+j]; 3301 for (k=0; k<kmax; k++) { 3302 tmp = jj[nzcountb]/bs; 3303 block = mask[tmp] - 1; 3304 point = jj[nzcountb] - bs*tmp; 3305 idx = ishift + bs2*block + j + bs*point; 3306 a->a[idx] = (MatScalar)aa[nzcountb++]; 3307 } 3308 } 3309 /* zero out the mask elements we set */ 3310 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3311 } 3312 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3313 3314 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3315 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3316 ierr = PetscFree(aa);CHKERRQ(ierr); 3317 ierr = PetscFree(jj);CHKERRQ(ierr); 3318 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3319 3320 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3321 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3322 PetscFunctionReturn(0); 3323 } 3324 3325 #undef __FUNCT__ 3326 #define __FUNCT__ "MatCreateSeqBAIJ" 3327 /*@C 3328 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3329 compressed row) format. For good matrix assembly performance the 3330 user should preallocate the matrix storage by setting the parameter nz 3331 (or the array nnz). By setting these parameters accurately, performance 3332 during matrix assembly can be increased by more than a factor of 50. 3333 3334 Collective on MPI_Comm 3335 3336 Input Parameters: 3337 + comm - MPI communicator, set to PETSC_COMM_SELF 3338 . bs - size of block 3339 . m - number of rows 3340 . n - number of columns 3341 . nz - number of nonzero blocks per block row (same for all rows) 3342 - nnz - array containing the number of nonzero blocks in the various block rows 3343 (possibly different for each block row) or NULL 3344 3345 Output Parameter: 3346 . A - the matrix 3347 3348 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3349 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3350 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3351 3352 Options Database Keys: 3353 . -mat_no_unroll - uses code that does not unroll the loops in the 3354 block calculations (much slower) 3355 . -mat_block_size - size of the blocks to use 3356 3357 Level: intermediate 3358 3359 Notes: 3360 The number of rows and columns must be divisible by blocksize. 3361 3362 If the nnz parameter is given then the nz parameter is ignored 3363 3364 A nonzero block is any block that as 1 or more nonzeros in it 3365 3366 The block AIJ format is fully compatible with standard Fortran 77 3367 storage. That is, the stored row and column indices can begin at 3368 either one (as in Fortran) or zero. See the users' manual for details. 3369 3370 Specify the preallocated storage with either nz or nnz (not both). 3371 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3372 allocation. See Users-Manual: ch_mat for details. 3373 matrices. 3374 3375 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3376 @*/ 3377 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3378 { 3379 PetscErrorCode ierr; 3380 3381 PetscFunctionBegin; 3382 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3383 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3384 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3385 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3386 PetscFunctionReturn(0); 3387 } 3388 3389 #undef __FUNCT__ 3390 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3391 /*@C 3392 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3393 per row in the matrix. For good matrix assembly performance the 3394 user should preallocate the matrix storage by setting the parameter nz 3395 (or the array nnz). By setting these parameters accurately, performance 3396 during matrix assembly can be increased by more than a factor of 50. 3397 3398 Collective on MPI_Comm 3399 3400 Input Parameters: 3401 + B - the matrix 3402 . bs - size of block 3403 . nz - number of block nonzeros per block row (same for all rows) 3404 - nnz - array containing the number of block nonzeros in the various block rows 3405 (possibly different for each block row) or NULL 3406 3407 Options Database Keys: 3408 . -mat_no_unroll - uses code that does not unroll the loops in the 3409 block calculations (much slower) 3410 . -mat_block_size - size of the blocks to use 3411 3412 Level: intermediate 3413 3414 Notes: 3415 If the nnz parameter is given then the nz parameter is ignored 3416 3417 You can call MatGetInfo() to get information on how effective the preallocation was; 3418 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3419 You can also run with the option -info and look for messages with the string 3420 malloc in them to see if additional memory allocation was needed. 3421 3422 The block AIJ format is fully compatible with standard Fortran 77 3423 storage. That is, the stored row and column indices can begin at 3424 either one (as in Fortran) or zero. See the users' manual for details. 3425 3426 Specify the preallocated storage with either nz or nnz (not both). 3427 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3428 allocation. See Users-Manual: ch_mat for details. 3429 3430 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3431 @*/ 3432 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3433 { 3434 PetscErrorCode ierr; 3435 3436 PetscFunctionBegin; 3437 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3438 PetscValidType(B,1); 3439 PetscValidLogicalCollectiveInt(B,bs,2); 3440 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3441 PetscFunctionReturn(0); 3442 } 3443 3444 #undef __FUNCT__ 3445 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3446 /*@C 3447 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3448 (the default sequential PETSc format). 3449 3450 Collective on MPI_Comm 3451 3452 Input Parameters: 3453 + B - the matrix 3454 . i - the indices into j for the start of each local row (starts with zero) 3455 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3456 - v - optional values in the matrix 3457 3458 Level: developer 3459 3460 Notes: 3461 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3462 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3463 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3464 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3465 block column and the second index is over columns within a block. 3466 3467 .keywords: matrix, aij, compressed row, sparse 3468 3469 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3470 @*/ 3471 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3472 { 3473 PetscErrorCode ierr; 3474 3475 PetscFunctionBegin; 3476 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3477 PetscValidType(B,1); 3478 PetscValidLogicalCollectiveInt(B,bs,2); 3479 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3480 PetscFunctionReturn(0); 3481 } 3482 3483 3484 #undef __FUNCT__ 3485 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3486 /*@ 3487 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3488 3489 Collective on MPI_Comm 3490 3491 Input Parameters: 3492 + comm - must be an MPI communicator of size 1 3493 . bs - size of block 3494 . m - number of rows 3495 . n - number of columns 3496 . i - row indices 3497 . j - column indices 3498 - a - matrix values 3499 3500 Output Parameter: 3501 . mat - the matrix 3502 3503 Level: advanced 3504 3505 Notes: 3506 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3507 once the matrix is destroyed 3508 3509 You cannot set new nonzero locations into this matrix, that will generate an error. 3510 3511 The i and j indices are 0 based 3512 3513 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3514 3515 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3516 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3517 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3518 with column-major ordering within blocks. 3519 3520 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3521 3522 @*/ 3523 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat) 3524 { 3525 PetscErrorCode ierr; 3526 PetscInt ii; 3527 Mat_SeqBAIJ *baij; 3528 3529 PetscFunctionBegin; 3530 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3531 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3532 3533 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3534 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3535 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3536 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3537 baij = (Mat_SeqBAIJ*)(*mat)->data; 3538 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3539 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3540 3541 baij->i = i; 3542 baij->j = j; 3543 baij->a = a; 3544 3545 baij->singlemalloc = PETSC_FALSE; 3546 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3547 baij->free_a = PETSC_FALSE; 3548 baij->free_ij = PETSC_FALSE; 3549 3550 for (ii=0; ii<m; ii++) { 3551 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3552 #if defined(PETSC_USE_DEBUG) 3553 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3554 #endif 3555 } 3556 #if defined(PETSC_USE_DEBUG) 3557 for (ii=0; ii<baij->i[m]; ii++) { 3558 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3559 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3560 } 3561 #endif 3562 3563 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3564 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3565 PetscFunctionReturn(0); 3566 } 3567